From 95fa6849b3eceb7978e99be3f1b61dd6fe3d43fb Mon Sep 17 00:00:00 2001 From: Arik Fraimovich Date: Mon, 4 Jul 2016 22:21:36 +0300 Subject: [PATCH] More robust parsing of worksheets with handling: - handle the case of empty worksheet. - handle the case of worksheet with no data. --- redash/query_runner/google_spreadsheets.py | 63 ++++++++++++++----- .../query_runner/test_google_spreadsheets.py | 33 ++++++++++ 2 files changed, 81 insertions(+), 15 deletions(-) diff --git a/redash/query_runner/google_spreadsheets.py b/redash/query_runner/google_spreadsheets.py index 13b8b5db..85128736 100644 --- a/redash/query_runner/google_spreadsheets.py +++ b/redash/query_runner/google_spreadsheets.py @@ -79,9 +79,52 @@ def _value_eval_list(value): return value_list -class GoogleSpreadsheet(BaseQueryRunner): - HEADER_INDEX = 0 +HEADER_INDEX = 0 + +class WorksheetNotFoundError(Exception): + def __init__(self, worksheet_num, worksheet_count): + message = "Worksheet number {} not found. Spreadsheet has {} worksheets. Note that the worksheet count is zero based.".format(worksheet_num, worksheet_count) + super(WorksheetNotFoundError, self).__init__(message) + + +def parse_worksheet(worksheet): + if not worksheet: + return {'columns': [], 'rows': []} + + column_names = [] + columns = [] + + for j, column_name in enumerate(worksheet[HEADER_INDEX]): + column_names.append(column_name) + columns.append({ + 'name': column_name, + 'friendly_name': column_name, + 'type': TYPE_STRING + }) + + if len(worksheet) > 1: + for j, value in enumerate(worksheet[HEADER_INDEX+1]): + columns[j]['type'] = _guess_type(value) + + rows = [dict(zip(column_names, _value_eval_list(row))) for row in worksheet[HEADER_INDEX + 1:]] + data = {'columns': columns, 'rows': rows} + + return data + + +def parse_spreadsheet(spreadsheet, worksheet_num): + worksheets = spreadsheet.worksheets() + worksheet_count = len(worksheets) + if worksheet_num >= worksheet_count: + raise WorksheetNotFoundError(worksheet_num, worksheet_count) + + worksheet = worksheets[worksheet_num].get_all_values() + + return parse_worksheet(worksheet) + + +class GoogleSpreadsheet(BaseQueryRunner): @classmethod def annotate_query(cls): return False @@ -129,19 +172,9 @@ class GoogleSpreadsheet(BaseQueryRunner): try: spreadsheet_service = self._get_spreadsheet_service() spreadsheet = spreadsheet_service.open_by_key(key) - worksheets = spreadsheet.worksheets() - all_data = worksheets[worksheet_num].get_all_values() - column_names = [] - columns = [] - for j, column_name in enumerate(all_data[self.HEADER_INDEX]): - column_names.append(column_name) - columns.append({ - 'name': column_name, - 'friendly_name': column_name, - 'type': _guess_type(all_data[self.HEADER_INDEX + 1][j]) - }) - rows = [dict(zip(column_names, _value_eval_list(row))) for row in all_data[self.HEADER_INDEX + 1:]] - data = {'columns': columns, 'rows': rows} + + data = parse_spreadsheet(spreadsheet, worksheet_num) + json_data = json.dumps(data, cls=JSONEncoder) error = None except gspread.SpreadsheetNotFound: diff --git a/tests/query_runner/test_google_spreadsheets.py b/tests/query_runner/test_google_spreadsheets.py index d66972c1..4a5c9d7c 100644 --- a/tests/query_runner/test_google_spreadsheets.py +++ b/tests/query_runner/test_google_spreadsheets.py @@ -1,7 +1,11 @@ # -*- coding: utf-8 -*- from unittest import TestCase + +from mock import Mock, MagicMock + from redash.query_runner.google_spreadsheets import _guess_type, _value_eval_list, TYPE_STRING, TYPE_BOOLEAN +from redash.query_runner.google_spreadsheets import parse_worksheet, parse_spreadsheet, WorksheetNotFoundError class TestGuessType(TestCase): @@ -26,3 +30,32 @@ class TestValueEvalList(TestCase): values = ['true', 'false', 'True', 'False', 'TRUE', 'FALSE'] converted_values = [True, False, True, False, True, False] self.assertEqual(converted_values, _value_eval_list(values)) + + +class TestParseSpreadsheet(TestCase): + def test_returns_meaningful_error_for_missing_worksheet(self): + spreadsheet = MagicMock() + + spreadsheet.worksheets = MagicMock(return_value=[]) + self.assertRaises(WorksheetNotFoundError, parse_spreadsheet, spreadsheet, 0) + + spreadsheet.worksheets = MagicMock(return_value=[1, 2]) + self.assertRaises(WorksheetNotFoundError, parse_spreadsheet, spreadsheet, 2) + + +empty_worksheet = [] +only_headers_worksheet = [['Column A', 'Column B']] +regular_worksheet = [['String Column', 'Boolean Column', 'Number Column'], ['A', 'TRUE', '1'], ['B', 'FALSE', '2'], ['C', 'TRUE', '3'], ['D', 'FALSE', '4']] + + +# The following test that the parse function doesn't crash. They don't test correct output. +class TestParseWorksheet(TestCase): + def test_parse_empty_worksheet(self): + parse_worksheet(empty_worksheet) + + def test_parse_only_headers_worksheet(self): + parse_worksheet(only_headers_worksheet) + + def test_parse_regular_worksheet(self): + parse_worksheet(regular_worksheet) +