More robust parsing of worksheets with handling:

- handle the case of empty worksheet.
- handle the case of worksheet with no data.
This commit is contained in:
Arik Fraimovich 2016-07-04 22:21:36 +03:00
parent 4496a004e8
commit 95fa6849b3
2 changed files with 81 additions and 15 deletions

View File

@ -79,9 +79,52 @@ def _value_eval_list(value):
return value_list
class GoogleSpreadsheet(BaseQueryRunner):
HEADER_INDEX = 0
HEADER_INDEX = 0
class WorksheetNotFoundError(Exception):
def __init__(self, worksheet_num, worksheet_count):
message = "Worksheet number {} not found. Spreadsheet has {} worksheets. Note that the worksheet count is zero based.".format(worksheet_num, worksheet_count)
super(WorksheetNotFoundError, self).__init__(message)
def parse_worksheet(worksheet):
if not worksheet:
return {'columns': [], 'rows': []}
column_names = []
columns = []
for j, column_name in enumerate(worksheet[HEADER_INDEX]):
column_names.append(column_name)
columns.append({
'name': column_name,
'friendly_name': column_name,
'type': TYPE_STRING
})
if len(worksheet) > 1:
for j, value in enumerate(worksheet[HEADER_INDEX+1]):
columns[j]['type'] = _guess_type(value)
rows = [dict(zip(column_names, _value_eval_list(row))) for row in worksheet[HEADER_INDEX + 1:]]
data = {'columns': columns, 'rows': rows}
return data
def parse_spreadsheet(spreadsheet, worksheet_num):
worksheets = spreadsheet.worksheets()
worksheet_count = len(worksheets)
if worksheet_num >= worksheet_count:
raise WorksheetNotFoundError(worksheet_num, worksheet_count)
worksheet = worksheets[worksheet_num].get_all_values()
return parse_worksheet(worksheet)
class GoogleSpreadsheet(BaseQueryRunner):
@classmethod
def annotate_query(cls):
return False
@ -129,19 +172,9 @@ class GoogleSpreadsheet(BaseQueryRunner):
try:
spreadsheet_service = self._get_spreadsheet_service()
spreadsheet = spreadsheet_service.open_by_key(key)
worksheets = spreadsheet.worksheets()
all_data = worksheets[worksheet_num].get_all_values()
column_names = []
columns = []
for j, column_name in enumerate(all_data[self.HEADER_INDEX]):
column_names.append(column_name)
columns.append({
'name': column_name,
'friendly_name': column_name,
'type': _guess_type(all_data[self.HEADER_INDEX + 1][j])
})
rows = [dict(zip(column_names, _value_eval_list(row))) for row in all_data[self.HEADER_INDEX + 1:]]
data = {'columns': columns, 'rows': rows}
data = parse_spreadsheet(spreadsheet, worksheet_num)
json_data = json.dumps(data, cls=JSONEncoder)
error = None
except gspread.SpreadsheetNotFound:

View File

@ -1,7 +1,11 @@
# -*- coding: utf-8 -*-
from unittest import TestCase
from mock import Mock, MagicMock
from redash.query_runner.google_spreadsheets import _guess_type, _value_eval_list, TYPE_STRING, TYPE_BOOLEAN
from redash.query_runner.google_spreadsheets import parse_worksheet, parse_spreadsheet, WorksheetNotFoundError
class TestGuessType(TestCase):
@ -26,3 +30,32 @@ class TestValueEvalList(TestCase):
values = ['true', 'false', 'True', 'False', 'TRUE', 'FALSE']
converted_values = [True, False, True, False, True, False]
self.assertEqual(converted_values, _value_eval_list(values))
class TestParseSpreadsheet(TestCase):
def test_returns_meaningful_error_for_missing_worksheet(self):
spreadsheet = MagicMock()
spreadsheet.worksheets = MagicMock(return_value=[])
self.assertRaises(WorksheetNotFoundError, parse_spreadsheet, spreadsheet, 0)
spreadsheet.worksheets = MagicMock(return_value=[1, 2])
self.assertRaises(WorksheetNotFoundError, parse_spreadsheet, spreadsheet, 2)
empty_worksheet = []
only_headers_worksheet = [['Column A', 'Column B']]
regular_worksheet = [['String Column', 'Boolean Column', 'Number Column'], ['A', 'TRUE', '1'], ['B', 'FALSE', '2'], ['C', 'TRUE', '3'], ['D', 'FALSE', '4']]
# The following test that the parse function doesn't crash. They don't test correct output.
class TestParseWorksheet(TestCase):
def test_parse_empty_worksheet(self):
parse_worksheet(empty_worksheet)
def test_parse_only_headers_worksheet(self):
parse_worksheet(only_headers_worksheet)
def test_parse_regular_worksheet(self):
parse_worksheet(regular_worksheet)