diff --git a/redash/query_runner/google_spreadsheets.py b/redash/query_runner/google_spreadsheets.py
index c84b7f49..e8d125c2 100644
--- a/redash/query_runner/google_spreadsheets.py
+++ b/redash/query_runner/google_spreadsheets.py
@@ -4,6 +4,7 @@ from base64 import b64decode
 
 from dateutil import parser
 from requests import Session
+from xlsxwriter.utility import xl_col_to_name
 
 from redash.query_runner import *
 from redash.utils import json_dumps
@@ -25,6 +26,45 @@ def _load_key(filename):
         return json.loads(f.read())
 
 
+def _get_columns_and_column_names(row):
+    """Build column definitions and unique column names from a header row.
+
+    Blank header cells are named after their spreadsheet column letter
+    (``column_A``, ``column_B``, ...). A name that is already taken gets the
+    shared duplicate counter appended; the counter keeps advancing until the
+    suffixed name is unused, so the returned names never collide.
+
+    :param row: sequence of header cell values.
+    :return: ``(columns, column_names)``; every column is typed ``TYPE_STRING``.
+    """
+    column_names = []
+    columns = []
+    duplicate_counter = 1
+
+    for i, column_name in enumerate(row):
+        if not column_name:
+            column_name = 'column_{}'.format(xl_col_to_name(i))
+
+        if column_name in column_names:
+            # The suffixed name may itself be taken (e.g. 'foo1', 'foo', 'foo'),
+            # so keep advancing the counter until the candidate is unused.
+            candidate = u"{}{}".format(column_name, duplicate_counter)
+            duplicate_counter += 1
+            while candidate in column_names:
+                candidate = u"{}{}".format(column_name, duplicate_counter)
+                duplicate_counter += 1
+            column_name = candidate
+
+        column_names.append(column_name)
+        columns.append({
+            'name': column_name,
+            'friendly_name': column_name,
+            'type': TYPE_STRING
+        })
+
+    return columns, column_names
+
+
 def _guess_type(value):
     if value == '':
         return TYPE_STRING
@@ -104,21 +144,7 @@ def parse_worksheet(worksheet):
     if not worksheet:
         return {'columns': [], 'rows': []}
 
-    column_names = []
-    columns = []
-    duplicate_counter = 1
-
-    for j, column_name in enumerate(worksheet[HEADER_INDEX]):
-        if column_name in column_names:
-            column_name = u"{}{}".format(column_name, duplicate_counter)
-            duplicate_counter += 1
-
-        column_names.append(column_name)
-        columns.append({
-            'name': column_name,
-            'friendly_name': column_name,
-            'type': TYPE_STRING
-        })
+    columns, column_names = _get_columns_and_column_names(worksheet[HEADER_INDEX])
 
     if len(worksheet) > 1:
         for j, value in enumerate(worksheet[HEADER_INDEX + 1]):
diff --git a/tests/query_runner/test_google_spreadsheets.py b/tests/query_runner/test_google_spreadsheets.py
index f51141c4..1d1cc602 100644
--- a/tests/query_runner/test_google_spreadsheets.py
+++ b/tests/query_runner/test_google_spreadsheets.py
@@ -5,7 +5,7 @@ from unittest import TestCase
 from mock import MagicMock
 
 from redash.query_runner import TYPE_DATETIME, TYPE_FLOAT, TYPE_INTEGER
-from redash.query_runner.google_spreadsheets import TYPE_BOOLEAN, TYPE_STRING, _guess_type, _value_eval_list, parse_query
+from redash.query_runner.google_spreadsheets import TYPE_BOOLEAN, TYPE_STRING, _get_columns_and_column_names, _guess_type, _value_eval_list, parse_query
 from redash.query_runner.google_spreadsheets import WorksheetNotFoundError, parse_spreadsheet, parse_worksheet
 
 
@@ -106,3 +106,29 @@ class TestParseQuery(TestCase):
     def test_parse_query(self):
         parsed = parse_query('key|0')
         self.assertEqual(('key', 0), parsed)
+
+
+class TestGetColumnsAndColumnNames(TestCase):
+    def test_get_columns(self):
+        _columns = ['foo', 'bar', 'baz']
+        columns, column_names = _get_columns_and_column_names(_columns)
+
+        self.assertEqual(_columns, column_names)
+
+    def test_get_columns_with_duplicated(self):
+        _columns = ['foo', 'bar', 'baz', 'foo', 'baz']
+        columns, column_names = _get_columns_and_column_names(_columns)
+
+        self.assertEqual(['foo', 'bar', 'baz', 'foo1', 'baz2'], column_names)
+
+    def test_get_columns_with_blank(self):
+        _columns = ['foo', '', 'baz', '']
+        columns, column_names = _get_columns_and_column_names(_columns)
+
+        self.assertEqual(['foo', 'column_B', 'baz', 'column_D'], column_names)
+
+    def test_get_columns_with_colliding_suffix(self):
+        _columns = ['foo1', 'foo', 'foo']
+        columns, column_names = _get_columns_and_column_names(_columns)
+
+        self.assertEqual(['foo1', 'foo', 'foo2'], column_names)