CSV: correctly serialize booleans and dates. (#3841)

* CSV: correctly serialize booleans and dates.

Closes #3736, closes #2751.

* pep8 fixes

* Move column iteration to a helper function.

* Use elif, as types are mutually exclusive.

* Refactor parsing implementation.

* Move the csv generation function
This commit is contained in:
Arik Fraimovich 2019-05-29 10:45:29 +03:00 committed by GitHub
parent 9480d89e4c
commit 9292ae8d3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 161 additions and 43 deletions

View File

@ -12,6 +12,7 @@ from redash.tasks import QueryTask
from redash.tasks.queries import enqueue_query
from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, utcnow, to_filename)
from redash.models.parameterized_query import ParameterizedQuery, InvalidParameterError, dropdown_values
from redash.serializers import serialize_query_result_to_csv, serialize_query_result_to_xlsx
def error_response(message):
@ -279,12 +280,12 @@ class QueryResultResource(BaseResource):
@staticmethod
def make_csv_response(query_result):
headers = {'Content-Type': "text/csv; charset=UTF-8"}
return make_response(query_result.make_csv_content(), 200, headers)
return make_response(serialize_query_result_to_csv(query_result), 200, headers)
@staticmethod
def make_excel_response(query_result):
headers = {'Content-Type': "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"}
return make_response(query_result.make_excel_content(), 200, headers)
return make_response(serialize_query_result_to_xlsx(query_result), 200, headers)
class JobResource(BaseResource):

View File

@ -1,12 +1,9 @@
import cStringIO
import csv
import datetime
import calendar
import logging
import time
import pytz
import xlsxwriter
from six import python_2_unicode_compatible, text_type
from sqlalchemy import distinct, or_, and_, UniqueConstraint
from sqlalchemy.dialects import postgresql
@ -25,7 +22,7 @@ from redash.destinations import (get_configuration_schema_for_destination_type,
get_destination)
from redash.metrics import database # noqa: F401
from redash.query_runner import (get_configuration_schema_for_query_runner_type,
get_query_runner)
get_query_runner, TYPE_BOOLEAN, TYPE_DATE, TYPE_DATETIME)
from redash.utils import generate_token, json_dumps, json_loads
from redash.utils.configuration import ConfigurationContainer
from redash.models.parameterized_query import ParameterizedQuery
@ -322,41 +319,6 @@ class QueryResult(db.Model, BelongsToOrgMixin):
def groups(self):
return self.data_source.groups
def make_csv_content(self):
s = cStringIO.StringIO()
query_data = json_loads(self.data)
writer = csv.DictWriter(s, extrasaction="ignore", fieldnames=[col['name'] for col in query_data['columns']])
writer.writer = utils.UnicodeWriter(s)
writer.writeheader()
for row in query_data['rows']:
writer.writerow(row)
return s.getvalue()
def make_excel_content(self):
s = cStringIO.StringIO()
query_data = json_loads(self.data)
book = xlsxwriter.Workbook(s, {'constant_memory': True})
sheet = book.add_worksheet("result")
column_names = []
for (c, col) in enumerate(query_data['columns']):
sheet.write(0, c, col['name'])
column_names.append(col['name'])
for (r, row) in enumerate(query_data['rows']):
for (c, name) in enumerate(column_names):
v = row.get(name)
if isinstance(v, list) or isinstance(v, dict):
v = str(v).encode('utf-8')
sheet.write(r + 1, c, v)
book.close()
return s.getvalue()
def should_schedule_next(previous_iteration, now, interval, time=None, day_of_week=None, failures=0):
# if time exists then interval > 23 hours (82800s)

View File

@ -12,6 +12,8 @@ from redash.permissions import has_access, view_only
from redash.utils import json_loads
from redash.models.parameterized_query import ParameterizedQuery
from .query_result import serialize_query_result_to_csv, serialize_query_result_to_xlsx
def public_widget(widget):
res = {

View File

@ -0,0 +1,104 @@
import cStringIO
import csv
import xlsxwriter
from dateutil.parser import parse as parse_date
from redash.utils import json_loads, UnicodeWriter
from redash.query_runner import (TYPE_BOOLEAN, TYPE_DATE, TYPE_DATETIME)
from redash.authentication.org_resolving import current_org
def _convert_format(fmt):
    """Translate a ``DD/MM/YYYY HH:mm:ss`` style format into ``strftime`` directives.

    Supported tokens: ``DD``, ``MM``, ``YYYY``, ``YY``, ``HH``, ``mm`` and ``ss``.
    Any other text in ``fmt`` is passed through untouched.

    :param fmt: format string made of the tokens above plus literal separators.
    :return: the equivalent format string for ``datetime.strftime``.
    """
    # Order matters: 'YYYY' has to be handled before 'YY', otherwise a
    # four-digit year token would be turned into two two-digit ones.
    replacements = (
        ('DD', '%d'),
        ('MM', '%m'),
        ('YYYY', '%Y'),
        ('YY', '%y'),
        ('HH', '%H'),
        ('mm', '%M'),
        # Seconds are '%S'. Lowercase '%s' is not a standard directive (on
        # glibc it yields seconds since the epoch; elsewhere it may fail).
        ('ss', '%S'),
    )
    for token, directive in replacements:
        fmt = fmt.replace(token, directive)
    return fmt
def _convert_bool(value):
    """Render a boolean as the lowercase string ``"true"`` or ``"false"``.

    Only the ``True`` and ``False`` objects are converted; any other value
    (``None``, numbers, strings, ...) is returned unchanged.
    """
    if isinstance(value, bool):
        return "true" if value else "false"
    return value
def _convert_date(value):
    """Format a date value according to the current organization's ``date_format``.

    :param value: the raw cell value (a date string), or a falsy placeholder.
    :return: the formatted date string. Falsy values (``None``, ``""``) are
        returned unchanged, and so are values that cannot be parsed or
        formatted.
    """
    if not value:
        return value

    fmt = _convert_format(current_org.get_setting('date_format'))
    try:
        # Parsing can fail on malformed input, and strftime rejects some
        # dates (e.g. years before 1900 on Python 2). One bad cell should
        # not abort the whole export, so fall back to the raw value.
        return parse_date(value).strftime(fmt)
    except (ValueError, TypeError, OverflowError):
        return value
def _convert_datetime(value):
    """Format a datetime value using the organization's date and time formats.

    The output format is the ``date_format`` and ``time_format`` settings of
    the current organization joined by a single space.

    :param value: the raw cell value (a datetime string), or a falsy placeholder.
    :return: the formatted datetime string. Falsy values (``None``, ``""``)
        are returned unchanged, and so are values that cannot be parsed or
        formatted.
    """
    if not value:
        return value

    fmt = _convert_format('{} {}'.format(current_org.get_setting('date_format'),
                                         current_org.get_setting('time_format')))
    try:
        # Parsing can fail on malformed input, and strftime rejects some
        # dates (e.g. years before 1900 on Python 2). One bad cell should
        # not abort the whole export, so fall back to the raw value.
        return parse_date(value).strftime(fmt)
    except (ValueError, TypeError, OverflowError):
        return value
# Maps a column type constant to the function used to convert that column's
# values before they are written to the CSV output.
SPECIAL_TYPES = {
TYPE_BOOLEAN: _convert_bool,
TYPE_DATE: _convert_date,
TYPE_DATETIME: _convert_datetime
}
def _get_column_lists(columns, special_types=None):
    """Split column definitions into field names and per-column converters.

    :param columns: iterable of column dicts; each must have a ``'name'`` key
        and may have a ``'type'`` key.
    :param special_types: optional mapping of column type to converter
        callable. Defaults to the module-level ``SPECIAL_TYPES``.
    :return: a ``(fieldnames, special_columns)`` tuple. ``fieldnames`` lists
        every column name in order; ``special_columns`` maps the names of
        columns whose type has a converter to that converter.
    """
    if special_types is None:
        special_types = SPECIAL_TYPES

    fieldnames = []
    special_columns = {}

    for col in columns:
        name = col['name']
        fieldnames.append(name)

        # Direct lookup instead of scanning every known type; a column
        # without a 'type' key simply has no converter.
        converter = special_types.get(col.get('type'))
        if converter is not None:
            special_columns[name] = converter

    return fieldnames, special_columns
def serialize_query_result_to_csv(query_result):
    """Return the rows of ``query_result`` as CSV text with a header line.

    ``query_result.data`` is decoded with ``json_loads`` and must contain
    ``'columns'`` and ``'rows'``. Values in columns that have a converter
    (see ``_get_column_lists``) are converted before being written; row keys
    that do not match a column name are ignored.
    """
    buf = cStringIO.StringIO()
    payload = json_loads(query_result.data)
    fieldnames, converters = _get_column_lists(payload['columns'])

    csv_writer = csv.DictWriter(buf, fieldnames=fieldnames, extrasaction="ignore")
    # Swap the underlying writer so the output goes through UnicodeWriter.
    csv_writer.writer = UnicodeWriter(buf)
    csv_writer.writeheader()

    for record in payload['rows']:
        # Convert in place; only columns actually present in the row are touched.
        for name in converters:
            if name in record:
                convert = converters[name]
                record[name] = convert(record[name])
        csv_writer.writerow(record)

    return buf.getvalue()
def serialize_query_result_to_xlsx(query_result):
    """Return the rows of ``query_result`` as the content of an XLSX workbook.

    ``query_result.data`` is decoded with ``json_loads`` and must contain
    ``'columns'`` and ``'rows'``. The workbook has a single sheet named
    "result": row 0 holds the column names, the following rows hold the data.
    """
    output = cStringIO.StringIO()
    payload = json_loads(query_result.data)

    workbook = xlsxwriter.Workbook(output, {'constant_memory': True})
    worksheet = workbook.add_worksheet("result")

    # Header row.
    headers = [column['name'] for column in payload['columns']]
    for col_idx, header in enumerate(headers):
        worksheet.write(0, col_idx, header)

    # Data rows start right below the header.
    for row_idx, record in enumerate(payload['rows'], 1):
        for col_idx, header in enumerate(headers):
            cell = record.get(header)
            # Lists and dicts are written as their string representation.
            if isinstance(cell, (list, dict)):
                cell = str(cell).encode('utf-8')
            worksheet.write(row_idx, col_idx, cell)

    workbook.close()
    return output.getvalue()

View File

@ -4,7 +4,7 @@ import datetime
from tests import BaseTestCase
from redash import models
from redash.utils import utcnow
from redash.utils import utcnow, json_dumps
class QueryResultTest(BaseTestCase):
@ -66,4 +66,4 @@ class QueryResultTest(BaseTestCase):
models.QueryResult.store_result(query.org_id, query.data_source, query.query_hash, query.query_text, "", 0, utcnow())
self.assertEqual(original_updated_at, query.updated_at)
self.assertEqual(original_updated_at, query.updated_at)

View File

View File

@ -0,0 +1,49 @@
import datetime
import csv
import cStringIO
from tests import BaseTestCase
from redash import models
from redash.utils import utcnow, json_dumps
from redash.serializers import serialize_query_result_to_csv
# Query result fixture: one boolean, one datetime and one date column, with
# three rows covering populated values, empty strings / False, and None.
data = {
"rows": [
{"datetime": "2019-05-26T12:39:23.026Z", "bool": True, "date": "2019-05-26"},
{"datetime": "", "bool": False, "date": ""},
{"datetime": None, "bool": None, "date": None},
],
"columns": [
{"friendly_name": "bool", "type": "boolean", "name": "bool"},
{"friendly_name": "date", "type": "datetime", "name": "datetime"},
{"friendly_name": "date", "type": "date", "name": "date"}
]
}
class CsvSerializationTest(BaseTestCase):
    # Verifies how boolean, date and datetime columns are rendered by
    # serialize_query_result_to_csv.

    def get_csv_content(self):
        # Store the module-level ``data`` fixture as a query result and
        # return its CSV serialization.
        serialized = json_dumps(data)
        query_result = self.factory.create_query_result(data=serialized)
        return serialize_query_result_to_csv(query_result)

    def test_serializes_booleans_correctly(self):
        with self.app.test_request_context('/'):
            content = self.get_csv_content()
            rows = list(csv.DictReader(cStringIO.StringIO(content)))

            for index, expected in enumerate(['true', 'false', '']):
                self.assertEqual(rows[index]['bool'], expected)

    def test_serializes_datatime_with_correct_format(self):
        # NOTE(review): the expected strings look like DD/MM/YY and HH:mm
        # formats - presumably the organization defaults; confirm.
        expectations = (
            ('datetime', ['26/05/19 12:39', '', '']),
            ('date', ['26/05/19', '', '']),
        )

        with self.app.test_request_context('/'):
            content = self.get_csv_content()
            rows = list(csv.DictReader(cStringIO.StringIO(content)))

            for column, expected_values in expectations:
                for index, expected in enumerate(expected_values):
                    self.assertEqual(rows[index][column], expected)