From 0f8da884f93b27abd2c1e7892f093ef0e5238328 Mon Sep 17 00:00:00 2001
From: Arik Fraimovich
Date: Sun, 11 Oct 2015 14:44:12 +0300
Subject: [PATCH] Fix #597: MongoDB date parsing logic improvement

---
 redash/query_runner/mongodb.py     | 102 ++++++-----------------------
 tests/query_runner/__init__.py     |   1 +
 tests/query_runner/test_mongodb.py |  73 +++++++++++++++++++++
 3 files changed, 94 insertions(+), 82 deletions(-)
 create mode 100644 tests/query_runner/__init__.py
 create mode 100644 tests/query_runner/test_mongodb.py

diff --git a/redash/query_runner/mongodb.py b/redash/query_runner/mongodb.py
index dda28880..2ece48db 100644
--- a/redash/query_runner/mongodb.py
+++ b/redash/query_runner/mongodb.py
@@ -29,7 +29,6 @@ TYPES_MAP = {
     datetime.datetime: TYPE_DATETIME,
 }
 
-date_regex = re.compile("ISODate\(\"(.*)\"\)", re.IGNORECASE)
 
 class MongoDBJSONEncoder(JSONEncoder):
     def default(self, o):
@@ -38,66 +37,25 @@ class MongoDBJSONEncoder(JSONEncoder):
 
         return super(MongoDBJSONEncoder, self).default(o)
 
-# Simple query example:
-#
-# {
-#     "collection" : "my_collection",
-#     "query" : {
-#         "date" : {
-#             "$gt" : "ISODate(\"2015-01-15 11:41\")",
-#         },
-#         "type" : 1
-#     },
-#     "fields" : {
-#         "_id" : 1,
-#         "name" : 2
-#     },
-#     "sort" : [
-#         {
-#             "name" : "date",
-#             "direction" : -1
-#         }
-#     ]
-#
-# }
-#
-#
-# Aggregation
-# ===========
-# Uses a syntax similar to the one used in PyMongo, however to support the
-# correct order of sorting, it uses a regular list for the "$sort" operation
-# that converts into a SON (sorted dictionary) object before execution.
-#
-# Aggregation query example:
-#
-# {
-#     "collection" : "things",
-#     "aggregate" : [
-#         {
-#             "$unwind" : "$tags"
-#         },
-#         {
-#             "$group" : {
-#                 "_id" : "$tags",
-#                 "count" : { "$sum" : 1 }
-#             }
-#         },
-#         {
-#             "$sort" : [
-#                 {
-#                     "name" : "count",
-#                     "direction" : -1
-#                 },
-#                 {
-#                     "name" : "_id",
-#                     "direction" : -1
-#                 }
-#             ]
-#         }
-#     ]
-# }
-#
-#
+
+date_regex = re.compile("ISODate\(\"(.*)\"\)", re.IGNORECASE)
+
+
+def datetime_parser(dct):
+    for k, v in dct.iteritems():
+        if isinstance(v, basestring):
+            m = date_regex.findall(v)
+            if len(m) > 0:
+                dct[k] = parse(m[0], yearfirst=True)
+
+    return dct
+
+
+def parse_query_json(query):
+    query_data = json.loads(query, object_hook=datetime_parser)
+    return query_data
+
+
 class MongoDB(BaseQueryRunner):
     @classmethod
     def configuration_schema(cls):
@@ -144,25 +102,6 @@ class MongoDB(BaseQueryRunner):
 
         return None
 
-    def _fix_dates(self, data):
-        for k in data:
-            if isinstance(data[k], list):
-                for i in range(0, len(data[k])):
-                    if isinstance(data[k][i], (str, unicode)):
-                        self._convert_date(data[k], i)
-                    elif not isinstance(data[k][i], (int)):
-                        self._fix_dates(data[k][i])
-
-            elif isinstance(data[k], dict):
-                self._fix_dates(data[k])
-            else:
-                if isinstance(data[k], (str, unicode)):
-                    self._convert_date(data, k)
-
-    def _convert_date(self, q, field_name):
-        m = date_regex.findall(q[field_name])
-        if len(m) > 0:
-            q[field_name] = parse(m[0], yearfirst=True)
 
     def run_query(self, query):
         if self.is_replica_set:
@@ -176,8 +115,7 @@ class MongoDB(BaseQueryRunner):
         logger.debug("mongodb got query: %s", query)
 
         try:
-            query_data = json.loads(query)
-            self._fix_dates(query_data)
+            query_data = parse_query_json(query)
         except ValueError:
             return None, "Invalid query format. The query is not a valid JSON."
 
diff --git a/tests/query_runner/__init__.py b/tests/query_runner/__init__.py
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/tests/query_runner/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/query_runner/test_mongodb.py b/tests/query_runner/test_mongodb.py
new file mode 100644
index 00000000..a7aca3a4
--- /dev/null
+++ b/tests/query_runner/test_mongodb.py
@@ -0,0 +1,73 @@
+import datetime
+import json
+from unittest import TestCase
+from redash.query_runner.mongodb import parse_query_json
+
+
+class TestParseQueryJson(TestCase):
+    def test_ignores_non_isodate_fields(self):
+        query = {
+            'test': 1,
+            'test_list': ['a', 'b', 'c'],
+            'test_dict': {
+                'a': 1,
+                'b': 2
+            }
+        }
+
+        query_data = parse_query_json(json.dumps(query))
+        self.assertDictEqual(query_data, query)
+
+    def test_parses_isodate_fields(self):
+        query = {
+            'test': 1,
+            'test_list': ['a', 'b', 'c'],
+            'test_dict': {
+                'a': 1,
+                'b': 2
+            },
+            'testIsoDate': "ISODate(\"2014-10-03T00:00\")"
+        }
+
+        query_data = parse_query_json(json.dumps(query))
+
+        self.assertEqual(query_data['testIsoDate'], datetime.datetime(2014, 10, 3, 0, 0))
+
+    def test_parses_isodate_in_nested_fields(self):
+        query = {
+            'test': 1,
+            'test_list': ['a', 'b', 'c'],
+            'test_dict': {
+                'a': 1,
+                'b': {
+                    'date': "ISODate(\"2014-10-04T00:00\")"
+                }
+            },
+            'testIsoDate': "ISODate(\"2014-10-03T00:00\")"
+        }
+
+        query_data = parse_query_json(json.dumps(query))
+
+        self.assertEqual(query_data['testIsoDate'], datetime.datetime(2014, 10, 3, 0, 0))
+        self.assertEqual(query_data['test_dict']['b']['date'], datetime.datetime(2014, 10, 4, 0, 0))
+
+    def test_handles_nested_fields(self):
+        # https://github.com/EverythingMe/redash/issues/597
+        query = {
+            "collection": "bus",
+            "aggregate": [
+                {
+                    "$geoNear": {
+                        "near": {"type": "Point", "coordinates": [-22.910079, -43.205161]},
+                        "maxDistance": 100000000,
+                        "distanceField": "dist.calculated",
+                        "includeLocs": "dist.location",
+                        "spherical": True
+                    }
+                }
+            ]
+        }
+
+        query_data = parse_query_json(json.dumps(query))
+
+        self.assertDictEqual(query, query_data)
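
Note (not part of the patch): a minimal usage sketch of the parse_query_json helper introduced above, assuming redash is importable; the collection and field names below are made up for illustration. It shows embedded ISODate("...") strings being converted to datetime objects while all other values pass through unchanged, which is what replaces the old _fix_dates traversal.

    import json

    from redash.query_runner.mongodb import parse_query_json

    # Hypothetical query document; only the ISODate(...) string is special-cased.
    raw_query = json.dumps({
        "collection": "events",
        "query": {
            "created_at": {"$gt": "ISODate(\"2015-01-15 11:41\")"},
            "type": 1
        }
    })

    query_data = parse_query_json(raw_query)

    # query_data["query"]["created_at"]["$gt"] is now datetime.datetime(2015, 1, 15, 11, 41)
    # query_data["query"]["type"] is still the integer 1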