From 371b319e923fda457783a108d2f4fa3e1f191d0d Mon Sep 17 00:00:00 2001 From: Omer Lachish Date: Tue, 29 Jan 2019 09:18:07 +0200 Subject: [PATCH] Server-side parameter validation (#3315) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * stop testing `collect_query_parameters`, it's an implementation detail * add tests for `missing_query_params` * rename SQLQuery -> ParameterizedSqlQuery * rename sql_query.py to parameterized_query.py * split to parameterized queries and parameterized SQL queries, where parameterized queries only do templating and parameterized SQL queries add tree validation on top of it * move missing parameter detection to ParameterizedQuery * get rid of some old code * fix tests * set syntax to `custom` * revert the max-age-related refactoring * 👋 tree validations 😢 * BaseQueryRunner is no longer a factory for ParameterizedQuery, for now * add an endpoint for running a query by its id and (optional) parameters without having to provide the query text * adds parameter schema to ParameterizedQuery * adds parameter schema validation (currently for strings) * validate number parameters * validate date parameters * validate parameters on POST /api/queries//results * validate enum parameters * validate date range parameters * validate query-based dropdowns by preprocessing them at the handler level and converting them to a populated enum * change _is_date_range to be a tad more succinct * a single assignment with a `map` is sufficiently explanatory * Update redash/utils/parameterized_query.py Co-Authored-By: rauchy * Update redash/utils/parameterized_query.py Co-Authored-By: rauchy * Update redash/utils/parameterized_query.py Co-Authored-By: rauchy * Update redash/utils/parameterized_query.py Co-Authored-By: rauchy * Update redash/handlers/query_results.py Co-Authored-By: rauchy * Update redash/utils/parameterized_query.py Co-Authored-By: rauchy * build error message inside the error * support all types of numbers as number parameters * check for permissions when populating query-based dropdowns * check for access to query before running it * check for empty rows when populating query-based enums * don't bother loading query results if user doesn't have access * 💥 on unexpected parameter types * parameter schema default is a list, not a dictionary * remove redundant null guards --- redash/handlers/query_results.py | 27 ++++++-- redash/utils/parameterized_query.py | 58 ++++++++++++++-- tests/utils/test_parameterized_query.py | 92 ++++++++++++++++++++++++- 3 files changed, 168 insertions(+), 9 deletions(-) diff --git a/redash/handlers/query_results.py b/redash/handlers/query_results.py index 575f5766..a3e83fad 100644 --- a/redash/handlers/query_results.py +++ b/redash/handlers/query_results.py @@ -10,7 +10,7 @@ from redash.permissions import (has_access, not_view_only, require_access, require_permission, view_only) from redash.tasks import QueryTask from redash.tasks.queries import enqueue_query -from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, utcnow) +from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, json_loads, utcnow) from redash.utils.parameterized_query import ParameterizedQuery @@ -64,7 +64,7 @@ def run_query_sync(data_source, parameter_values, query_text, max_age=0): return None -def run_query(data_source, parameter_values, query_text, query_id, max_age=0): +def run_query(data_source, parameter_values, query_text, query_id, max_age=0, parameter_schema=None): if data_source.paused: if data_source.pause_reason: message = '{} is paused ({}). Please try later.'.format(data_source.name, data_source.pause_reason) @@ -73,7 +73,7 @@ def run_query(data_source, parameter_values, query_text, query_id, max_age=0): return error_response(message) - query = ParameterizedQuery(query_text).apply(parameter_values) + query = ParameterizedQuery(query_text, parameter_schema).apply(parameter_values) if query.missing_params: return error_response(u'Missing parameter value for: {}'.format(u", ".join(query.missing_params))) @@ -154,6 +154,23 @@ class QueryResultResource(BaseResource): return make_response("", 200, headers) + def _fetch_rows(self, query_id): + query = models.Query.get_by_id_and_org(query_id, self.current_org) + require_access(query.data_source.groups, self.current_user, view_only) + + query_result = models.QueryResult.get_by_id_and_org(query.latest_query_data_id, self.current_org) + + return json_loads(query_result.data)["rows"] + + def _convert_queries_to_enums(self, definition): + if definition["type"] == "query": + definition["type"] = "enum" + + rows = self._fetch_rows(definition.pop("queryId")) + definition["enumOptions"] = [row.get('value', row.get(row.keys()[0])) for row in rows if row] + + return definition + @require_permission('execute_query') def post(self, query_id): """ @@ -171,11 +188,13 @@ class QueryResultResource(BaseResource): max_age = int(params.get('max_age', 0)) query = get_object_or_404(models.Query.get_by_id_and_org, query_id, self.current_org) + parameter_schema = map(self._convert_queries_to_enums, + query.options.get("parameters", [])) if not has_access(query.data_source.groups, self.current_user, not_view_only): return {'job': {'status': 4, 'error': 'You do not have permission to run queries with this data source.'}}, 403 else: - return run_query(query.data_source, parameters, query.query_text, query_id, max_age) + return run_query(query.data_source, parameters, query.query_text, query_id, max_age, parameter_schema=parameter_schema) @require_permission('view_query') def get(self, query_id=None, query_result_id=None, filetype='json'): diff --git a/redash/utils/parameterized_query.py b/redash/utils/parameterized_query.py index 4f9a8735..c1ec2d38 100644 --- a/redash/utils/parameterized_query.py +++ b/redash/utils/parameterized_query.py @@ -1,7 +1,8 @@ import pystache +from numbers import Number from redash.utils import mustache_render from funcy import distinct - +from dateutil.parser import parse def _collect_key_names(nodes): keys = [] @@ -33,17 +34,60 @@ def _parameter_names(parameter_values): return names +def _is_date(string): + try: + parse(string) + return True + except ValueError: + return False + + +def _is_date_range(obj): + try: + return _is_date(obj["start"]) and _is_date(obj["end"]) + except (KeyError, TypeError): + return False + + class ParameterizedQuery(object): - def __init__(self, template): + def __init__(self, template, schema=None): + self.schema = schema or [] self.template = template self.query = template self.parameters = {} def apply(self, parameters): - self.parameters.update(parameters) - self.query = mustache_render(self.template, self.parameters) + invalid_parameter_names = [key for (key, value) in parameters.iteritems() if not self._valid(key, value)] + if invalid_parameter_names: + raise InvalidParameterError(invalid_parameter_names) + else: + self.parameters.update(parameters) + self.query = mustache_render(self.template, self.parameters) + return self + def _valid(self, name, value): + definition = next((definition for definition in self.schema if definition["name"] == name), None) + + if not definition: + return True + + validators = { + "text": lambda value: isinstance(value, basestring), + "number": lambda value: isinstance(value, Number), + "enum": lambda value: value in definition["enumOptions"], + "date": _is_date, + "datetime-local": _is_date, + "datetime-with-seconds": _is_date, + "date-range": _is_date_range, + "datetime-range": _is_date_range, + "datetime-range-with-seconds": _is_date_range, + } + + validate = validators.get(definition["type"], lambda x: False) + + return validate(value) + @property def missing_params(self): query_parameters = set(_collect_query_parameters(self.template)) @@ -52,3 +96,9 @@ class ParameterizedQuery(object): @property def text(self): return self.query + + +class InvalidParameterError(Exception): + def __init__(self, parameters): + message = u"The following parameter values are incompatible with their type definitions: {}".format(", ".join(parameters)) + super(InvalidParameterError, self).__init__(message) diff --git a/tests/utils/test_parameterized_query.py b/tests/utils/test_parameterized_query.py index e076dfae..c63acc96 100644 --- a/tests/utils/test_parameterized_query.py +++ b/tests/utils/test_parameterized_query.py @@ -1,6 +1,7 @@ from unittest import TestCase +import pytest -from redash.utils.parameterized_query import ParameterizedQuery +from redash.utils.parameterized_query import ParameterizedQuery, InvalidParameterError class TestParameterizedQuery(TestCase): @@ -41,3 +42,92 @@ class TestParameterizedQuery(TestCase): } }) self.assertEqual(set([]), query.missing_params) + + def test_raises_on_invalid_text_parameters(self): + schema = [{"name": "bar", "type": "text"}] + query = ParameterizedQuery("foo", schema) + + with pytest.raises(InvalidParameterError): + query.apply({"bar": 7}) + + def test_validates_text_parameters(self): + schema = [{"name": "bar", "type": "text"}] + query = ParameterizedQuery("foo {{bar}}", schema) + + query.apply({"bar": u"baz"}) + + self.assertEquals("foo baz", query.text) + + def test_raises_on_invalid_number_parameters(self): + schema = [{"name": "bar", "type": "number"}] + query = ParameterizedQuery("foo", schema) + + with pytest.raises(InvalidParameterError): + query.apply({"bar": "baz"}) + + def test_validates_number_parameters(self): + schema = [{"name": "bar", "type": "number"}] + query = ParameterizedQuery("foo {{bar}}", schema) + + query.apply({"bar": 7}) + + self.assertEquals("foo 7", query.text) + + def test_raises_on_invalid_date_parameters(self): + schema = [{"name": "bar", "type": "date"}] + query = ParameterizedQuery("foo", schema) + + with pytest.raises(InvalidParameterError): + query.apply({"bar": "baz"}) + + def test_validates_date_parameters(self): + schema = [{"name": "bar", "type": "date"}] + query = ParameterizedQuery("foo {{bar}}", schema) + + query.apply({"bar": "2000-01-01 12:00:00"}) + + self.assertEquals("foo 2000-01-01 12:00:00", query.text) + + def test_raises_on_invalid_enum_parameters(self): + schema = [{"name": "bar", "type": "enum", "enumOptions": ["baz", "qux"]}] + query = ParameterizedQuery("foo", schema) + + with pytest.raises(InvalidParameterError): + query.apply({"bar": 7}) + + def test_raises_on_unlisted_enum_value_parameters(self): + schema = [{"name": "bar", "type": "enum", "enumOptions": ["baz", "qux"]}] + query = ParameterizedQuery("foo", schema) + + with pytest.raises(InvalidParameterError): + query.apply({"bar": "shlomo"}) + + def test_validates_enum_parameters(self): + schema = [{"name": "bar", "type": "enum", "enumOptions": ["baz", "qux"]}] + query = ParameterizedQuery("foo {{bar}}", schema) + + query.apply({"bar": "baz"}) + + self.assertEquals("foo baz", query.text) + + def test_raises_on_invalid_date_range_parameters(self): + schema = [{"name": "bar", "type": "date-range"}] + query = ParameterizedQuery("foo", schema) + + with pytest.raises(InvalidParameterError): + query.apply({"bar": "baz"}) + + def test_validates_date_range_parameters(self): + schema = [{"name": "bar", "type": "date-range"}] + query = ParameterizedQuery("foo {{bar.start}} {{bar.end}}", schema) + + query.apply({"bar": {"start": "2000-01-01 12:00:00", "end": "2000-12-31 12:00:00"}}) + + self.assertEquals("foo 2000-01-01 12:00:00 2000-12-31 12:00:00", query.text) + + def test_raises_on_unexpected_param_types(self): + schema = [{"name": "bar", "type": "burrito"}] + query = ParameterizedQuery("foo", schema) + + with pytest.raises(InvalidParameterError): + query.apply({"bar": "baz"})