Server-side parameter validation (#3315)

* stop testing `collect_query_parameters`, it's an implementation detail

* add tests for `missing_query_params`

* rename SQLQuery -> ParameterizedSqlQuery

* rename sql_query.py to parameterized_query.py

* split to parameterized queries and parameterized SQL queries, where
parameterized queries only do templating and parameterized SQL queries
add tree validation on top of it

* move missing parameter detection to ParameterizedQuery

* get rid of some old code

* fix tests

* set syntax to `custom`

* revert the max-age-related refactoring

* 👋 tree validations 😢

* BaseQueryRunner is no longer a factory for ParameterizedQuery, for now

* add an endpoint for running a query by its id and (optional) parameters
without having to provide the query text

* adds parameter schema to ParameterizedQuery

* adds parameter schema validation (currently for strings)

* validate number parameters

* validate date parameters

* validate parameters on POST /api/queries/<id>/results

* validate enum parameters

* validate date range parameters

* validate query-based dropdowns by preprocessing them at the handler
level and converting them to a populated enum

* change _is_date_range to be a tad more succinct

* a single assignment with a `map` is sufficiently explanatory

* Update redash/utils/parameterized_query.py

Co-Authored-By: rauchy <omer@rauchy.net>

* Update redash/utils/parameterized_query.py

Co-Authored-By: rauchy <omer@rauchy.net>

* Update redash/utils/parameterized_query.py

Co-Authored-By: rauchy <omer@rauchy.net>

* Update redash/utils/parameterized_query.py

Co-Authored-By: rauchy <omer@rauchy.net>

* Update redash/handlers/query_results.py

Co-Authored-By: rauchy <omer@rauchy.net>

* Update redash/utils/parameterized_query.py

Co-Authored-By: rauchy <omer@rauchy.net>

* build error message inside the error

* support all types of numbers as number parameters

* check for permissions when populating query-based dropdowns

* check for access to query before running it

* check for empty rows when populating query-based enums

* don't bother loading query results if user doesn't have access

* 💥 on unexpected parameter types

* parameter schema default is a list, not a dictionary

* remove redundant null guards
This commit is contained in:
Omer Lachish 2019-01-29 09:18:07 +02:00 committed by GitHub
parent ff42ec2cc6
commit 371b319e92
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 168 additions and 9 deletions

View File

@ -10,7 +10,7 @@ from redash.permissions import (has_access, not_view_only, require_access,
require_permission, view_only)
from redash.tasks import QueryTask
from redash.tasks.queries import enqueue_query
from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, utcnow)
from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, json_loads, utcnow)
from redash.utils.parameterized_query import ParameterizedQuery
@ -64,7 +64,7 @@ def run_query_sync(data_source, parameter_values, query_text, max_age=0):
return None
def run_query(data_source, parameter_values, query_text, query_id, max_age=0):
def run_query(data_source, parameter_values, query_text, query_id, max_age=0, parameter_schema=None):
if data_source.paused:
if data_source.pause_reason:
message = '{} is paused ({}). Please try later.'.format(data_source.name, data_source.pause_reason)
@ -73,7 +73,7 @@ def run_query(data_source, parameter_values, query_text, query_id, max_age=0):
return error_response(message)
query = ParameterizedQuery(query_text).apply(parameter_values)
query = ParameterizedQuery(query_text, parameter_schema).apply(parameter_values)
if query.missing_params:
return error_response(u'Missing parameter value for: {}'.format(u", ".join(query.missing_params)))
@ -154,6 +154,23 @@ class QueryResultResource(BaseResource):
return make_response("", 200, headers)
def _fetch_rows(self, query_id):
query = models.Query.get_by_id_and_org(query_id, self.current_org)
require_access(query.data_source.groups, self.current_user, view_only)
query_result = models.QueryResult.get_by_id_and_org(query.latest_query_data_id, self.current_org)
return json_loads(query_result.data)["rows"]
def _convert_queries_to_enums(self, definition):
if definition["type"] == "query":
definition["type"] = "enum"
rows = self._fetch_rows(definition.pop("queryId"))
definition["enumOptions"] = [row.get('value', row.get(row.keys()[0])) for row in rows if row]
return definition
@require_permission('execute_query')
def post(self, query_id):
"""
@ -171,11 +188,13 @@ class QueryResultResource(BaseResource):
max_age = int(params.get('max_age', 0))
query = get_object_or_404(models.Query.get_by_id_and_org, query_id, self.current_org)
parameter_schema = map(self._convert_queries_to_enums,
query.options.get("parameters", []))
if not has_access(query.data_source.groups, self.current_user, not_view_only):
return {'job': {'status': 4, 'error': 'You do not have permission to run queries with this data source.'}}, 403
else:
return run_query(query.data_source, parameters, query.query_text, query_id, max_age)
return run_query(query.data_source, parameters, query.query_text, query_id, max_age, parameter_schema=parameter_schema)
@require_permission('view_query')
def get(self, query_id=None, query_result_id=None, filetype='json'):

View File

@ -1,7 +1,8 @@
import pystache
from numbers import Number
from redash.utils import mustache_render
from funcy import distinct
from dateutil.parser import parse
def _collect_key_names(nodes):
keys = []
@ -33,17 +34,60 @@ def _parameter_names(parameter_values):
return names
def _is_date(string):
try:
parse(string)
return True
except ValueError:
return False
def _is_date_range(obj):
try:
return _is_date(obj["start"]) and _is_date(obj["end"])
except (KeyError, TypeError):
return False
class ParameterizedQuery(object):
def __init__(self, template):
def __init__(self, template, schema=None):
self.schema = schema or []
self.template = template
self.query = template
self.parameters = {}
def apply(self, parameters):
invalid_parameter_names = [key for (key, value) in parameters.iteritems() if not self._valid(key, value)]
if invalid_parameter_names:
raise InvalidParameterError(invalid_parameter_names)
else:
self.parameters.update(parameters)
self.query = mustache_render(self.template, self.parameters)
return self
def _valid(self, name, value):
definition = next((definition for definition in self.schema if definition["name"] == name), None)
if not definition:
return True
validators = {
"text": lambda value: isinstance(value, basestring),
"number": lambda value: isinstance(value, Number),
"enum": lambda value: value in definition["enumOptions"],
"date": _is_date,
"datetime-local": _is_date,
"datetime-with-seconds": _is_date,
"date-range": _is_date_range,
"datetime-range": _is_date_range,
"datetime-range-with-seconds": _is_date_range,
}
validate = validators.get(definition["type"], lambda x: False)
return validate(value)
@property
def missing_params(self):
query_parameters = set(_collect_query_parameters(self.template))
@ -52,3 +96,9 @@ class ParameterizedQuery(object):
@property
def text(self):
return self.query
class InvalidParameterError(Exception):
def __init__(self, parameters):
message = u"The following parameter values are incompatible with their type definitions: {}".format(", ".join(parameters))
super(InvalidParameterError, self).__init__(message)

View File

@ -1,6 +1,7 @@
from unittest import TestCase
import pytest
from redash.utils.parameterized_query import ParameterizedQuery
from redash.utils.parameterized_query import ParameterizedQuery, InvalidParameterError
class TestParameterizedQuery(TestCase):
@ -41,3 +42,92 @@ class TestParameterizedQuery(TestCase):
}
})
self.assertEqual(set([]), query.missing_params)
def test_raises_on_invalid_text_parameters(self):
schema = [{"name": "bar", "type": "text"}]
query = ParameterizedQuery("foo", schema)
with pytest.raises(InvalidParameterError):
query.apply({"bar": 7})
def test_validates_text_parameters(self):
schema = [{"name": "bar", "type": "text"}]
query = ParameterizedQuery("foo {{bar}}", schema)
query.apply({"bar": u"baz"})
self.assertEquals("foo baz", query.text)
def test_raises_on_invalid_number_parameters(self):
schema = [{"name": "bar", "type": "number"}]
query = ParameterizedQuery("foo", schema)
with pytest.raises(InvalidParameterError):
query.apply({"bar": "baz"})
def test_validates_number_parameters(self):
schema = [{"name": "bar", "type": "number"}]
query = ParameterizedQuery("foo {{bar}}", schema)
query.apply({"bar": 7})
self.assertEquals("foo 7", query.text)
def test_raises_on_invalid_date_parameters(self):
schema = [{"name": "bar", "type": "date"}]
query = ParameterizedQuery("foo", schema)
with pytest.raises(InvalidParameterError):
query.apply({"bar": "baz"})
def test_validates_date_parameters(self):
schema = [{"name": "bar", "type": "date"}]
query = ParameterizedQuery("foo {{bar}}", schema)
query.apply({"bar": "2000-01-01 12:00:00"})
self.assertEquals("foo 2000-01-01 12:00:00", query.text)
def test_raises_on_invalid_enum_parameters(self):
schema = [{"name": "bar", "type": "enum", "enumOptions": ["baz", "qux"]}]
query = ParameterizedQuery("foo", schema)
with pytest.raises(InvalidParameterError):
query.apply({"bar": 7})
def test_raises_on_unlisted_enum_value_parameters(self):
schema = [{"name": "bar", "type": "enum", "enumOptions": ["baz", "qux"]}]
query = ParameterizedQuery("foo", schema)
with pytest.raises(InvalidParameterError):
query.apply({"bar": "shlomo"})
def test_validates_enum_parameters(self):
schema = [{"name": "bar", "type": "enum", "enumOptions": ["baz", "qux"]}]
query = ParameterizedQuery("foo {{bar}}", schema)
query.apply({"bar": "baz"})
self.assertEquals("foo baz", query.text)
def test_raises_on_invalid_date_range_parameters(self):
schema = [{"name": "bar", "type": "date-range"}]
query = ParameterizedQuery("foo", schema)
with pytest.raises(InvalidParameterError):
query.apply({"bar": "baz"})
def test_validates_date_range_parameters(self):
schema = [{"name": "bar", "type": "date-range"}]
query = ParameterizedQuery("foo {{bar.start}} {{bar.end}}", schema)
query.apply({"bar": {"start": "2000-01-01 12:00:00", "end": "2000-12-31 12:00:00"}})
self.assertEquals("foo 2000-01-01 12:00:00 2000-12-31 12:00:00", query.text)
def test_raises_on_unexpected_param_types(self):
schema = [{"name": "bar", "type": "burrito"}]
query = ParameterizedQuery("foo", schema)
with pytest.raises(InvalidParameterError):
query.apply({"bar": "baz"})