mirror of
https://github.com/valitydev/redash.git
synced 2024-11-06 00:55:16 +00:00
Python query runner: add function that transforms pandas dataframe to result format (#5629)
This commit is contained in:
parent
4fddff104a
commit
e28e4227bf
@ -9,6 +9,14 @@ from redash import models
|
||||
from RestrictedPython import compile_restricted
|
||||
from RestrictedPython.Guards import safe_builtins, guarded_iter_unpack_sequence, guarded_unpack_sequence
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
pandas_installed = True
|
||||
except ImportError:
|
||||
pandas_installed = False
|
||||
|
||||
from RestrictedPython.transformer import IOPERATOR_TO_STR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -145,6 +153,14 @@ class Python(BaseQueryRunner):
|
||||
def custom_get_iter(obj):
|
||||
return iter(obj)
|
||||
|
||||
@staticmethod
def custom_inplacevar(op, x, y):
    """Evaluate an augmented assignment (``x <op> y``) and return the new ``x``.

    Parameters:
    :op string: operator text; must be one of the values of ``IOPERATOR_TO_STR``
    :x: left-hand operand, the value being updated
    :y: right-hand operand

    Raises:
        Exception: if ``op`` is not one of the known in-place operators.
    """
    allowed_operators = IOPERATOR_TO_STR.values()
    if op not in allowed_operators:
        raise Exception("'{} is not supported inplace variable'".format(op))

    # Only operator text that passed the check above is spliced into the
    # statement; the operands travel through the namespace, never through
    # the source string.
    namespace = dict(x=x, y=y)
    statement = "".join(("x", op, "y"))
    exec(statement, namespace)
    return namespace["x"]
|
||||
|
||||
@staticmethod
|
||||
def add_result_column(result, column_name, friendly_name, column_type):
|
||||
"""Helper function to add columns inside a Python script running in Redash in an easier way
|
||||
@ -179,7 +195,7 @@ class Python(BaseQueryRunner):
|
||||
result["rows"].append(values)
|
||||
|
||||
@staticmethod
|
||||
def execute_query(data_source_name_or_id, query):
|
||||
def execute_query(data_source_name_or_id, query, result_type=None):
|
||||
"""Run query from specific data source.
|
||||
|
||||
Parameters:
|
||||
@ -200,7 +216,13 @@ class Python(BaseQueryRunner):
|
||||
raise Exception(error)
|
||||
|
||||
# TODO: allow avoiding the JSON dumps/loads in same process
|
||||
return json_loads(data)
|
||||
query_result = json_loads(data)
|
||||
|
||||
if result_type == "dataframe" and pandas_installed:
|
||||
return pd.DataFrame(query_result["rows"])
|
||||
|
||||
return query_result
|
||||
|
||||
|
||||
@staticmethod
|
||||
def get_source_schema(data_source_name_or_id):
|
||||
@ -239,6 +261,29 @@ class Python(BaseQueryRunner):
|
||||
|
||||
return query.latest_query_data.data
|
||||
|
||||
def dataframe_to_result(self, result, df):
    """Fill a Redash result dictionary from a pandas DataFrame.

    The rows are stored under ``result["rows"]`` as a list of per-row
    dictionaries, and every DataFrame column is registered through
    ``add_result_column`` with a Redash type derived from its dtype.

    Parameters:
    :result dict: the result dictionary to populate (modified in place)
    :df pandas.DataFrame: the data to convert

    Type mapping:
    - boolean dtypes -> TYPE_BOOLEAN
    - floating point dtypes of any width -> TYPE_FLOAT
    - integer dtypes of any width or signedness -> TYPE_INTEGER
    - datetime dtypes (naive or timezone-aware) -> TYPE_DATE when every
      non-null value falls exactly on midnight, otherwise TYPE_DATETIME
      (also TYPE_DATETIME when the column holds no non-null value)
    - anything else -> TYPE_STRING
    """
    result["rows"] = df.to_dict("records")

    # The pandas dtype predicates are used instead of comparing against
    # NumPy classes: ``np.bool`` no longer exists in current NumPy, and
    # ``dtype == np.integer`` / ``np.inexact`` only ever matched the
    # default width, so e.g. int32/float32 columns were typed as strings.
    dtype_checks = pd.api.types

    for column_name, column_type in df.dtypes.items():
        if dtype_checks.is_bool_dtype(column_type):
            redash_type = TYPE_BOOLEAN
        elif dtype_checks.is_float_dtype(column_type):
            redash_type = TYPE_FLOAT
        elif dtype_checks.is_integer_dtype(column_type):
            redash_type = TYPE_INTEGER
        elif dtype_checks.is_datetime64_any_dtype(column_type):
            # Inspect the values themselves rather than ``.loc[0]``: the
            # label 0 is not guaranteed to exist in the index, and a
            # single row does not say whether the column carries times.
            timestamps = df[column_name].dropna()
            if timestamps.empty:
                redash_type = TYPE_DATETIME
            elif (timestamps.dt.normalize() == timestamps).all():
                redash_type = TYPE_DATE
            else:
                redash_type = TYPE_DATETIME
        else:
            redash_type = TYPE_STRING

        self.add_result_column(result, column_name, column_name, redash_type)
|
||||
|
||||
def get_current_user(self):
    """Return the current user as a dictionary.

    The value is whatever ``to_dict()`` produces for ``self._current_user``.
    """
    current_user = self._current_user
    return current_user.to_dict()
|
||||
|
||||
@ -265,6 +310,7 @@ class Python(BaseQueryRunner):
|
||||
builtins["_print_"] = self._custom_print
|
||||
builtins["_unpack_sequence_"] = guarded_unpack_sequence
|
||||
builtins["_iter_unpack_sequence_"] = guarded_iter_unpack_sequence
|
||||
builtins["_inplacevar_"] = self.custom_inplacevar
|
||||
|
||||
# Layer in our own additional set of builtins that we have
|
||||
# considered safe.
|
||||
@ -277,6 +323,8 @@ class Python(BaseQueryRunner):
|
||||
restricted_globals["get_current_user"] = self.get_current_user
|
||||
restricted_globals["execute_query"] = self.execute_query
|
||||
restricted_globals["add_result_column"] = self.add_result_column
|
||||
if pandas_installed:
|
||||
restricted_globals["dataframe_to_result"] = self.dataframe_to_result
|
||||
restricted_globals["add_result_row"] = self.add_result_row
|
||||
restricted_globals["disable_print_log"] = self._custom_print.disable
|
||||
restricted_globals["enable_print_log"] = self._custom_print.enable
|
||||
@ -304,5 +352,4 @@ class Python(BaseQueryRunner):
|
||||
|
||||
return json_data, error
|
||||
|
||||
|
||||
register(Python)
|
||||
|
@ -40,4 +40,5 @@ trino~=0.305
|
||||
cmem-cmempy==21.2.3
|
||||
xlrd==2.0.1
|
||||
openpyxl==3.0.7
|
||||
firebolt-sqlalchemy
|
||||
firebolt-sqlalchemy
|
||||
pandas==1.3.4
|
||||
|
Loading…
Reference in New Issue
Block a user