mirror of
https://github.com/valitydev/redash.git
synced 2024-11-06 17:15:17 +00:00
Query Runner: eccenca Corporate Memory (SPARQL) - query RDF / Linked Data Knowledge Graphs with redash (#5415)
* add Corporate Memory Runner based on cmempy 21.2.3 * fix code style * apply some code clean-ups * use extendedEnum, boolean and extra_options for schema description * use lowercase sorting for the data source types list This correctly orders data source names which start with lowercase characters (such as eccenca Corporate Memory) * add missing db logo
This commit is contained in:
parent
fb90b501cb
commit
70681294a3
BIN
client/app/assets/images/db-logos/corporate_memory.png
Normal file
BIN
client/app/assets/images/db-logos/corporate_memory.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.5 KiB |
@ -29,7 +29,8 @@ class DataSourceTypeListResource(BaseResource):
|
|||||||
@require_admin
|
@require_admin
|
||||||
def get(self):
|
def get(self):
|
||||||
return [
|
return [
|
||||||
q.to_dict() for q in sorted(query_runners.values(), key=lambda q: q.name())
|
q.to_dict()
|
||||||
|
for q in sorted(query_runners.values(), key=lambda q: q.name().lower())
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
268
redash/query_runner/corporate_memory.py
Normal file
268
redash/query_runner/corporate_memory.py
Normal file
@ -0,0 +1,268 @@
|
|||||||
|
"""Provide the query runner for eccenca Corporate Memory.
|
||||||
|
|
||||||
|
seeAlso: https://documentation.eccenca.com/
|
||||||
|
seeAlso: https://eccenca.com/
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
from os import environ
|
||||||
|
|
||||||
|
from redash.query_runner import BaseQueryRunner
|
||||||
|
from redash.utils import json_dumps, json_loads
|
||||||
|
from . import register
|
||||||
|
|
||||||
|
try:
|
||||||
|
from cmem.cmempy.queries import SparqlQuery, QueryCatalog, QUERY_STRING
|
||||||
|
from cmem.cmempy.dp.proxy.graph import get_graphs_list
|
||||||
|
|
||||||
|
enabled = True
|
||||||
|
except ImportError:
|
||||||
|
enabled = False
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class CorporateMemoryQueryRunner(BaseQueryRunner):
    """Use eccenca Corporate Memory as redash data source."""

    # These environment keys are used by cmempy
    KNOWN_CONFIG_KEYS = (
        "CMEM_BASE_PROTOCOL",
        "CMEM_BASE_DOMAIN",
        "CMEM_BASE_URI",
        "SSL_VERIFY",
        "REQUESTS_CA_BUNDLE",
        "DP_API_ENDPOINT",
        "DI_API_ENDPOINT",
        "OAUTH_TOKEN_URI",
        "OAUTH_GRANT_TYPE",
        "OAUTH_USER",
        "OAUTH_PASSWORD",
        "OAUTH_CLIENT_ID",
        "OAUTH_CLIENT_SECRET",
    )

    # These variables hold secret data and should NOT be logged
    KNOWN_SECRET_KEYS = ("OAUTH_PASSWORD", "OAUTH_CLIENT_SECRET")

    # This allows for an easy connection test
    noop_query = "SELECT ?noop WHERE {BIND('noop' as ?noop)}"

    # We do not want to have comments in our SPARQL queries
    # FEATURE?: Implement annotate_query in case the metadata is useful somewhere
    should_annotate_query = False

    def __init__(self, configuration):
        """Init the class and save the configuration.

        :param configuration: data source configuration dict; keys match
            ``KNOWN_CONFIG_KEYS`` (see ``configuration_schema``).
        """
        super(CorporateMemoryQueryRunner, self).__init__(configuration)
        # FEATURE?: activate SPARQL support in the redash query editor.
        #   Currently SPARQL syntax seems not to be available for the
        #   react-ace component. However, the ace editor itself supports
        #   sparql mode:
        #   https://github.com/ajaxorg/ace/blob/master/lib/ace/mode/sparql.js
        #   then we can hopefully do: self.syntax = "sparql"
        # FEATURE?: retrieve Query catalog URIs in order to use them in queries
        # FEATURE?: implement a way to use queries from the query catalog
        # FEATURE?: allow a checkbox to NOT use owl:imports imported graphs
        # FEATURE?: allow to use a context graph per data source
        self.configuration = configuration

    def _setup_environment(self):
        """Provide environment for cmempy.

        cmempy environment variables need to match keys in the properties
        object of the configuration_schema.
        """
        for key in self.KNOWN_CONFIG_KEYS:
            # remove any stale value first so config fully controls cmempy
            environ.pop(key, None)
            value = self.configuration.get(key, None)
            if value is not None:
                environ[key] = str(value)
                if key in self.KNOWN_SECRET_KEYS:
                    # never log secret values, only the fact they are set
                    logger.info("%s set by config", key)
                else:
                    logger.info("%s set by config to %s", key, environ[key])

    @staticmethod
    def _transform_sparql_results(results):
        """Transform a SPARQL query result to a redash query result.

        source structure: SPARQL 1.1 Query Results JSON Format
        - seeAlso: https://www.w3.org/TR/sparql11-results-json/

        target structure: redash result set; example of what redash needs:
            data = {
                "columns": [{"name": "name", "type": "string",
                             "friendly_name": "friendly name"}],
                "rows": [
                    {"name": "value 1"},
                    {"name": "value 2"},
                ]}

        FEATURE?: During the binding loop, we could check the data types of
        the values and, in case they are all the same, choose something
        better than just string.
        """
        logger.info("results are: %s", results)
        # Not sure why we do not use the json package here but all other
        # query runners do it the same way :-)
        sparql_results = json_loads(results)
        variables = sparql_results["head"]["vars"]
        # transform all bindings to redash rows;
        # unbound SPARQL variables are set as empty strings
        rows = [
            {var: binding.get(var, {}).get("value", "") for var in variables}
            for binding in sparql_results["results"]["bindings"]
        ]
        # transform all vars to redash columns
        columns = [
            {"name": var, "friendly_name": var, "type": "string"}
            for var in variables
        ]
        return json_dumps({"columns": columns, "rows": rows})

    @classmethod
    def name(cls):
        """Human-readable data source name shown in the redash UI."""
        return "eccenca Corporate Memory (with SPARQL)"

    @classmethod
    def enabled(cls):
        """Report whether the optional cmempy dependency is importable."""
        return enabled

    @classmethod
    def type(cls):
        """Internal type identifier (also selects the db logo image)."""
        return "corporate_memory"

    def run_query(self, query, user):
        """Send a SPARQL query to corporate memory.

        :param query: the SPARQL query text
        :param user: the redash user executing the query (logged only)
        :returns: tuple of (data, error) — exactly one of them is None
        :raises ValueError: for query types other than SELECT
        """
        logger.info("about to execute query (user='%s'): %s", user, query)
        # do not shadow the `query` parameter with the query object
        sparql_query = SparqlQuery(query)
        query_type = sparql_query.get_query_type()
        # a type of None means there is an error in the query,
        # so execution is at least tried on the endpoint
        if query_type not in ["SELECT", None]:
            raise ValueError(
                "Queries of type {} can not be processed by redash.".format(query_type)
            )

        self._setup_environment()
        try:
            data = self._transform_sparql_results(sparql_query.get_results())
        except Exception as error:
            logger.info("Error: %s", error)
            try:
                # try to load Problem Details for HTTP API JSON
                details = json.loads(error.response.text)
                message = ""
                if "title" in details:
                    message += details["title"] + ": "
                if "detail" in details:
                    message += details["detail"]
                return None, message
            except Exception:
                # no parsable problem details — return the raw error
                return None, error

        return data, None

    @classmethod
    def configuration_schema(cls):
        """provide the configuration of the data source as json schema"""
        return {
            "type": "object",
            "properties": {
                "CMEM_BASE_URI": {"type": "string", "title": "Base URL"},
                "OAUTH_GRANT_TYPE": {
                    "type": "string",
                    "title": "Grant Type",
                    "default": "client_credentials",
                    "extendedEnum": [
                        {"value": "client_credentials", "name": "client_credentials"},
                        {"value": "password", "name": "password"},
                    ],
                },
                "OAUTH_CLIENT_ID": {
                    "type": "string",
                    "title": "Client ID (e.g. cmem-service-account)",
                    "default": "cmem-service-account",
                },
                "OAUTH_CLIENT_SECRET": {
                    "type": "string",
                    "title": "Client Secret - only needed for grant type 'client_credentials'",
                },
                "OAUTH_USER": {
                    "type": "string",
                    "title": "User account - only needed for grant type 'password'",
                },
                "OAUTH_PASSWORD": {
                    "type": "string",
                    "title": "User Password - only needed for grant type 'password'",
                },
                "SSL_VERIFY": {
                    "type": "boolean",
                    "title": "Verify SSL certificates for API requests",
                    "default": True,
                },
                "REQUESTS_CA_BUNDLE": {
                    "type": "string",
                    "title": "Path to the CA Bundle file (.pem)",
                },
            },
            "required": ["CMEM_BASE_URI", "OAUTH_GRANT_TYPE", "OAUTH_CLIENT_ID"],
            "secret": ["OAUTH_CLIENT_SECRET", "OAUTH_PASSWORD"],
            "extra_options": [
                "OAUTH_GRANT_TYPE",
                "OAUTH_USER",
                "OAUTH_PASSWORD",
                "SSL_VERIFY",
                "REQUESTS_CA_BUNDLE",
            ],
        }

    def get_schema(self, get_stats=False):
        """Get the schema structure (prefixes, graphs).

        ``get_stats`` is part of the query runner interface but unused here.
        """
        schema = {
            "1": {
                "name": "-> Common Prefixes <-",
                "columns": self._get_common_prefixes_schema(),
            },
            "2": {"name": "-> Graphs <-", "columns": self._get_graphs_schema()},
        }
        # FEATURE?: schema.update(self._get_query_schema())
        logger.info(schema.values())
        return schema.values()

    def _get_graphs_schema(self):
        """Get a list of readable graph FROM clause strings."""
        self._setup_environment()
        return ["FROM <{}>".format(graph["iri"]) for graph in get_graphs_list()]

    @staticmethod
    def _get_common_prefixes_schema():
        """Get a list of SPARQL prefix declarations."""
        return [
            "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
            "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>",
            "PREFIX owl: <http://www.w3.org/2002/07/owl#>",
            "PREFIX schema: <http://schema.org/>",
            "PREFIX dct: <http://purl.org/dc/terms/>",
            "PREFIX skos: <http://www.w3.org/2004/02/skos/core#>",
        ]


register(CorporateMemoryQueryRunner)
|
@ -372,6 +372,7 @@ default_query_runners = [
|
|||||||
"redash.query_runner.exasol",
|
"redash.query_runner.exasol",
|
||||||
"redash.query_runner.cloudwatch",
|
"redash.query_runner.cloudwatch",
|
||||||
"redash.query_runner.cloudwatch_insights",
|
"redash.query_runner.cloudwatch_insights",
|
||||||
|
"redash.query_runner.corporate_memory",
|
||||||
]
|
]
|
||||||
|
|
||||||
enabled_query_runners = array_from_string(
|
enabled_query_runners = array_from_string(
|
||||||
|
@ -36,3 +36,4 @@ pyexasol==0.12.0
|
|||||||
python-rapidjson==0.8.0
|
python-rapidjson==0.8.0
|
||||||
pyodbc==4.0.28
|
pyodbc==4.0.28
|
||||||
trino~=0.305
|
trino~=0.305
|
||||||
|
cmem-cmempy==21.2.3
|
||||||
|
Loading…
Reference in New Issue
Block a user