add .keyword on aggs; add extra unit test

This commit is contained in:
Anastasios Zouzias 2019-11-14 14:34:50 +01:00
parent e7ed0fa9ea
commit 3c7f522017
2 changed files with 47 additions and 14 deletions

View File

@ -278,15 +278,14 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin
count_distinct_agg_name = "{}_distinct".format(agg.aggfield) count_distinct_agg_name = "{}_distinct".format(agg.aggfield)
script_limit = "params.count {} {}".format(agg.cond_op, agg.condition) script_limit = "params.count {} {}".format(agg.cond_op, agg.condition)
self.queries[-1]['aggs'] = { self.queries[-1]['aggs'] = {
"aggs": { count_agg_group_name: {
count_agg_group_name: {
"terms": { "terms": {
"field": agg.groupfield "field": "{}.keyword".format(agg.groupfield)
}, },
"aggs": { "aggs": {
count_distinct_agg_name: { count_distinct_agg_name: {
"cardinality": { "cardinality": {
"field": agg.aggfield "field": "{}.keyword".format(agg.aggfield)
} }
}, },
"limit": { "limit": {
@ -300,9 +299,8 @@ class ElasticsearchDSLBackend(RulenameCommentMixin, ElasticsearchWildcardHandlin
} }
} }
} }
}
else: # if the condition is count() by MyGroupedField > XYZ else: # if the condition is count() by MyGroupedField > XYZ
group_aggname = "%s_count".format(agg.groupfield) group_aggname = "{}_count".format(agg.groupfield)
self.queries[-1]['aggs'] = { self.queries[-1]['aggs'] = {
group_aggname: { group_aggname: {
'terms': { 'terms': {

View File

@ -4,6 +4,11 @@ from sigma.parser.condition import SigmaAggregationParser
def test_backend_elastic(): def test_backend_elastic():
"""
Test aggregation of the form
count(aggfield) by GroupField < 3
"""
sigma_config = SigmaConfiguration() sigma_config = SigmaConfiguration()
backend = ElasticsearchDSLBackend(sigma_config) backend = ElasticsearchDSLBackend(sigma_config)
@ -13,18 +18,48 @@ def test_backend_elastic():
agg.cond_op = "<" agg.cond_op = "<"
agg.aggfunc = SigmaAggregationParser.AGGFUNC_COUNT agg.aggfunc = SigmaAggregationParser.AGGFUNC_COUNT
agg.aggfield = "aggfield" agg.aggfield = "aggfield"
agg.groupfield = "groupfield" agg.groupfield = "GroupField"
# Make queries non-empty # Make queries non-empty
backend.queries = [{}] backend.queries = [{}]
backend.generateAggregation(agg) backend.generateAggregation(agg)
inner_agg = backend.queries[0]["aggs"]["GroupField_count"]["aggs"]
bucket_selector = backend.queries[0]["aggs"]["GroupField_count"]["aggs"]["limit"]["bucket_selector"]
assert len(backend.queries) == 1, "backend has exactly one query" assert len(backend.queries) == 1, "backend has exactly one query"
assert ( assert ("GroupField_count" in backend.queries[0]["aggs"]), "GroupField_count is the top aggregation key"
"groupfield_count" in backend.queries[0]["aggs"]["aggs"] assert ("aggfield_distinct" in backend.queries[0]["aggs"]["GroupField_count"]["aggs"]), "aggfield_distinct is the nested aggregation key"
), "groupfield_count is the top aggregation key" assert ("GroupField_count" in backend.queries[0]["aggs"]), "GroupField_count is the top aggregation key"
assert ( assert "{}.keyword".format(agg.aggfield) == inner_agg["aggfield_distinct"]["cardinality"]["field"], "inner agg field must have suffix .keyword"
"aggfield_distinct" assert ("params.count < 3" in bucket_selector["script"]), "bucket selector script must be 'params.count < 3'"
in backend.queries[0]["aggs"]["aggs"]["groupfield_count"]["aggs"] assert "count" in bucket_selector["buckets_path"], "buckets_path must be 'count'"
), "aggfield_distinct is the nested aggregation key"
def test_backend_elastic_count_nofield_agg():
    """
    Test aggregation of the form
    count() by GroupedField < 3

    The aggregation has no aggregated field (``aggfield`` is None), so only
    the grouping aggregation and its bucket selector are checked.
    """
    sigma_config = SigmaConfiguration()
    backend = ElasticsearchDSLBackend(sigma_config)

    # Set up the aggregator input object without calling __init__()
    agg = object.__new__(SigmaAggregationParser)
    agg.condition = "3"
    agg.cond_op = "<"
    agg.aggfunc = SigmaAggregationParser.AGGFUNC_COUNT
    agg.aggfield = None
    agg.groupfield = "GroupedField"

    # Make queries non-empty: the backend writes the aggregation into the
    # last query of the list.
    backend.queries = [{}]
    backend.generateAggregation(agg)

    # Check the outer structure first, so that a missing key is reported by
    # the descriptive assertion message instead of a bare KeyError raised
    # while digging for the bucket selector.
    assert len(backend.queries) == 1, "backend has exactly one query"
    top_aggs = backend.queries[0]["aggs"]
    assert ("GroupedField_count" in top_aggs), "GroupedField_count is the top aggregation key"

    bucket_selector = top_aggs["GroupedField_count"]["aggs"]["limit"]["bucket_selector"]
    assert ("params.count < 3" in bucket_selector["script"]), "bucket selector script must be 'params.count < 3'"
    assert "count" in bucket_selector["buckets_path"], "buckets_path must be 'count'"