mirror of
https://github.com/valitydev/SigmaHQ.git
synced 2024-11-08 18:23:52 +00:00
849a5a520d
Previously, this method was only a placeholder that did not actually implement the intended conditional field mapping. Aggregations now support conditional field mapping as well.
658 lines
26 KiB
Python
658 lines
26 KiB
Python
# Sigma parser
|
|
# Copyright 2016-2017 Thomas Patzke, Florian Roth
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Lesser General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Lesser General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU Lesser General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import re
|
|
from .base import SimpleParser
|
|
from .exceptions import SigmaParseError
|
|
|
|
# Operator identifiers stored in the ``op`` attribute of the condition node
# classes defined further down in this module. The values are consecutive
# integers starting at 0, in the order listed here.
COND_NONE, COND_AND, COND_OR, COND_NOT, COND_NULL = range(5)
|
|
|
|
# Debugging code
|
|
def dumpNode(node, indent=''):   # pragma: no cover
    """
    Print the tree below *node* to stdout, one line per node (debug helper).

    Objects that expose an ``items`` attribute are printed as
    ``TypeName<type of items>`` followed by their children, each indented by
    one further space. Everything else is printed as ``TypeName=repr(node)``.

    Returns *node* unchanged, so the call can be wrapped around an expression.
    """
    label = type(node).__name__

    # Leaf: no 'items' attribute, so there is nothing to descend into.
    if not hasattr(node, 'items'):
        print("%s%s=%s" % (indent, label, repr(node)))
        return node

    children = node.items
    print("%s%s<%s>" % (indent, label, type(children).__name__))

    # A non-list 'items' is a single child; treat it as a list of one so both
    # cases share the same loop.
    if type(children) != list:
        children = [children]
    for child in children:
        dumpNode(child, indent + ' ')
    return node
|
|
|
|
# Condition Tokenizer
|
|
class SigmaConditionToken:
    """
    One lexical token of a Sigma condition expression.

    Instances carry the token type (one of the ``TOKEN_*`` constants), the
    matched text and the position handed in by the creator. They compare
    equal to an ``int`` with the same token type and to a ``str`` with the
    same matched text.
    """
    # Token type constants. Each value is also the index of its name in
    # `tokenstr` below.
    TOKEN_AND = 1
    TOKEN_OR = 2
    TOKEN_NOT = 3
    TOKEN_ID = 4
    TOKEN_LPAR = 5
    TOKEN_RPAR = 6
    TOKEN_PIPE = 7
    TOKEN_ONE = 8
    TOKEN_ALL = 9
    TOKEN_AGG = 10
    TOKEN_EQ = 11
    TOKEN_LT = 12
    TOKEN_LTE = 13
    TOKEN_GT = 14
    TOKEN_GTE = 15
    TOKEN_BY = 16
    TOKEN_NEAR = 17

    # Printable names, indexed by token type; index 0 is unused by the
    # constants above.
    tokenstr = [
        "INVALID",
        "AND",
        "OR",
        "NOT",
        "ID",
        "LPAR",
        "RPAR",
        "PIPE",
        "ONE",
        "ALL",
        "AGG",
        "EQ",
        "LT",
        "LTE",
        "GT",
        "GTE",
        "BY",
        "NEAR",
    ]

    def __init__(self, tokendef, match, pos):
        """
        Build a token.

        tokendef -- sequence whose first element is the token type
        match    -- regular expression match object; its ``group()`` becomes
                    the token text
        pos      -- position of the token, stored as given
        """
        token_type = tokendef[0]
        self.type = token_type
        self.matched = match.group()
        self.pos = pos

    def __eq__(self, other):
        """
        Compare against a token type constant (int) or token text (str).

        Raises NotImplementedError for any other operand type. The check is
        on the exact type, so subclasses of int/str (e.g. bool) are rejected.
        """
        kind = type(other)
        if kind == int:       # match against type
            return self.type == other
        if kind == str:       # match against content
            return self.matched == other
        raise NotImplementedError("SigmaConditionToken can only be compared against token type constants")

    def __str__(self):   # pragma: no cover
        name = self.tokenstr[self.type]
        return "[ Token: %s: '%s' ]" % (name, self.matched)
|
|
|
|
class SigmaConditionTokenizer:
    """
    Tokenize a condition string into a sequence of tokens.

    The constructor accepts either a condition string, which is scanned into
    SigmaConditionToken objects, or a ready-made list of tokens/parse tree
    nodes, which is wrapped as is. The object behaves like a sequence:
    iteration, len(), integer indexing, slicing (returns a new tokenizer),
    ``+`` concatenation and ``index()``.
    """
    # A keyword only counts as such if it is not directly followed by another
    # identifier character. Without this guard an identifier such as "order"
    # or "notepad" would be split into a keyword and a stray remainder.
    _keyword_end = r"(?![\w*])"

    # List of tokens, preferred recognition in given order:
    # (token identifier, matching regular expression). Matches are dropped if
    # the token identifier is None. Two-character comparison operators are
    # listed before their one-character prefixes, otherwise "<=" and ">="
    # could never be recognized.
    tokendefs = [
        (SigmaConditionToken.TOKEN_ONE,  re.compile(r"1 of", re.IGNORECASE)),
        (SigmaConditionToken.TOKEN_ALL,  re.compile(r"all of", re.IGNORECASE)),
        (None,                           re.compile(r"[\s\r\n]+")),
        (SigmaConditionToken.TOKEN_AGG,  re.compile(r"(?:count|min|max|avg|sum)" + _keyword_end, re.IGNORECASE)),
        (SigmaConditionToken.TOKEN_NEAR, re.compile(r"near" + _keyword_end, re.IGNORECASE)),
        (SigmaConditionToken.TOKEN_BY,   re.compile(r"by" + _keyword_end, re.IGNORECASE)),
        (SigmaConditionToken.TOKEN_EQ,   re.compile(r"==")),
        (SigmaConditionToken.TOKEN_LTE,  re.compile(r"<=")),
        (SigmaConditionToken.TOKEN_LT,   re.compile(r"<")),
        (SigmaConditionToken.TOKEN_GTE,  re.compile(r">=")),
        (SigmaConditionToken.TOKEN_GT,   re.compile(r">")),
        (SigmaConditionToken.TOKEN_PIPE, re.compile(r"\|")),
        (SigmaConditionToken.TOKEN_AND,  re.compile(r"and" + _keyword_end, re.IGNORECASE)),
        (SigmaConditionToken.TOKEN_OR,   re.compile(r"or" + _keyword_end, re.IGNORECASE)),
        (SigmaConditionToken.TOKEN_NOT,  re.compile(r"not" + _keyword_end, re.IGNORECASE)),
        (SigmaConditionToken.TOKEN_ID,   re.compile(r"[\w*]+")),
        (SigmaConditionToken.TOKEN_LPAR, re.compile(r"\(")),
        (SigmaConditionToken.TOKEN_RPAR, re.compile(r"\)")),
    ]

    def __init__(self, condition):
        """
        condition -- str to be tokenized, or list of already built tokens.

        Raises SigmaParseError if the string contains text that no token
        definition matches, TypeError if *condition* is neither str nor list.
        """
        if isinstance(condition, str):      # String that is parsed
            self.tokens = list()
            offset = 0                      # 0-based scan offset; token positions are 1-based
            end = len(condition)

            while offset < end:
                # Try the token definitions in order; the first match wins.
                for tokendef in self.tokendefs:
                    match = tokendef[1].match(condition, offset)
                    if match:
                        if tokendef[0] is not None:
                            self.tokens.append(SigmaConditionToken(tokendef, match, offset + 1))
                        offset = match.end()
                        break
                else:   # no valid token identified
                    raise SigmaParseError(
                        "Unexpected token in condition at position %d: %s" % (offset + 1, condition[offset:])
                    )
        elif isinstance(condition, list):   # List of tokens to be converted into SigmaConditionTokenizer class
            self.tokens = condition
        else:
            raise TypeError("SigmaConditionTokenizer constructor expects string or list, got %s" % (type(condition)))

    def __str__(self):   # pragma: no cover
        return " ".join([str(token) for token in self.tokens])

    def __iter__(self):
        return iter(self.tokens)

    def __len__(self):
        return len(self.tokens)

    def __getitem__(self, i):
        """Return a single element for an int index, a new tokenizer for a slice."""
        if type(i) == int:
            return self.tokens[i]
        elif type(i) == slice:
            return SigmaConditionTokenizer(self.tokens[i])
        else:
            raise IndexError("Expected index or slice")

    def __add__(self, other):
        """Return a new tokenizer with *other* (tokenizer, token or parse tree node) appended."""
        if isinstance(other, SigmaConditionTokenizer):
            return SigmaConditionTokenizer(self.tokens + other.tokens)
        elif isinstance(other, (SigmaConditionToken, ParseTreeNode)):
            return SigmaConditionTokenizer(self.tokens + [ other ])
        else:
            raise TypeError("+ operator expects SigmaConditionTokenizer or token type, got %s: %s" % (type(other), str(other)))

    def index(self, item):
        """Return the index of the first element equal to *item* (ValueError if absent)."""
        return self.tokens.index(item)
|
|
|
|
### Parse Tree Node Classes ###
|
|
class ParseTreeNode:
    """Parse Tree Node Base Class"""
    # NOTE: the class docstring above is part of the runtime output of
    # __str__ (it is read through self.__doc__). Do not reword it.

    def __init__(self):
        # Abstract base: instantiate one of the subclasses instead.
        raise NotImplementedError("ParseTreeNode is no usable class")

    def __str__(self):   # pragma: no cover
        # Renders as "[ <class docstring>: [<str of each item>] ]".
        return "[ %s: %s ]" % (self.__doc__, str([str(item) for item in self.items]))
|
|
|
|
class ConditionBase(ParseTreeNode):
    """Base class for conditional operations"""
    # The docstring above is left untouched on purpose: the inherited
    # __str__ reads node docstrings via self.__doc__.

    # Operator identifier (one of the COND_* constants); overridden by subclasses.
    op = COND_NONE
    # Operands of the condition; subclasses assign a list per instance.
    items = None

    def __init__(self):
        # Not meant to be instantiated directly.
        raise NotImplementedError("ConditionBase is no usable class")

    def add(self, item):
        """Append *item* to the operand list."""
        self.items.append(item)

    def __len__(self):
        """Number of operands."""
        return len(self.items)

    def __iter__(self):
        """Iterate over the operands."""
        return iter(self.items)
|
|
|
|
class ConditionAND(ConditionBase):
    """AND Condition"""
    # The docstring above is read at runtime by the inherited __str__; keep it as is.
    op = COND_AND

    def __init__(self, sigma=None, op=None, val1=None, val2=None):
        """
        Create an AND node.

        Without arguments the node starts with an empty operand list that is
        filled through add(). If any argument is given (as the condition
        parser does when it reduces ``val1 <op> val2``), the operands are
        ``[val1, val2]``; *sigma* and *op* are accepted but not stored.
        """
        # Identity checks instead of "== None": equality would invoke the
        # operands' own __eq__, and SigmaConditionToken.__eq__ raises when
        # compared against None.
        if sigma is None and op is None and val1 is None and val2 is None:    # no parameters given - initialize empty
            self.items = list()
        else:       # called by parser, use given values
            self.items = [ val1, val2 ]
|
|
|
|
class ConditionOR(ConditionAND):
    """OR Condition"""
    # Same construction and operand handling as ConditionAND; only the
    # operator identifier differs. The docstring is read at runtime by the
    # inherited __str__ and therefore stays unchanged.
    op = COND_OR
|
|
|
|
class ConditionNOT(ConditionBase):
    """NOT Condition"""
    # The docstring above is read at runtime by the inherited __str__; keep it as is.
    op = COND_NOT

    def __init__(self, sigma=None, op=None, val=None):
        """
        Create a NOT node.

        Without arguments the node starts empty and takes its single operand
        through add(). If any argument is given, the operand list is
        ``[val]``; *sigma* and *op* are accepted but not stored.
        """
        # Identity checks instead of "== None", so that the operands' own
        # __eq__ implementations are never invoked here.
        if sigma is None and op is None and val is None:    # no parameters given - initialize empty
            self.items = list()
        else:       # called by parser, use given values
            self.items = [ val ]

    def add(self, item):
        """Set the operand; raises ValueError if one is already present."""
        if len(self.items) == 0:
            # super() must be called: the bare name "super" is the builtin
            # type and has no add attribute.
            super().add(item)
        else:
            raise ValueError("Only one element allowed")

    @property
    def item(self):
        """The single operand, or None if the node is still empty."""
        try:
            return self.items[0]
        except IndexError:
            return None
|
|
|
|
class ConditionNULLValue(ConditionNOT):
    """Condition: Field value is empty or doesn't exists"""
    # Single-operand node; construction and add() are inherited from
    # ConditionNOT. The docstring is read at runtime by the inherited
    # __str__ and is therefore kept verbatim.
|
|
|
|
class ConditionNotNULLValue(ConditionNULLValue):
    """Condition: Field value is not empty"""
    # Counterpart of ConditionNULLValue with the same single-operand layout.
    # The docstring is read at runtime by the inherited __str__ and is
    # therefore kept verbatim.
|
|
|
|
class NodeSubexpression(ParseTreeNode):
    """Subexpression"""
    # The docstring above is read at runtime by the inherited __str__; keep it as is.

    def __init__(self, subexpr):
        # Unlike the condition nodes, 'items' holds the wrapped expression
        # itself rather than a list of operands.
        self.items = subexpr
|
|
|
|
# Parse tree generators: generate parse tree nodes from extended conditions
|
|
def generateXOf(sigma, val, condclass):
    """
    Generic implementation of (1|all) of x expressions.

    * condclass across all list items if x is name of definition
    * condclass across all definitions if x is keyword 'them'
    * condclass across all matching definition if x is wildcard expression, e.g. 'selection*'

    sigma     -- parser object providing ``definitions``, ``parse_definition()``
                 and ``parse_definition_byname()``
    val       -- token whose ``matched`` text is the x of the expression
    condclass -- condition class used to combine the results

    The definition named "timeframe" is never included in 'them' or wildcard
    expansions. Returns a NodeSubexpression wrapping the result.
    """
    target = val.matched

    if target == "them":            # condclass across all definitions
        def selected(name):
            return True
    elif "*" in target:             # condclass across all matching definitions
        # "*" stands for any run of characters; everything else is literal.
        # A wildcard in first position (e.g. "*_sel") is a wildcard as well.
        pattern = re.compile(".*".join(re.escape(part) for part in target.split("*")))

        def selected(name):
            return pattern.fullmatch(name) is not None
    else:                           # condclass across all items of definition
        return NodeSubexpression(sigma.parse_definition_byname(target, condclass))

    cond = condclass()
    for name, definition in sigma.definitions.items():
        if name != "timeframe" and selected(name):
            cond.add(NodeSubexpression(sigma.parse_definition(definition)))
    return NodeSubexpression(cond)
|
|
|
|
def generateAllOf(sigma, op, val):
    """
    Build the parse tree node for an 'all of x' expression.

    Delegates to generateXOf() with ConditionAND as combining class; the
    operator token *op* is not used.
    """
    node = generateXOf(sigma, val, ConditionAND)
    return node
|
|
|
|
def generateOneOf(sigma, op, val):
    """
    Build the parse tree node for a '1 of x' expression.

    Delegates to generateXOf() with ConditionOR as combining class; the
    operator token *op* is not used.
    """
    node = generateXOf(sigma, val, ConditionOR)
    return node
|
|
|
|
def convertId(sigma, op):
    """
    Turn a search identifier token into a parse tree node.

    The definition named by the token text is parsed with
    ``sigma.parse_definition_byname()`` (no explicit condition class is
    passed) and the result is wrapped in a NodeSubexpression.
    """
    parsed = sigma.parse_definition_byname(op.matched)
    return NodeSubexpression(parsed)
|
|
|
|
# Optimizer
|
|
class SigmaConditionOptimizer:
    """
    Simplifies the boolean structure of a parsed condition tree.

    Entry point is optimizeTree(); the underscore methods are its building
    blocks.
    """
    def _stripSubexpressionNode(self, node):
        """
        Recursively remove NodeSubexpression wrappers (i.e. brackets).

        A subexpression node is replaced by the stripped version of what it
        wraps. For any other object with an ``items`` attribute the items
        are stripped in place, except for nodes whose exact type is
        ConditionNOT, which are returned untouched.
        """
        if type(node) == NodeSubexpression:
            assert(type(node.items) != list)
            return self._stripSubexpressionNode(node.items)
        if hasattr(node, 'items') and type(node) is not ConditionNOT:
            node.items = [self._stripSubexpressionNode(child) for child in node.items]
        return node

    def _unstripSubexpressionNode(self, node):
        """
        Recursively wrap AND and OR nodes in NodeSubexpression again.

        Nodes of any other type are returned as they are and not descended
        into.
        """
        if type(node) not in (ConditionAND, ConditionOR):
            return node
        wrapper = NodeSubexpression(node)
        node.items = [self._unstripSubexpressionNode(child) for child in node.items]
        return wrapper

    def _ordered_uniq(self, l):
        """
        Return the entries of *l* without duplicates, first occurrence wins.

        Tuples whose second element is a list are not hashable, so they are
        compared as ``(first, tuple(second))`` and converted back to
        ``(first, list(second))`` in the result.
        """
        seen = set()
        kept = []
        for entry in l:
            # Make (field, [values]) style tuples hashable for the set lookup.
            if type(entry) == tuple and type(entry[1]) == list:
                entry = (entry[0], tuple(entry[1]))
            if entry in seen:
                continue
            seen.add(entry)
            kept.append(entry)

        # Undo the hashability workaround.
        result = []
        for entry in kept:
            if type(entry) == tuple and type(entry[1]) == tuple:
                entry = (entry[0], list(entry[1]))
            result.append(entry)
        return result

    def _optimizeNode(self, node, changes=False):
        """
        Apply one optimization pass to the tree rooted at *node*.

        Returns a tuple ``(new root, changed)``; *changed* is true if this
        call or one of the recursive calls modified the tree.

        All subexpression nodes MUST have been removed from the tree before
        this is called; brackets are implicit around AND/OR nodes.
        """
        node_type = type(node)

        if node_type in (ConditionOR, ConditionAND):
            # Empty OR(), AND() disappear
            if len(node.items) == 0:
                return None, True
            # Drop operands that were removed (None) and start over
            if None in node.items:
                node.items = [item for item in node.items if item != None]
                return self._optimizeNode(node, changes=True)

            # OR(X), AND(X) => X
            if len(node.items) == 1:
                return self._optimizeNode(node.items[0], changes=True)

            # OR(X, X, ...), AND(X, X, ...) => OR(X, ...), AND(X, ...)
            deduplicated = self._ordered_uniq(node.items)
            if len(deduplicated) < len(node.items):
                node.items = deduplicated
                return self._optimizeNode(node, changes=True)

            # OR(X, OR(Y)) => OR(X, Y)
            # Only done if every operand is either a node of the same type
            # or a plain tuple, and at least one is a node of the same type.
            has_same_type_child = any(type(child) == node_type for child in node.items)
            if has_same_type_child and \
                    all(type(child) in (node_type, tuple) for child in node.items):
                flattened = []
                for child in node.items:
                    if hasattr(child, 'items'):
                        flattened.extend(child.items)
                    else:
                        flattened.append(child)
                node.items = flattened
                return self._optimizeNode(node, changes=True)

            # OR(AND(X, ...), AND(X, ...)) => AND(X, OR(AND(...), AND(...)))
            othertype = ConditionAND if node_type == ConditionOR else ConditionOR
            if all(type(child) == othertype for child in node.items):
                first, rest = node.items[0], node.items[1:]
                # Operands of the first child that occur in every other child
                promoted = [cand for cand in first
                            if all(cand in child for child in rest)]
                if len(promoted) > 0:
                    for child in node.items:
                        for cand in promoted:
                            child.items.remove(cand)
                    newnode = othertype()
                    newnode.items = promoted
                    newnode.add(node)
                    return self._optimizeNode(newnode, changes=True)

            # nothing matched at this level: continue with the operands below

        elif node_type == ConditionNOT:
            assert(len(node.items) == 1)
            operand = node.items[0]

            # NOT(NOT(X)) => X
            if type(operand) == ConditionNOT:
                assert(len(operand.items) == 1)
                return self._optimizeNode(operand.items[0], changes=True)

            # NOT(ConditionNULLValue) => ConditionNotNULLValue
            if type(operand) == ConditionNULLValue:
                newnode = ConditionNotNULLValue(val=operand.items[0])
                return self._optimizeNode(newnode, changes=True)

            # NOT(ConditionNotNULLValue) => ConditionNULLValue
            if type(operand) == ConditionNotNULLValue:
                newnode = ConditionNULLValue(val=operand.items[0])
                return self._optimizeNode(newnode, changes=True)

            # nothing matched at this level: continue with the operand below

        else:
            # Anything that is not exactly AND, OR or NOT is left alone.
            return node, changes

        # Recurse into the operands and collect their results.
        optimized = []
        for item in node.items:
            new_item, item_changed = self._optimizeNode(item, changes)
            optimized.append(new_item)
            if item_changed:
                changes = True
        node.items = optimized
        return node, changes

    def optimizeTree(self, tree):
        """
        Optimize the boolean expressions in the AST rooted at *tree*.

        Fewer repeated terms are generally better for backend performance,
        in particular for backends that perform no query optimization of
        their own, such as those that generate code. A typical case is a
        rule with several alternative detections that are OR'ed together in
        the condition while all of them share a common element, such as the
        same EventID.

        Optimizations currently performed:
        - Removal of empty OR(), AND()
        - OR(X), AND(X)                         =>  X
        - OR(X, X, ...), AND(X, X, ...)         =>  OR(X, ...), AND(X, ...)
        - OR(X, OR(Y))                          =>  OR(X, Y)
        - OR(AND(X, ...), AND(X, ...))          =>  AND(X, OR(AND(...), AND(...)))
        - NOT(NOT(X))                           =>  X
        - NOT(ConditionNULLValue)               =>  ConditionNotNULLValue
        - NOT(ConditionNotNULLValue)            =>  ConditionNULLValue

        Boolean logic simplification is NP-hard. To avoid backtracking,
        speculative transformations that may or may not yield a better
        expression are not implemented, for example factoring out operands
        that are common to only some of the AND()s within an OR(), or vice
        versa. Expect poor performance on very large expressions.

        The tree is stripped of subexpression nodes, optimized pass by pass
        until a pass reports no change, and then gets subexpression nodes
        re-added around AND/OR nodes.
        """
        tree = self._stripSubexpressionNode(tree)
        # Repeat single passes until a fixed point is reached.
        while True:
            tree, changed = self._optimizeNode(tree)
            if not changed:
                break
        return self._unstripSubexpressionNode(tree)
|
|
|
|
# Condition parser
|
|
class SigmaConditionParser:
    """
    Parser for Sigma condition expression.

    After construction, ``parsedSearch`` holds the optimized parse tree of
    the search part and ``parsedAgg`` the SigmaAggregationParser for the part
    behind the first pipe, or None if the condition has no pipe.
    """
    searchOperators = [     # description of operators: (token id, number of operands, parse tree node class) - order == precedence
        (SigmaConditionToken.TOKEN_ALL, 1, generateAllOf),
        (SigmaConditionToken.TOKEN_ONE, 1, generateOneOf),
        (SigmaConditionToken.TOKEN_ID,  0, convertId),
        (SigmaConditionToken.TOKEN_NOT, 1, ConditionNOT),
        (SigmaConditionToken.TOKEN_AND, 2, ConditionAND),
        (SigmaConditionToken.TOKEN_OR,  2, ConditionOR),
    ]

    def __init__(self, sigmaParser, tokens):
        """
        sigmaParser -- rule parser; its ``config`` attribute is stored and it
                       is handed to the parse tree generators
        tokens      -- SigmaConditionTokenizer with the condition tokens
        """
        self.sigmaParser = sigmaParser
        self.config = sigmaParser.config
        self._optimizer = SigmaConditionOptimizer()

        if SigmaConditionToken.TOKEN_PIPE in tokens:    # Condition contains at least one aggregation expression
            pipepos = tokens.index(SigmaConditionToken.TOKEN_PIPE)
            self.parsedSearch = self.parseSearch(tokens[:pipepos])
            self.parsedAgg = SigmaAggregationParser(tokens[pipepos + 1:], self.sigmaParser, self.config)
        else:
            self.parsedSearch = self.parseSearch(tokens)
            self.parsedAgg = None

    def _findClosingParenthesis(self, tokens, lPos):
        """Return the index of the parenthesis closing the one at *lPos*, or None if it is never closed."""
        depth = 0
        for pos in range(lPos, len(tokens)):
            tok = tokens[pos]
            if tok == SigmaConditionToken.TOKEN_LPAR:
                depth += 1
            elif tok == SigmaConditionToken.TOKEN_RPAR:
                depth -= 1
                if depth == 0:
                    return pos
        return None

    def parseSearch(self, tokens):
        """
        Iterative parsing of search expression.

        Returns the optimized parse tree for *tokens*.

        Raises SigmaParseError for unbalanced or empty parentheses and for
        operators that lack an operand, ValueError if the expression does
        not reduce to exactly one node.
        """
        # 1. Identify subexpressions with parentheses around them and parse them like a separate search expression
        while SigmaConditionToken.TOKEN_LPAR in tokens:
            lPos = tokens.index(SigmaConditionToken.TOKEN_LPAR)
            lTok = tokens[lPos]
            try:
                firstRPos = tokens.index(SigmaConditionToken.TOKEN_RPAR)
            except ValueError as e:
                raise SigmaParseError("Missing matching closing parentheses") from e
            if lPos > firstRPos:
                rTok = tokens[firstRPos]
                raise SigmaParseError("Closing parentheses at position " + str(rTok.pos) + " precedes opening at position " + str(lTok.pos))

            # Pair the opening parenthesis with its own closing one, not with
            # the first closing one in the expression: with nesting such as
            # "(a and (b or c))" these differ.
            rPos = self._findClosingParenthesis(tokens, lPos)
            if rPos is None:
                raise SigmaParseError("Missing matching closing parentheses")
            if lPos + 1 == rPos:
                raise SigmaParseError("Empty subexpression at " + str(lTok.pos))

            subparsed = self.parseSearch(tokens[lPos + 1:rPos])
            tokens = tokens[:lPos] + NodeSubexpression(subparsed) + tokens[rPos + 1:]   # replace parentheses + expression with group node that contains parsed subexpression

        # 2. Iterate over all known operators in given precedence
        for operator in self.searchOperators:
            # 3. reduce all occurrences into corresponding parse tree nodes
            while operator[0] in tokens:
                pos_op = tokens.index(operator[0])
                tok_op = tokens[pos_op]
                if operator[1] == 0:    # operator
                    treenode = operator[2](self.sigmaParser, tok_op)
                    tokens = tokens[:pos_op] + treenode + tokens[pos_op + 1:]
                elif operator[1] == 1:  # operator value
                    pos_val = pos_op + 1
                    if pos_val >= len(tokens):
                        raise SigmaParseError("Missing operand after '%s' at position %s" % (tok_op.matched, tok_op.pos))
                    tok_val = tokens[pos_val]
                    treenode = operator[2](self.sigmaParser, tok_op, tok_val)
                    tokens = tokens[:pos_op] + treenode + tokens[pos_val + 1:]
                elif operator[1] == 2:  # value1 operator value2
                    pos_val1 = pos_op - 1
                    pos_val2 = pos_op + 1
                    # A leading operator would otherwise address index -1,
                    # i.e. the last token, and never get reduced.
                    if pos_val1 < 0:
                        raise SigmaParseError("Missing operand before '%s' at position %s" % (tok_op.matched, tok_op.pos))
                    if pos_val2 >= len(tokens):
                        raise SigmaParseError("Missing operand after '%s' at position %s" % (tok_op.matched, tok_op.pos))
                    tok_val1 = tokens[pos_val1]
                    tok_val2 = tokens[pos_val2]
                    treenode = operator[2](self.sigmaParser, tok_op, tok_val1, tok_val2)
                    tokens = tokens[:pos_val1] + treenode + tokens[pos_val2 + 1:]

        if len(tokens) != 1:     # parse tree must begin with exactly one node
            raise ValueError("Parse tree must have exactly one start node!")
        query_cond = tokens[0]

        # 4. Integrate conditions from logsources in configurations
        ls_cond = self.sigmaParser.get_logsource_condition()
        if ls_cond is not None:
            cond = ConditionAND()
            cond.add(ls_cond)
            cond.add(query_cond)
            query_cond = cond

        return self._optimizer.optimizeTree(query_cond)

    def __str__(self):   # pragma: no cover
        return str(self.parsedSearch)

    def __len__(self):   # pragma: no cover
        return len(self.parsedSearch)
|
|
|
|
# Aggregation parser
|
|
class SigmaAggregationParser(SimpleParser):
    """
    Parse Sigma aggregation expression and provide parsed data.

    The grammar is given as a state table for SimpleParser: one dict per
    state, mapping a token type to a 3-tuple. Judging by the entries the
    tuple is (attribute to store the value in, name of a transformation
    method, next state) -- the interpretation itself lives in SimpleParser,
    which is not part of this module, so verify there.
    """
    parsingrules = [
            {   # State 0
                SigmaConditionToken.TOKEN_AGG:  ("aggfunc", "trans_aggfunc", 1),
                SigmaConditionToken.TOKEN_NEAR: ("aggfunc", "init_near_parsing", 8),
            },
            {   # State 1
                SigmaConditionToken.TOKEN_LPAR: (None, None, 2)
            },
            {   # State 2
                SigmaConditionToken.TOKEN_RPAR: (None, None, 4),
                SigmaConditionToken.TOKEN_ID: ("aggfield", "trans_fieldname", 3),
            },
            {   # State 3
                SigmaConditionToken.TOKEN_RPAR: (None, None, 4)
            },
            {   # State 4
                SigmaConditionToken.TOKEN_BY: ("cond_op", None, 5),
                SigmaConditionToken.TOKEN_EQ: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_LT: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_LTE: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_GT: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_GTE: ("cond_op", None, 7),
            },
            {   # State 5
                SigmaConditionToken.TOKEN_ID: ("groupfield", "trans_fieldname", 6)
            },
            {   # State 6
                SigmaConditionToken.TOKEN_EQ: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_LT: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_LTE: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_GT: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_GTE: ("cond_op", None, 7),
            },
            {   # State 7
                SigmaConditionToken.TOKEN_ID: ("condition", None, -1)
            },
            {   # State 8
                SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9)
            },
            {   # State 9
                SigmaConditionToken.TOKEN_AND: (None, "set_include", 10),
            },
            {   # State 10
                SigmaConditionToken.TOKEN_NOT: (None, "set_exclude", 8),
                SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9),
            },
        ]
    finalstates = { -1, 9 }

    # Aggregation functions
    AGGFUNC_COUNT = 1
    AGGFUNC_MIN   = 2
    AGGFUNC_MAX   = 3
    AGGFUNC_AVG   = 4
    AGGFUNC_SUM   = 5
    AGGFUNC_NEAR  = 6
    aggfuncmap = {
            "count": AGGFUNC_COUNT,
            "min":   AGGFUNC_MIN,
            "max":   AGGFUNC_MAX,
            "avg":   AGGFUNC_AVG,
            "sum":   AGGFUNC_SUM,
            "near":  AGGFUNC_NEAR,
        }

    def __init__(self, tokens, parser, config):
        """
        tokens -- tokens of the aggregation expression
        parser -- rule parser, passed on to the field mapping when field
                  names are resolved
        config -- configuration providing ``get_fieldmapping()``
        """
        # State must be in place before the base class constructor runs,
        # because the transformation methods below rely on it.
        self.parser = parser
        self.config = config
        self.aggfield = None
        self.groupfield = None
        super().__init__(tokens)

    def trans_aggfunc(self, name):
        """
        Translate aggregation function name into constant.

        The lookup ignores case, because the tokenizer recognizes the
        function names case-insensitively as well. Raises SigmaParseError
        for unknown names.
        """
        try:
            return self.aggfuncmap[name.lower()]
        except KeyError:
            raise SigmaParseError("Unknown aggregation function '%s'" % (name))

    def trans_fieldname(self, fieldname):
        """
        Translate field name into configured mapped name.

        The mapping configured for *fieldname* is resolved with the rule
        parser as context. Raises NotImplementedError if the result is not a
        single string.
        """
        mapped = self.config.get_fieldmapping(fieldname).resolve_fieldname(fieldname, self.parser)
        if isinstance(mapped, str):
            return mapped
        else:
            raise NotImplementedError("Field mappings in aggregations must be single valued")

    def init_near_parsing(self, name):
        """Initialize data structures for 'near' aggregation operator parsing"""
        self.include = list()
        self.exclude = list()
        # Search identifiers are collected into whichever list is current.
        self.current = self.include
        return self.trans_aggfunc(name)

    def store_search_id(self, name):
        """Append search identifier *name* to the current list and return it."""
        self.current.append(name)
        return name

    def set_include(self, name):
        """Collect the following search identifiers in the include list."""
        self.current = self.include

    def set_exclude(self, name):
        """Collect the following search identifiers in the exclude list."""
        self.current = self.exclude
|