2017-02-13 22:29:56 +00:00
|
|
|
# Sigma parser
|
2017-12-07 20:55:43 +00:00
|
|
|
# Copyright 2016-2017 Thomas Patzke, Florian Roth
|
|
|
|
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Lesser General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Lesser General Public License for more details.
|
|
|
|
|
|
|
|
# You should have received a copy of the GNU Lesser General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2017-02-13 22:29:56 +00:00
|
|
|
|
|
|
|
import re
|
2018-07-26 22:02:07 +00:00
|
|
|
from .base import SimpleParser
|
|
|
|
from .exceptions import SigmaParseError
|
2017-10-31 21:13:20 +00:00
|
|
|
|
2017-02-15 23:40:08 +00:00
|
|
|
# Operation identifiers for the `op` class attribute of ConditionBase
# subclasses below.
COND_NONE = 0   # no operation (abstract base default)
COND_AND = 1    # logical conjunction
COND_OR = 2     # logical disjunction
COND_NOT = 3    # logical negation
COND_NULL = 4   # field value is null/missing
|
2017-02-15 23:40:08 +00:00
|
|
|
|
2018-11-21 22:22:38 +00:00
|
|
|
# Debugging code
|
|
|
|
def dumpNode(node, indent=''):   # pragma: no cover
    """
    Recursively print the AST rooted at *node* for debugging and return
    *node* unchanged.

    Objects exposing an `items` attribute are treated as inner nodes and
    recursed into; anything else is printed as a leaf via repr().
    """
    label = type(node).__name__
    if not hasattr(node, 'items'):
        # Leaf: a plain value, token or similar.
        print("%s%s=%s" % (indent, label, repr(node)))
        return node
    print("%s%s<%s>" % (indent, label, type(node.items).__name__))
    # A non-list `items` (e.g. NodeSubexpression) holds a single child node.
    children = node.items if type(node.items) == list else [node.items]
    for child in children:
        dumpNode(child, indent + ' ')
    return node
|
|
|
|
|
2018-07-26 21:40:22 +00:00
|
|
|
# Condition Tokenizer
|
2017-02-16 22:58:44 +00:00
|
|
|
class SigmaConditionToken:
    """Token of a Sigma condition expression"""

    # Token type identifiers; they double as indices into `tokenstr`.
    TOKEN_AND  = 1
    TOKEN_OR   = 2
    TOKEN_NOT  = 3
    TOKEN_ID   = 4
    TOKEN_LPAR = 5
    TOKEN_RPAR = 6
    TOKEN_PIPE = 7
    TOKEN_ONE  = 8
    TOKEN_ALL  = 9
    TOKEN_AGG  = 10
    TOKEN_EQ   = 11
    TOKEN_LT   = 12
    TOKEN_LTE  = 13
    TOKEN_GT   = 14
    TOKEN_GTE  = 15
    TOKEN_BY   = 16
    TOKEN_NEAR = 17

    # Human-readable token names indexed by type; slot 0 is unused.
    tokenstr = [
        "INVALID",
        "AND", "OR", "NOT", "ID", "LPAR", "RPAR", "PIPE", "ONE", "ALL",
        "AGG", "EQ", "LT", "LTE", "GT", "GTE", "BY", "NEAR",
    ]

    def __init__(self, tokendef, match, pos):
        """Build a token from a (type, regex) definition, its regex match object and its 1-based source position."""
        self.type = tokendef[0]
        self.matched = match.group()
        self.pos = pos

    def __eq__(self, other):
        """Compare against a token type constant (int) or the matched text (str); anything else raises."""
        if type(other) == int:      # match against type
            return self.type == other
        if type(other) == str:      # match against content
            return self.matched == other
        else:
            raise NotImplementedError("SigmaConditionToken can only be compared against token type constants")

    def __str__(self):  # pragma: no cover
        return f"[ Token: {self.tokenstr[self.type]}: '{self.matched}' ]"
|
|
|
|
|
|
|
|
class SigmaConditionTokenizer:
    """Tokenize condition string into token sequence"""

    # List of tokens: (token identifier, matching regular expression).
    # Matching is attempted in this order and the first match wins, so longer
    # operators must precede their own prefixes: LTE before LT and GTE before
    # GT.  (The original order listed "<" before "<=" and ">" before ">=",
    # which made '<=' and '>=' untokenizable: '<' matched first and the
    # dangling '=' matched nothing.)  Entries with identifier None
    # (whitespace) are consumed but not emitted.
    # NOTE(review): keyword patterns like "and"/"or"/"near" carry no trailing
    # word-boundary, so an identifier such as "andx" tokenizes as AND+ID("x")
    # -- confirm the intended grammar before tightening these patterns.
    tokendefs = [
            (SigmaConditionToken.TOKEN_ONE,  re.compile("1 of", re.IGNORECASE)),
            (SigmaConditionToken.TOKEN_ALL,  re.compile("all of", re.IGNORECASE)),
            (None,                           re.compile("[\\s\\r\\n]+")),
            (SigmaConditionToken.TOKEN_AGG,  re.compile("count|min|max|avg|sum", re.IGNORECASE)),
            (SigmaConditionToken.TOKEN_NEAR, re.compile("near", re.IGNORECASE)),
            (SigmaConditionToken.TOKEN_BY,   re.compile("by", re.IGNORECASE)),
            (SigmaConditionToken.TOKEN_EQ,   re.compile("==")),
            (SigmaConditionToken.TOKEN_LTE,  re.compile("<=")),
            (SigmaConditionToken.TOKEN_LT,   re.compile("<")),
            (SigmaConditionToken.TOKEN_GTE,  re.compile(">=")),
            (SigmaConditionToken.TOKEN_GT,   re.compile(">")),
            (SigmaConditionToken.TOKEN_PIPE, re.compile("\\|")),
            (SigmaConditionToken.TOKEN_AND,  re.compile("and", re.IGNORECASE)),
            (SigmaConditionToken.TOKEN_OR,   re.compile("or", re.IGNORECASE)),
            (SigmaConditionToken.TOKEN_NOT,  re.compile("not", re.IGNORECASE)),
            (SigmaConditionToken.TOKEN_ID,   re.compile("[\\w*]+")),
            (SigmaConditionToken.TOKEN_LPAR, re.compile("\\(")),
            (SigmaConditionToken.TOKEN_RPAR, re.compile("\\)")),
            ]

    def __init__(self, condition):
        """
        Build a token sequence from *condition*: either a raw condition
        string, or an already-built list of tokens/nodes (used internally by
        slicing and concatenation).

        :raises SigmaParseError: if part of the string matches no token
        :raises TypeError: if *condition* is neither str nor list
        """
        if type(condition) == str:          # String that is parsed
            self.tokens = list()
            pos = 1                         # 1-based position for error reporting
            while len(condition) > 0:
                for tokendef in self.tokendefs:     # iterate over defined tokens and try to recognize the next one
                    match = tokendef[1].match(condition)
                    if match:
                        if tokendef[0] is not None:
                            self.tokens.append(SigmaConditionToken(tokendef, match, pos + match.start()))
                        pos += match.end()  # increase position and cut matched prefix from condition
                        condition = condition[match.end():]
                        break
                else:   # no valid token identified
                    # Fix: the original formatted the remaining text into a
                    # "position %s" slot; report the numeric position too.
                    raise SigmaParseError("Unexpected token in condition at position %d: %s" % (pos, condition))
        elif type(condition) == list:       # List of tokens to be converted into SigmaConditionTokenizer class
            self.tokens = condition
        else:
            raise TypeError("SigmaConditionTokenizer constructor expects string or list, got %s" % (type(condition)))

    def __str__(self):  # pragma: no cover
        return " ".join([str(token) for token in self.tokens])

    def __iter__(self):
        return iter(self.tokens)

    def __len__(self):
        return len(self.tokens)

    def __getitem__(self, i):
        """Integer index returns the token; a slice returns a new tokenizer."""
        if type(i) == int:
            return self.tokens[i]
        elif type(i) == slice:
            return SigmaConditionTokenizer(self.tokens[i])
        else:
            raise IndexError("Expected index or slice")

    def __add__(self, other):
        """Concatenate with another tokenizer, or append a single token/parse-tree node."""
        if isinstance(other, SigmaConditionTokenizer):
            return SigmaConditionTokenizer(self.tokens + other.tokens)
        elif isinstance(other, (SigmaConditionToken, ParseTreeNode)):
            return SigmaConditionTokenizer(self.tokens + [ other ])
        else:
            raise TypeError("+ operator expects SigmaConditionTokenizer or token type, got %s: %s" % (type(other), str(other)))

    def index(self, item):
        """Position of the first token equal to *item* (type constant or matched text)."""
        return self.tokens.index(item)
|
2017-03-04 23:37:28 +00:00
|
|
|
|
2017-02-15 23:40:08 +00:00
|
|
|
### Parse Tree Node Classes ###
|
2017-02-22 21:47:12 +00:00
|
|
|
class ParseTreeNode:
    """Parse Tree Node Base Class"""

    def __init__(self):
        # Abstract base: only subclasses are instantiated.
        # Fix: the original message said "ConditionBase is no usable class",
        # a copy/paste slip from the sibling class -- this is ParseTreeNode.
        raise NotImplementedError("ParseTreeNode is no usable class")

    def __str__(self):  # pragma: no cover
        # Subclasses provide `items`; the class docstring serves as the label.
        return "[ %s: %s ]" % (self.__doc__, str([str(item) for item in self.items]))
|
|
|
|
|
|
|
|
class ConditionBase(ParseTreeNode):
    """Base class for conditional operations"""

    op = COND_NONE   # operation identifier (COND_* constant), overridden by subclasses
    items = None     # child nodes; concrete subclass __init__ creates the list

    def __init__(self):
        # Abstract: only subclasses may be instantiated.
        raise NotImplementedError("ConditionBase is no usable class")

    def add(self, item):
        """Append a child node to this condition."""
        self.items.append(item)

    def __len__(self):
        """Number of direct child nodes."""
        return len(self.items)

    def __iter__(self):
        """Iterate over the direct child nodes."""
        return iter(self.items)
|
|
|
|
|
2017-02-15 23:40:08 +00:00
|
|
|
class ConditionAND(ConditionBase):
    """AND Condition"""

    op = COND_AND

    def __init__(self, sigma=None, op=None, val1=None, val2=None):
        """
        Create an AND node.

        Without arguments the node starts empty and children are attached via
        add().  The condition parser calls it as (sigma, op-token, val1, val2)
        and the two operands become the initial children; *sigma* and *op*
        exist only for the uniform operator-constructor interface and are
        unused here.
        """
        # Use identity checks instead of `== None`: operand tokens define a
        # custom __eq__ and PEP 8 mandates `is None` for None tests anyway.
        if sigma is None and op is None and val1 is None and val2 is None:  # no parameters given - initialize empty
            self.items = list()
        else:                                                               # called by parser, use given values
            self.items = [ val1, val2 ]
|
2017-02-15 23:40:08 +00:00
|
|
|
|
|
|
|
class ConditionOR(ConditionAND):
    """OR Condition"""
    # Same constructor and child handling as ConditionAND; only the
    # operation identifier differs.
    op = COND_OR
|
|
|
|
|
|
|
|
class ConditionNOT(ConditionBase):
    """NOT Condition"""

    op = COND_NOT

    def __init__(self, sigma=None, op=None, val=None):
        """
        Create a NOT node: empty when called without arguments, otherwise
        wrapping the single operand *val* handed over by the parser.  *sigma*
        and *op* exist only for the uniform operator-constructor interface
        and are unused here.
        """
        if sigma is None and op is None and val is None:    # no parameters given - initialize empty
            self.items = list()
        else:                                               # called by parser, use given values
            self.items = [ val ]

    def add(self, item):
        """Attach the negated subexpression; NOT accepts exactly one child."""
        if len(self.items) == 0:
            # Fix: the original read `super.add(item)`, an attribute access on
            # the builtin `super` type itself, which raises AttributeError at
            # runtime instead of delegating to ConditionBase.add.
            super().add(item)
        else:
            raise ValueError("Only one element allowed")

    @property
    def item(self):
        """The single negated child node, or None if none was set yet."""
        try:
            return self.items[0]
        except IndexError:
            return None
|
|
|
|
|
2017-10-29 22:57:39 +00:00
|
|
|
class ConditionNULLValue(ConditionNOT):
    """Condition: Field value is empty or doesn't exists"""
    # Structurally identical to ConditionNOT (single child = the field name);
    # backends recognize the type and emit an "is null / missing" query.
    pass
|
|
|
|
|
|
|
|
class ConditionNotNULLValue(ConditionNULLValue):
    """Condition: Field value is not empty"""
    # Inverse of ConditionNULLValue; produced by the optimizer when a NOT
    # wraps a ConditionNULLValue (see SigmaConditionOptimizer._optimizeNode).
    pass
|
|
|
|
|
2017-02-22 21:47:12 +00:00
|
|
|
class NodeSubexpression(ParseTreeNode):
    """Subexpression"""
    # Wraps a parsed subtree that originated from a parenthesized group or a
    # parsed definition.  Unlike the condition classes, `items` holds a single
    # node rather than a list.
    def __init__(self, subexpr):
        self.items = subexpr
|
2017-02-22 21:43:35 +00:00
|
|
|
|
2018-07-26 22:02:07 +00:00
|
|
|
# Parse tree generators: generate parse tree nodes from extended conditions
|
|
|
|
def generateXOf(sigma, val, condclass):
    """
    Generic implementation of (1|all) of x expressions.

    * condclass across all list items if x is name of definition
    * condclass across all definitions if x is keyword 'them'
    * condclass across all matching definitions if x is a wildcard
      expression, e.g. 'selection*'
    """
    if val.matched == "them":           # condclass across all definitions
        cond = condclass()
        for name, definition in sigma.definitions.items():
            if name == "timeframe":     # timeframe is rule metadata, not a searchable definition
                continue
            cond.add(NodeSubexpression(sigma.parse_definition(definition)))
        return NodeSubexpression(cond)
    elif "*" in val.matched:            # condclass across all matching definitions
        # Fix: the original tested `val.matched.find("*") > 0`, which missed
        # wildcard patterns starting with '*' (e.g. "*_selection").
        cond = condclass()
        reDefPat = re.compile("^" + val.matched.replace("*", ".*") + "$")
        for name, definition in sigma.definitions.items():
            if name != "timeframe" and reDefPat.match(name):
                cond.add(NodeSubexpression(sigma.parse_definition(definition)))
        return NodeSubexpression(cond)
    else:                               # condclass across all items of definition
        return NodeSubexpression(sigma.parse_definition_byname(val.matched, condclass))
|
|
|
|
|
2018-07-26 22:02:07 +00:00
|
|
|
def generateAllOf(sigma, op, val):
    """
    Translate an 'all of x' expression into a ConditionAND subtree.

    *op* is the ALL token and is unused; *val* is the token naming the
    target definition(s).
    """
    return generateXOf(sigma, val, ConditionAND)
|
2017-02-22 21:43:35 +00:00
|
|
|
|
2018-07-26 22:02:07 +00:00
|
|
|
def generateOneOf(sigma, op, val):
    """
    Translate a '1 of x' expression into a ConditionOR subtree.

    *op* is the ONE token and is unused; *val* is the token naming the
    target definition(s).
    """
    return generateXOf(sigma, val, ConditionOR)
|
2017-02-22 21:43:35 +00:00
|
|
|
|
|
|
|
def convertId(sigma, op):
    """
    Resolve an ID token into a parse tree node.

    Looks up the definition named by *op* (a list or map search identifier)
    and wraps the result, parsed per spec defaults, in a NodeSubexpression.
    """
    parsed = sigma.parse_definition_byname(op.matched)
    return NodeSubexpression(parsed)
|
2017-02-22 21:43:35 +00:00
|
|
|
|
2018-10-02 22:24:31 +00:00
|
|
|
# Optimizer
|
|
|
|
class SigmaConditionOptimizer:
    """
    Optimizer for the parsed AST.
    """
    def _stripSubexpressionNode(self, node):
        """
        Recursively strips all subexpressions (i.e. brackets) from the AST.
        """
        if type(node) == NodeSubexpression:
            # NodeSubexpression wraps exactly one child, never a list.
            assert(type(node.items) != list)
            return self._stripSubexpressionNode(node.items)
        # NOT nodes are deliberately left untouched so their bracket
        # structure is preserved.
        if hasattr(node, 'items') and type(node) is not ConditionNOT:
            node.items = list(map(self._stripSubexpressionNode, node.items))
        return node

    def _unstripSubexpressionNode(self, node):
        """
        Recursively adds brackets around AND and OR operations in the AST.
        """
        if type(node) in (ConditionAND, ConditionOR):
            newnode = NodeSubexpression(node)
            node.items = list(map(self._unstripSubexpressionNode, node.items))
            return newnode
        return node

    def _ordered_uniq(self, l):
        """
        Remove duplicate entries in list *l* while preserving order.

        Used to be fast before it needed to work around list instead of
        tuple being used for lists within definitions in the AST.
        """
        seen = set()
        #return [x for x in l if x not in seen and not seen.add(x)]
        uniq = []
        for x in l:
            # (field, [values]) entries are unhashable; temporarily convert
            # the value list to a tuple so it can live in the `seen` set.
            if type(x) == tuple and type(x[1]) == list:
                x = (x[0], tuple(x[1]))
            # `seen.add(x)` returns None, so `not seen.add(x)` is always True
            # and merely records x as seen as a side effect.
            if x not in seen and not seen.add(x):
                uniq.append(x)
        out = []
        # Undo the list->tuple conversion performed above.
        for x in uniq:
            if type(x) == tuple and type(x[1]) == tuple:
                out.append((x[0], list(x[1])))
            else:
                out.append(x)
        return out

    def _optimizeNode(self, node, changes=False):
        """
        Recursively optimize the AST rooted at *node* once. Returns the new
        root node and a boolean indicating if the tree was changed in this
        invocation or any of the recursive sub-invocations.

        You MUST remove all subexpression nodes from the AST before calling
        this function. Subexpressions are implicit around AND/OR nodes.
        """
        if type(node) in (ConditionOR, ConditionAND):
            # Remove empty OR(X), AND(X)
            if len(node.items) == 0:
                return None, True
            if None in node.items:
                node.items = [item for item in node.items if item != None]
                return self._optimizeNode(node, changes=True)

            # OR(X), AND(X) => X
            if len(node.items) == 1:
                return self._optimizeNode(node.items[0], changes=True)

            # OR(X, X, ...), AND(X, X, ...) => OR(X, ...), AND(X, ...)
            uniq_items = self._ordered_uniq(node.items)
            if len(uniq_items) < len(node.items):
                node.items = uniq_items
                return self._optimizeNode(node, changes=True)

            # OR(X, OR(Y)) => OR(X, Y)
            # Only flatten when every child is either the same operation or a
            # tuple (presumably a leaf field/value condition -- produced by
            # definition parsing outside this class).
            if any(type(child) == type(node) for child in node.items) and \
                    all(type(child) in (type(node), tuple) for child in node.items):
                newitems = []
                for child in node.items:
                    if hasattr(child, 'items'):
                        newitems.extend(child.items)
                    else:
                        newitems.append(child)
                node.items = newitems
                return self._optimizeNode(node, changes=True)

            # OR(AND(X, ...), AND(X, ...)) => AND(X, OR(AND(...), AND(...)))
            if type(node) == ConditionOR:
                othertype = ConditionAND
            else:
                othertype = ConditionOR
            if all(type(child) == othertype for child in node.items):
                # Collect operands present in every child (factoring
                # candidates), preserving the order of the first child.
                promoted = []
                for cand in node.items[0]:
                    if all(cand in child for child in node.items[1:]):
                        promoted.append(cand)
                if len(promoted) > 0:
                    # Pull the common operands out and nest the remainder.
                    for child in node.items:
                        for cand in promoted:
                            child.items.remove(cand)
                    newnode = othertype()
                    newnode.items = promoted
                    newnode.add(node)
                    return self._optimizeNode(newnode, changes=True)

            # fallthrough

        elif type(node) == ConditionNOT:
            assert(len(node.items) == 1)
            # NOT(NOT(X)) => X
            if type(node.items[0]) == ConditionNOT:
                assert(len(node.items[0].items) == 1)
                return self._optimizeNode(node.items[0].items[0], changes=True)

            # NOT(ConditionNULLValue) => ConditionNotNULLValue
            if type(node.items[0]) == ConditionNULLValue:
                newnode = ConditionNotNULLValue(val=node.items[0].items[0])
                return self._optimizeNode(newnode, changes=True)

            # NOT(ConditionNotNULLValue) => ConditionNULLValue
            if type(node.items[0]) == ConditionNotNULLValue:
                newnode = ConditionNULLValue(val=node.items[0].items[0])
                return self._optimizeNode(newnode, changes=True)

            # fallthrough

        else:
            # Leaf node (tuple, string, ...) -- nothing to optimize.
            return node, changes

        # No rule fired at this level: recurse into the children and
        # propagate whether any of them changed.
        itemresults = [self._optimizeNode(item, changes) for item in node.items]
        node.items = [res[0] for res in itemresults]
        if any(res[1] for res in itemresults):
            changes = True
        return node, changes

    def optimizeTree(self, tree):
        """
        Optimize the boolean expressions in the AST rooted at *tree*.

        The main idea behind optimizing the AST is that less repeated terms is
        generally better for backend performance. This is especially relevant
        to backends that do not perform any query language optimization down
        the road, such as those that generate code.

        A common example for when these suboptimal rules actually occur in
        practice is when a rule has multiple alternative detections that are
        OR'ed together in the condition, and all of the detections include a
        common element, such as the same EventID.

        The following optimizations are currently performed:
        - Removal of empty OR(), AND()
        - OR(X), AND(X) => X
        - OR(X, X, ...), AND(X, X, ...) => OR(X, ...), AND(X, ...)
        - OR(X, OR(Y)) => OR(X, Y)
        - OR(AND(X, ...), AND(X, ...)) => AND(X, OR(AND(...), AND(...)))
        - NOT(NOT(X)) => X
        - NOT(ConditionNULLValue) => ConditionNotNULLValue
        - NOT(ConditionNotNULLValue) => ConditionNULLValue

        Boolean logic simplification is NP-hard. To avoid backtracking,
        speculative transformations that may or may not lead to a more optimal
        expression were not implemented. These include for example factoring
        out common operands that are not in all, but only some AND()s within an
        OR(), or vice versa. Nevertheless, it is safe to assume that this
        implementation performs poorly on very large expressions.
        """
        # Strip brackets, iterate _optimizeNode to a fixed point, then
        # re-insert the implicit brackets around AND/OR nodes.
        tree = self._stripSubexpressionNode(tree)
        changes = True
        while changes:
            tree, changes = self._optimizeNode(tree)
        tree = self._unstripSubexpressionNode(tree)
        return tree
|
|
|
|
|
2018-07-26 21:40:22 +00:00
|
|
|
# Condition parser
|
2017-02-22 21:43:35 +00:00
|
|
|
class SigmaConditionParser:
    """Parser for Sigma condition expression"""
    searchOperators = [     # description of operators: (token id, number of operands, parse tree node class) - order == precedence
            (SigmaConditionToken.TOKEN_ALL, 1, generateAllOf),
            (SigmaConditionToken.TOKEN_ONE, 1, generateOneOf),
            (SigmaConditionToken.TOKEN_ID,  0, convertId),
            (SigmaConditionToken.TOKEN_NOT, 1, ConditionNOT),
            (SigmaConditionToken.TOKEN_AND, 2, ConditionAND),
            (SigmaConditionToken.TOKEN_OR,  2, ConditionOR),
            ]

    def __init__(self, sigmaParser, tokens):
        # Parse the search part of the condition; anything after a '|' is an
        # aggregation expression handled by SigmaAggregationParser.
        self.sigmaParser = sigmaParser
        self.config = sigmaParser.config
        self._optimizer = SigmaConditionOptimizer()

        if SigmaConditionToken.TOKEN_PIPE in tokens:    # Condition contains at least one aggregation expression
            pipepos = tokens.index(SigmaConditionToken.TOKEN_PIPE)
            self.parsedSearch = self.parseSearch(tokens[:pipepos])
            self.parsedAgg = SigmaAggregationParser(tokens[pipepos + 1:], self.sigmaParser, self.config)
        else:
            self.parsedSearch = self.parseSearch(tokens)
            self.parsedAgg = None

    def parseSearch(self, tokens):
        """
        Iterative parsing of search expression.

        Repeatedly reduces the token sequence: first parenthesized groups are
        replaced by NodeSubexpression nodes, then each operator (in precedence
        order) consumes its operand token(s) and is replaced by the
        corresponding parse tree node, until a single root node remains.
        """
        # 1. Identify subexpressions with parentheses around them and parse them like a separate search expression
        # NOTE(review): this pairs the FIRST LPAR with the FIRST RPAR, so a
        # nested group such as "((x))" produces an unbalanced slice and
        # raises -- confirm whether nested parentheses are meant to be
        # supported before changing the pairing.
        while SigmaConditionToken.TOKEN_LPAR in tokens:
            lPos = tokens.index(SigmaConditionToken.TOKEN_LPAR)
            lTok = tokens[lPos]
            try:
                rPos = tokens.index(SigmaConditionToken.TOKEN_RPAR)
                rTok = tokens[rPos]
            except ValueError as e:
                raise SigmaParseError("Missing matching closing parentheses") from e
            if lPos + 1 == rPos:
                raise SigmaParseError("Empty subexpression at " + str(lTok.pos))
            if lPos > rPos:
                raise SigmaParseError("Closing parentheses at position " + str(rTok.pos) + " precedes opening at position " + str(lTok.pos))

            subparsed = self.parseSearch(tokens[lPos + 1:rPos])
            tokens = tokens[:lPos] + NodeSubexpression(subparsed) + tokens[rPos + 1:]   # replace parentheses + expression with group node that contains parsed subexpression

        # 2. Iterate over all known operators in given precedence
        for operator in self.searchOperators:
            # 3. reduce all occurrences into corresponding parse tree nodes
            while operator[0] in tokens:
                pos_op = tokens.index(operator[0])
                tok_op = tokens[pos_op]
                if operator[1] == 0:    # operator
                    treenode = operator[2](self.sigmaParser, tok_op)
                    tokens = tokens[:pos_op] + treenode + tokens[pos_op + 1:]
                elif operator[1] == 1:  # operator value
                    pos_val = pos_op + 1
                    tok_val = tokens[pos_val]
                    treenode = operator[2](self.sigmaParser, tok_op, tok_val)
                    tokens = tokens[:pos_op] + treenode + tokens[pos_val + 1:]
                elif operator[1] == 2:  # value1 operator value2
                    pos_val1 = pos_op - 1
                    pos_val2 = pos_op + 1
                    tok_val1 = tokens[pos_val1]
                    tok_val2 = tokens[pos_val2]
                    treenode = operator[2](self.sigmaParser, tok_op, tok_val1, tok_val2)
                    tokens = tokens[:pos_val1] + treenode + tokens[pos_val2 + 1:]

        if len(tokens) != 1:    # parse tree must begin with exactly one node
            raise ValueError("Parse tree must have exactly one start node!")
        query_cond = tokens[0]

        # 4. Integrate conditions from logsources in configurations
        ls_cond = self.sigmaParser.get_logsource_condition()
        if ls_cond is not None:
            cond = ConditionAND()
            cond.add(ls_cond)
            cond.add(query_cond)
            query_cond = cond

        return self._optimizer.optimizeTree(query_cond)

    def __str__(self):  # pragma: no cover
        return str(self.parsedSearch)

    def __len__(self):  # pragma: no cover
        return len(self.parsedSearch)
|
2017-08-02 22:05:48 +00:00
|
|
|
|
2018-07-26 21:40:22 +00:00
|
|
|
# Aggregation parser
|
2017-03-29 20:22:01 +00:00
|
|
|
class SigmaAggregationParser(SimpleParser):
    """Parse Sigma aggregation expression and provide parsed data"""
    # State machine table consumed by SimpleParser:
    # state -> { token type: (attribute to set, transformer method name, next state) }.
    # States 0-7 cover "aggfunc(field) [by groupfield] <op> value";
    # states 8-10 cover "near id [and [not] id ...]".
    parsingrules = [
            {   # State 0
                SigmaConditionToken.TOKEN_AGG:  ("aggfunc", "trans_aggfunc", 1),
                SigmaConditionToken.TOKEN_NEAR: ("aggfunc", "init_near_parsing", 8),
            },
            {   # State 1
                SigmaConditionToken.TOKEN_LPAR: (None, None, 2)
            },
            {   # State 2
                SigmaConditionToken.TOKEN_RPAR: (None, None, 4),
                SigmaConditionToken.TOKEN_ID: ("aggfield", "trans_fieldname", 3),
            },
            {   # State 3
                SigmaConditionToken.TOKEN_RPAR: (None, None, 4)
            },
            {   # State 4
                SigmaConditionToken.TOKEN_BY: ("cond_op", None, 5),
                SigmaConditionToken.TOKEN_EQ: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_LT: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_LTE: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_GT: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_GTE: ("cond_op", None, 7),
            },
            {   # State 5
                SigmaConditionToken.TOKEN_ID: ("groupfield", "trans_fieldname", 6)
            },
            {   # State 6
                SigmaConditionToken.TOKEN_EQ: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_LT: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_LTE: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_GT: ("cond_op", None, 7),
                SigmaConditionToken.TOKEN_GTE: ("cond_op", None, 7),
            },
            {   # State 7
                SigmaConditionToken.TOKEN_ID: ("condition", None, -1)
            },
            {   # State 8
                SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9)
            },
            {   # State 9
                SigmaConditionToken.TOKEN_AND: (None, "set_include", 10),
            },
            {   # State 10
                SigmaConditionToken.TOKEN_NOT: (None, "set_exclude", 8),
                SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9),
            },
            ]
    # States in which the token stream may legally end.
    finalstates = { -1, 9 }

    # Aggregation functions
    AGGFUNC_COUNT = 1
    AGGFUNC_MIN   = 2
    AGGFUNC_MAX   = 3
    AGGFUNC_AVG   = 4
    AGGFUNC_SUM   = 5
    AGGFUNC_NEAR  = 6
    aggfuncmap = {
            "count": AGGFUNC_COUNT,
            "min":   AGGFUNC_MIN,
            "max":   AGGFUNC_MAX,
            "avg":   AGGFUNC_AVG,
            "sum":   AGGFUNC_SUM,
            "near":  AGGFUNC_NEAR,
            }

    def __init__(self, tokens, parser, config):
        # *parser* is the owning SigmaParser, *config* the backend field
        # mapping configuration; SimpleParser.__init__ drives the state
        # machine over *tokens*.
        self.parser = parser
        self.config = config
        self.aggfield = None
        self.groupfield = None
        super().__init__(tokens)

    def trans_aggfunc(self, name):
        """Translate aggregation function name into constant"""
        try:
            return self.aggfuncmap[name]
        except KeyError:
            raise SigmaParseError("Unknown aggregation function '%s'" % (name))

    def trans_fieldname(self, fieldname):
        """Translate field name into configured mapped name"""
        mapped = self.config.get_fieldmapping(fieldname).resolve_fieldname(fieldname, self.parser)
        if type(mapped) == str:
            return mapped
        else:
            raise NotImplementedError("Field mappings in aggregations must be single valued")

    def init_near_parsing(self, name):
        """Initialize data structures for 'near' aggregation operator parsing"""
        self.include = list()
        self.exclude = list()
        self.current = self.include
        return self.trans_aggfunc(name)

    def store_search_id(self, name):
        """Record a search identifier in the currently active include/exclude list."""
        self.current.append(name)
        return name

    def set_include(self, name):
        """Switch collection back to the include list (after AND)."""
        self.current = self.include

    def set_exclude(self, name):
        """Switch collection to the exclude list (after NOT)."""
        self.current = self.exclude
|