SigmaHQ/tools/sigma/parser/condition.py


# Sigma parser
# Copyright 2016-2017 Thomas Patzke, Florian Roth
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
from .base import SimpleParser
from .exceptions import SigmaParseError
COND_NONE = 0
COND_AND = 1
COND_OR = 2
COND_NOT = 3
COND_NULL = 4
# Condition Tokenizer
class SigmaConditionToken:
"""Token of a Sigma condition expression"""
TOKEN_AND = 1
TOKEN_OR = 2
TOKEN_NOT = 3
TOKEN_ID = 4
TOKEN_LPAR = 5
TOKEN_RPAR = 6
TOKEN_PIPE = 7
TOKEN_ONE = 8
TOKEN_ALL = 9
TOKEN_AGG = 10
TOKEN_EQ = 11
TOKEN_LT = 12
TOKEN_LTE = 13
TOKEN_GT = 14
TOKEN_GTE = 15
TOKEN_BY = 16
TOKEN_NEAR = 17
tokenstr = [
"INVALID",
"AND",
"OR",
"NOT",
"ID",
"LPAR",
"RPAR",
"PIPE",
"ONE",
"ALL",
"AGG",
"EQ",
"LT",
"LTE",
"GT",
"GTE",
"BY",
"NEAR",
]
def __init__(self, tokendef, match, pos):
self.type = tokendef[0]
self.matched = match.group()
self.pos = pos
def __eq__(self, other):
if type(other) == int: # match against type
return self.type == other
if type(other) == str: # match against content
return self.matched == other
else:
raise NotImplementedError("SigmaConditionToken can only be compared against token type constants")
def __str__(self):
return "[ Token: %s: '%s' ]" % (self.tokenstr[self.type], self.matched)
class SigmaConditionTokenizer:
"""Tokenize condition string into token sequence"""
tokendefs = [ # token definitions, tried in the given order: (token identifier, matching regular expression); a match is dropped if the token identifier is None
(SigmaConditionToken.TOKEN_ONE, re.compile("1 of", re.IGNORECASE)),
(SigmaConditionToken.TOKEN_ALL, re.compile("all of", re.IGNORECASE)),
(None, re.compile("[\\s\\r\\n]+")),
(SigmaConditionToken.TOKEN_AGG, re.compile("count|min|max|avg|sum", re.IGNORECASE)),
(SigmaConditionToken.TOKEN_NEAR, re.compile("near", re.IGNORECASE)),
(SigmaConditionToken.TOKEN_BY, re.compile("by", re.IGNORECASE)),
(SigmaConditionToken.TOKEN_EQ, re.compile("==")),
# two-character comparison operators must be tried before their one-character prefixes
(SigmaConditionToken.TOKEN_LTE, re.compile("<=")),
(SigmaConditionToken.TOKEN_LT, re.compile("<")),
(SigmaConditionToken.TOKEN_GTE, re.compile(">=")),
(SigmaConditionToken.TOKEN_GT, re.compile(">")),
(SigmaConditionToken.TOKEN_PIPE, re.compile("\\|")),
(SigmaConditionToken.TOKEN_AND, re.compile("and", re.IGNORECASE)),
(SigmaConditionToken.TOKEN_OR, re.compile("or", re.IGNORECASE)),
(SigmaConditionToken.TOKEN_NOT, re.compile("not", re.IGNORECASE)),
(SigmaConditionToken.TOKEN_ID, re.compile("[\\w*]+")),
(SigmaConditionToken.TOKEN_LPAR, re.compile("\\(")),
(SigmaConditionToken.TOKEN_RPAR, re.compile("\\)")),
]
def __init__(self, condition):
if type(condition) == str: # String that is parsed
self.tokens = list()
pos = 1
while len(condition) > 0:
for tokendef in self.tokendefs: # iterate over defined tokens and try to recognize the next one
match = tokendef[1].match(condition)
if match:
if tokendef[0] != None:
self.tokens.append(SigmaConditionToken(tokendef, match, pos + match.start()))
pos += match.end() # increase position and cut matched prefix from condition
condition = condition[match.end():]
break
else: # no valid token identified
raise SigmaParseError("Unexpected token in condition at position %s" % condition)
elif type(condition) == list: # List of tokens to be converted into SigmaConditionTokenizer class
self.tokens = condition
else:
raise TypeError("SigmaConditionTokenizer constructor expects string or list, got %s" % (type(condition)))
def __str__(self):
return " ".join([str(token) for token in self.tokens])
def __iter__(self):
return iter(self.tokens)
def __len__(self):
return len(self.tokens)
def __getitem__(self, i):
if type(i) == int:
return self.tokens[i]
elif type(i) == slice:
return SigmaConditionTokenizer(self.tokens[i])
else:
raise IndexError("Expected index or slice")
def __add__(self, other):
if isinstance(other, SigmaConditionTokenizer):
return SigmaConditionTokenizer(self.tokens + other.tokens)
elif isinstance(other, (SigmaConditionToken, ParseTreeNode)):
return SigmaConditionTokenizer(self.tokens + [ other ])
else:
raise TypeError("+ operator expects SigmaConditionTokenizer or token type, got %s: %s" % (type(other), str(other)))
def index(self, item):
return self.tokens.index(item)
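# Illustrative tokenizer usage (a hedged sketch; the condition string is made up,
# and the expected output follows from the __str__ implementations above):
#
#     tokens = SigmaConditionTokenizer("selection and not filter")
#     print(tokens)
#     # [ Token: ID: 'selection' ] [ Token: AND: 'and' ] [ Token: NOT: 'not' ] [ Token: ID: 'filter' ]
#
# Slicing returns a new tokenizer (e.g. tokens[1:]), and expressions like
# 'SigmaConditionToken.TOKEN_AND in tokens' work because tokens compare equal
# to their integer type constants.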
### Parse Tree Node Classes ###
class ParseTreeNode:
"""Parse Tree Node Base Class"""
def __init__(self):
raise NotImplementedError("ConditionBase is no usable class")
def __str__(self):
return "[ %s: %s ]" % (self.__doc__, str([str(item) for item in self.items]))
class ConditionBase(ParseTreeNode):
"""Base class for conditional operations"""
op = COND_NONE
items = None
def __init__(self):
raise NotImplementedError("ConditionBase is no usable class")
def add(self, item):
self.items.append(item)
def __iter__(self):
return iter(self.items)
def __len__(self):
return len(self.items)
class ConditionAND(ConditionBase):
"""AND Condition"""
op = COND_AND
def __init__(self, sigma=None, op=None, val1=None, val2=None):
if sigma == None and op == None and val1 == None and val2 == None: # no parameters given - initialize empty
self.items = list()
else: # called by parser, use given values
self.items = [ val1, val2 ]
class ConditionOR(ConditionAND):
"""OR Condition"""
op = COND_OR
class ConditionNOT(ConditionBase):
"""NOT Condition"""
op = COND_NOT
def __init__(self, sigma=None, op=None, val=None):
if sigma == None and op == None and val == None: # no parameters given - initialize empty
self.items = list()
else: # called by parser, use given values
self.items = [ val ]
def add(self, item):
if len(self.items) == 0:
super().add(item)
else:
raise ValueError("Only one element allowed")
@property
def item(self):
try:
return self.items[0]
except IndexError:
return None
class ConditionNULLValue(ConditionNOT):
"""Condition: Field value is empty or doesn't exists"""
pass
class ConditionNotNULLValue(ConditionNULLValue):
"""Condition: Field value is not empty"""
pass
class NodeSubexpression(ParseTreeNode):
"""Subexpression"""
def __init__(self, subexpr):
self.items = subexpr
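# Sketch of how these node classes combine (hand-built here for illustration
# only; normally SigmaConditionParser below constructs them, and the
# parsed_selection/parsed_filter names are hypothetical parse_definition() results):
#
#     negated = ConditionNOT()
#     negated.add(NodeSubexpression(parsed_filter))
#     root = ConditionAND()
#     root.add(NodeSubexpression(parsed_selection))
#     root.add(negated)
#
# i.e. 'selection and not filter' becomes an AND node over a subexpression and
# a NOT node wrapping another subexpression.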
# Parse tree generators: generate parse tree nodes from extended conditions
def generateXOf(sigma, val, condclass):
"""
Generic implementation of (1|all) of x expressions.
* condclass across all list items if x is name of definition
* condclass across all definitions if x is keyword 'them'
* condclass across all matching definitions if x is a wildcard expression, e.g. 'selection*'
"""
if val.matched == "them": # OR across all definitions
cond = condclass()
for definition in sigma.definitions.values():
cond.add(NodeSubexpression(sigma.parse_definition(definition)))
return NodeSubexpression(cond)
elif "*" in val.matched: # OR across all matching definitions
cond = condclass()
reDefPat = re.compile("^" + val.matched.replace("*", ".*") + "$")
for name, definition in sigma.definitions.items():
if reDefPat.match(name):
cond.add(NodeSubexpression(sigma.parse_definition(definition)))
return NodeSubexpression(cond)
else: # OR across all items of definition
return NodeSubexpression(sigma.parse_definition_byname(val.matched, condclass))
def generateAllOf(sigma, op, val):
"""Convert 'all of x' expressions into ConditionAND"""
return generateXOf(sigma, val, ConditionAND)
def generateOneOf(sigma, op, val):
"""Convert '1 of x' expressions into ConditionOR"""
return generateXOf(sigma, val, ConditionOR)
def convertId(sigma, op):
"""Convert search identifiers (lists or maps) into condition nodes according to spec defaults"""
return NodeSubexpression(sigma.parse_definition_byname(op.matched))
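# Example of the expansion performed by generateXOf (a sketch; the definition
# names 'selection1'/'selection2' and their *_definition variables are hypothetical):
# for the condition '1 of selection*' the wildcard branch effectively builds
#
#     cond = ConditionOR()
#     cond.add(NodeSubexpression(sigma.parse_definition(selection1_definition)))
#     cond.add(NodeSubexpression(sigma.parse_definition(selection2_definition)))
#     result = NodeSubexpression(cond)
#
# while 'all of them' takes the 'them' branch and puts a ConditionAND across
# every definition of the rule.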
# Condition parser
class SigmaConditionParser:
"""Parser for Sigma condition expression"""
searchOperators = [ # operator descriptions: (token id, number of operands, parse tree node class or generator function) - list order defines precedence
(SigmaConditionToken.TOKEN_ALL, 1, generateAllOf),
(SigmaConditionToken.TOKEN_ONE, 1, generateOneOf),
(SigmaConditionToken.TOKEN_ID, 0, convertId),
(SigmaConditionToken.TOKEN_NOT, 1, ConditionNOT),
(SigmaConditionToken.TOKEN_AND, 2, ConditionAND),
(SigmaConditionToken.TOKEN_OR, 2, ConditionOR),
]
def __init__(self, sigmaParser, tokens):
self.sigmaParser = sigmaParser
self.config = sigmaParser.config
if SigmaConditionToken.TOKEN_PIPE in tokens: # Condition contains at least one aggregation expression
pipepos = tokens.index(SigmaConditionToken.TOKEN_PIPE)
self.parsedSearch = self.parseSearch(tokens[:pipepos])
self.parsedAgg = SigmaAggregationParser(tokens[pipepos + 1:], self.sigmaParser, self.config)
else:
self.parsedSearch = self.parseSearch(tokens)
self.parsedAgg = None
def parseSearch(self, tokens):
"""
Iterative parsing of search expression.
"""
# 1. Identify subexpressions with parentheses around them and parse them like a separate search expression
while SigmaConditionToken.TOKEN_LPAR in tokens:
lPos = tokens.index(SigmaConditionToken.TOKEN_LPAR)
lTok = tokens[lPos]
try:
rPos = tokens.index(SigmaConditionToken.TOKEN_RPAR)
rTok = tokens[rPos]
except ValueError as e:
raise SigmaParseError("Missing matching closing parentheses") from e
if lPos + 1 == rPos:
raise SigmaParseError("Empty subexpression at " + str(lTok.pos))
if lPos > rPos:
raise SigmaParseError("Closing parentheses at position " + str(rTok.pos) + " precedes opening at position " + str(lTok.pos))
subparsed = self.parseSearch(tokens[lPos + 1:rPos])
tokens = tokens[:lPos] + NodeSubexpression(subparsed) + tokens[rPos + 1:] # replace parentheses + expression with group node that contains parsed subexpression
# 2. Iterate over all known operators in given precedence
for operator in self.searchOperators:
# 3. reduce all occurrences into corresponding parse tree nodes
while operator[0] in tokens:
pos_op = tokens.index(operator[0])
tok_op = tokens[pos_op]
if operator[1] == 0: # operator
treenode = operator[2](self.sigmaParser, tok_op)
tokens = tokens[:pos_op] + treenode + tokens[pos_op + 1:]
elif operator[1] == 1: # operator value
pos_val = pos_op + 1
tok_val = tokens[pos_val]
treenode = operator[2](self.sigmaParser, tok_op, tok_val)
tokens = tokens[:pos_op] + treenode + tokens[pos_val + 1:]
elif operator[1] == 2: # value1 operator value2
pos_val1 = pos_op - 1
pos_val2 = pos_op + 1
tok_val1 = tokens[pos_val1]
tok_val2 = tokens[pos_val2]
treenode = operator[2](self.sigmaParser, tok_op, tok_val1, tok_val2)
tokens = tokens[:pos_val1] + treenode + tokens[pos_val2 + 1:]
if len(tokens) != 1: # parse tree must begin with exactly one node
raise ValueError("Parse tree must have exactly one start node!")
query_cond = tokens[0]
# 4. Integrate conditions from logsources in configurations
ls_cond = self.sigmaParser.get_logsource_condition()
if ls_cond is not None:
cond = ConditionAND()
cond.add(ls_cond)
cond.add(query_cond)
query_cond = cond
return query_cond
def __str__(self):
return str(self.parsedSearch)
def __len__(self):
return len(self.parsedSearch)
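# Illustrative walk-through of parseSearch() (hedged; 'sigmaParser' stands for a
# SigmaParser instance with already parsed definitions and is not constructed here):
#
#     tokens = SigmaConditionTokenizer("(selection1 or selection2) and not filter")
#     parsed = SigmaConditionParser(sigmaParser, tokens)
#
# The parenthesized group is reduced first into a NodeSubexpression, then the
# operators are applied in searchOperators order: identifiers become
# NodeSubexpressions, NOT binds tighter than AND, and AND tighter than OR.
# The result is roughly ConditionAND([NodeSubexpression(ConditionOR(...)),
# ConditionNOT(NodeSubexpression(...))]), possibly wrapped in a further
# ConditionAND together with a logsource condition from the configuration.
# Anything after a '|' token is handed to SigmaAggregationParser instead.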
# Aggregation parser
class SigmaAggregationParser(SimpleParser):
"""Parse Sigma aggregation expression and provide parsed data"""
parsingrules = [
{ # State 0
SigmaConditionToken.TOKEN_AGG: ("aggfunc", "trans_aggfunc", 1),
SigmaConditionToken.TOKEN_NEAR: ("aggfunc", "init_near_parsing", 8),
},
{ # State 1
SigmaConditionToken.TOKEN_LPAR: (None, None, 2)
},
{ # State 2
SigmaConditionToken.TOKEN_RPAR: (None, None, 4),
SigmaConditionToken.TOKEN_ID: ("aggfield", "trans_fieldname", 3),
},
{ # State 3
SigmaConditionToken.TOKEN_RPAR: (None, None, 4)
},
{ # State 4
SigmaConditionToken.TOKEN_BY: ("cond_op", None, 5),
SigmaConditionToken.TOKEN_EQ: ("cond_op", None, 7),
SigmaConditionToken.TOKEN_LT: ("cond_op", None, 7),
SigmaConditionToken.TOKEN_LTE: ("cond_op", None, 7),
SigmaConditionToken.TOKEN_GT: ("cond_op", None, 7),
SigmaConditionToken.TOKEN_GTE: ("cond_op", None, 7),
},
{ # State 5
SigmaConditionToken.TOKEN_ID: ("groupfield", "trans_fieldname", 6)
},
{ # State 6
SigmaConditionToken.TOKEN_EQ: ("cond_op", None, 7),
SigmaConditionToken.TOKEN_LT: ("cond_op", None, 7),
SigmaConditionToken.TOKEN_LTE: ("cond_op", None, 7),
SigmaConditionToken.TOKEN_GT: ("cond_op", None, 7),
SigmaConditionToken.TOKEN_GTE: ("cond_op", None, 7),
},
{ # State 7
SigmaConditionToken.TOKEN_ID: ("condition", None, -1)
},
{ # State 8
SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9)
},
{ # State 9
SigmaConditionToken.TOKEN_AND: (None, "set_include", 10),
},
{ # State 10
SigmaConditionToken.TOKEN_NOT: (None, "set_exclude", 8),
SigmaConditionToken.TOKEN_ID: (None, "store_search_id", 9),
},
]
finalstates = { -1, 9 }
# Aggregation functions
AGGFUNC_COUNT = 1
AGGFUNC_MIN = 2
AGGFUNC_MAX = 3
AGGFUNC_AVG = 4
AGGFUNC_SUM = 5
AGGFUNC_NEAR = 6
aggfuncmap = {
"count": AGGFUNC_COUNT,
"min": AGGFUNC_MIN,
"max": AGGFUNC_MAX,
"avg": AGGFUNC_AVG,
"sum": AGGFUNC_SUM,
"near": AGGFUNC_NEAR,
}
def __init__(self, tokens, parser, config):
self.parser = parser
self.config = config
self.aggfield = None
self.groupfield = None
super().__init__(tokens)
def trans_aggfunc(self, name):
"""Translate aggregation function name into constant"""
try:
return self.aggfuncmap[name]
except KeyError:
raise SigmaParseError("Unknown aggregation function '%s'" % (name))
def trans_fieldname(self, fieldname):
"""Translate field name into configured mapped name"""
mapped = self.config.get_fieldmapping(fieldname).resolve_fieldname(fieldname)
if type(mapped) == str:
return mapped
else:
raise NotImplementedError("Field mappings in aggregations must be single valued")
def init_near_parsing(self, name):
"""Initialize data structures for 'near" aggregation operator parsing"""
self.include = list()
self.exclude = list()
self.current = self.include
return self.trans_aggfunc(name)
def store_search_id(self, name):
self.current.append(name)
return name
def set_include(self, name):
self.current = self.include
def set_exclude(self, name):
self.current = self.exclude
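# Illustrative state-machine walk for the aggregation part of a condition such
# as "selection | count() by ComputerName > 10" (a hedged sketch; the field name
# and threshold are made up, state numbers refer to 'parsingrules' above):
#
#     count           TOKEN_AGG  -> state 1, aggfunc set via trans_aggfunc (AGGFUNC_COUNT)
#     (               TOKEN_LPAR -> state 2
#     )               TOKEN_RPAR -> state 4 (no aggregation field given)
#     by              TOKEN_BY   -> state 5
#     ComputerName    TOKEN_ID   -> state 6, groupfield set via trans_fieldname
#     >               TOKEN_GT   -> state 7, cond_op recorded as ">"
#     10              TOKEN_ID   -> final state -1, condition = "10"
#
# A 'near search1 and not search2' expression instead follows states 8/9/10,
# filling self.include and self.exclude via store_search_id/set_include/set_exclude.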