Conversion to Elasticsearch Query Strings

First version of sigmac that converts Sigma YAMLs without aggregations
into ES Query Strings suitable for Kibana or other tools.
This commit is contained in:
Thomas Patzke 2017-02-22 22:47:12 +01:00
parent 58f2118ef4
commit e0f813ebbb
3 changed files with 89 additions and 7 deletions

View File

@ -1,6 +1,8 @@
# Output backends for sigmac
import json
import re
import sigma
def getBackendList():
"""Return list of backend classes"""
@ -9,15 +11,56 @@ def getBackendList():
def getBackendDict():
    """Return a dict that maps each backend's identifier to its class.

    The classes come from getBackendList(); the key is the class attribute
    ``identifier``.
    """
    backendsById = {}
    for backendClass in getBackendList():
        backendsById[backendClass.identifier] = backendClass
    return backendsById
def getBackend(name):
    """Return the backend class registered under identifier `name`.

    Raises LookupError (chained from the underlying KeyError) when
    getBackendDict() has no entry for `name`.
    """
    try:
        backendClass = getBackendDict()[name]
    except KeyError as missing:
        raise LookupError("Backend not found") from missing
    else:
        return backendClass
class BaseBackend:
    """Base class for all backends"""
    # presumably controls whether the backend is offered to users -- confirm in getBackendList()
    active = False
    # key under which getBackendDict() registers the class
    identifier = "base"

    def generate(self, parsed):
        """Placeholder for the conversion of `parsed`; subclasses override it.

        Raises NotImplementedError unconditionally.
        """
        message = "Backend is not implemented yet"
        raise NotImplementedError(message)
class ElasticsearchQuerystringBackend(BaseBackend):
    """Converts Sigma rule into Elasticsearch query string. Only searches, no aggregations."""
    identifier = "es-qs"
    active = True
    # Single characters and the two-character operators && and || that
    # cleanValue() prefixes with a backslash.
    reEscape = re.compile("([+\\-=!(){}\\[\\]^\"~*?:\\\\/]|&&|\\|\\|)")
    # Characters that cleanValue() removes instead of escaping.
    reClear = re.compile("[<>]")

    def generate(self, parsed):
        """Return the query string for a parsed condition.

        `parsed` must provide getParseTree(); the returned root node is
        rendered by generateNode().
        """
        return self.generateNode(parsed.getParseTree())

    def cleanValue(self, val):
        """Return `val` with reEscape matches backslash-escaped and reClear matches removed."""
        # Raw string so that the template reads: literal backslash, then
        # group 1. The previous non-raw spelling depended on the invalid
        # escape sequence "\g" being passed through unchanged.
        val = self.reEscape.sub(r"\\\g<1>", val)
        return self.reClear.sub("", val)

    def generateNode(self, node):
        """Render one parse tree node, recursing into its children.

        Node kinds and their rendering:
        - sigma.ConditionAND / sigma.ConditionOR: children joined by " AND " / " OR "
        - sigma.ConditionNOT: "NOT " followed by the rendering of node.item
        - sigma.NodeSubexpression: rendering of node.items in parentheses
        - tuple (key, value): "key:" followed by the rendering of value
        - str / int: the cleaned value in double quotes
        - list of str / int: space-separated rendered elements in parentheses

        Raises TypeError for any other node type, for a map value that is
        not a str, int or list, and for a list holding other element types.
        """
        # Exact type comparison: subclasses (e.g. bool for int) are not accepted.
        nodeType = type(node)
        if nodeType == sigma.ConditionAND:
            return " AND ".join(self.generateNode(val) for val in node)
        elif nodeType == sigma.ConditionOR:
            return " OR ".join(self.generateNode(val) for val in node)
        elif nodeType == sigma.ConditionNOT:
            return "NOT " + self.generateNode(node.item)
        elif nodeType == sigma.NodeSubexpression:
            return "(%s)" % self.generateNode(node.items)
        elif nodeType == tuple:
            key, value = node
            if type(value) not in (str, int, list):
                raise TypeError("Map values must be strings, numbers or lists, not " + str(type(value)))
            return "%s:%s" % (key, self.generateNode(value))
        elif nodeType in (str, int):
            return "\"%s\"" % (self.cleanValue(str(node)))
        elif nodeType == list:
            if not all(type(value) in (str, int) for value in node):
                raise TypeError("List values must be strings or numbers")
            return "(%s)" % (" ".join(self.generateNode(value) for value in node))
        else:
            raise TypeError("Node type %s was not expected in Sigma parse tree" % (str(type(node))))
class ElasticsearchDSLBackend(BaseBackend):
"""Converts Sigma rule into Elasticsearch DSL query (JSON)."""
@ -38,3 +81,6 @@ class NullBackend(BaseBackend):
"""Does nothing, for debugging purposes."""
identifier = "null"
active = True
def generate(self, parsed):
    """Ignore `parsed` and produce nothing (returns None)."""
    return None

View File

@ -37,11 +37,14 @@ class SigmaParser:
for tokens in self.condtoken:
self.condparsed.append(SigmaConditionParser(self, tokens))
def parse_definition(self, definitionName, condOverride=None):
def parse_definition_byname(self, definitionName, condOverride=None):
    """Look up a definition by name and parse it.

    The entry `definitionName` of self.definitions is handed to
    parse_definition() together with `condOverride`.

    Raises SigmaParseError (chained from the KeyError) if the name is unknown.
    """
    try:
        found = self.definitions[definitionName]
    except KeyError as missing:
        raise SigmaParseError("Unknown definition '%s'" % (definitionName)) from missing
    return self.parse_definition(found, condOverride)
def parse_definition(self, definition, condOverride=None):
if type(definition) not in (dict, list):
raise SigmaParseError("Expected map or list, got type %s: '%s'" % (type(definition), str(definition)))
@ -51,9 +54,12 @@ class SigmaParser:
else: # no condition given, use default from spec
cond = ConditionOR()
subcond = None
for value in definition:
if type(value) in (str, int, dict):
if type(value) in (str, int):
cond.add(value)
elif type(value) in (dict, list):
cond.add(self.parse_definition(value))
else:
raise SigmaParseError("Definition list may only contain plain values or maps")
elif type(definition) == dict: # map
@ -165,6 +171,9 @@ class SigmaConditionTokenizer:
def __iter__(self):
    """Return an iterator over self.tokens."""
    tokenIterator = iter(self.tokens)
    return tokenIterator
def __len__(self):
    """Return the number of entries in self.tokens."""
    tokenCount = len(self.tokens)
    return tokenCount
def __getitem__(self, i):
if type(i) == int:
return self.tokens[i]
@ -207,6 +216,9 @@ class ConditionBase(ParseTreeNode):
def add(self, item):
    """Append `item` to self.items."""
    children = self.items
    children.append(item)
def __iter__(self):
    """Return an iterator over self.items."""
    itemIterator = iter(self.items)
    return itemIterator
class ConditionAND(ConditionBase):
"""AND Condition"""
op = COND_AND
@ -237,6 +249,13 @@ class ConditionNOT(ConditionBase):
else:
raise ValueError("Only one element allowed in NOT condition")
@property
def item(self):
    """The first element of self.items, or None when self.items is empty."""
    try:
        first = self.items[0]
    except IndexError:
        first = None
    return first
class NodeSubexpression(ParseTreeNode):
"""Subexpression"""
def __init__(self, subexpr):
@ -245,15 +264,15 @@ class NodeSubexpression(ParseTreeNode):
# Parse tree converters: convert something into one of the parse tree node classes defined above
def convertAllOf(sigma, op, val):
    """Convert 'all of x' into ConditionAND

    val.matched is treated as a definition name: it is resolved and parsed
    by sigma.parse_definition_byname() with ConditionAND as condition
    override, and the result is wrapped in a NodeSubexpression.
    `op` is not used.
    """
    # parse_definition() takes the definition itself (map or list), not its
    # name, so the lookup has to go through parse_definition_byname().
    return NodeSubexpression(sigma.parse_definition_byname(val.matched, ConditionAND))
def convertOneOf(sigma, op, val):
    """Convert '1 of x' into ConditionOR

    val.matched is treated as a definition name: it is resolved and parsed
    by sigma.parse_definition_byname() with ConditionOR as condition
    override, and the result is wrapped in a NodeSubexpression.
    `op` is not used.
    """
    # parse_definition() takes the definition itself (map or list), not its
    # name, so the lookup has to go through parse_definition_byname().
    return NodeSubexpression(sigma.parse_definition_byname(val.matched, ConditionOR))
def convertId(sigma, op):
    """Convert search identifiers (lists or maps) into condition nodes according to spec defaults

    op.matched is treated as a definition name: it is resolved and parsed
    by sigma.parse_definition_byname() without a condition override, and
    the result is wrapped in a NodeSubexpression.
    """
    # parse_definition() takes the definition itself (map or list), not its
    # name, so the lookup has to go through parse_definition_byname().
    return NodeSubexpression(sigma.parse_definition_byname(op.matched))
# Condition parser class
class SigmaConditionParser:
@ -292,7 +311,7 @@ class SigmaConditionParser:
if lPos > rPos:
raise SigmaParseError("Closing parentheses at position " + str(rTok.pos) + " precedes opening at position " + str(lTok.pos))
subparsed = self.parseSearch(tokens[lPos + 1:rPos])
subparsed = self.parseSearch(tokens[lPos + 1:rPos])[0]
tokens = tokens[:lPos] + NodeSubexpression(subparsed) + tokens[rPos + 1:] # replace parentheses + expression with group node that contains parsed subexpression
# 2. Iterate over all known operators in given precedence
@ -316,7 +335,17 @@ class SigmaConditionParser:
tok_val2 = tokens[pos_val2]
treenode = operator[2](self.sigmaParser, tok_op, tok_val1, tok_val2)
tokens = tokens[:pos_val1] + treenode + tokens[pos_val2 + 1:]
if len(tokens) != 1: # parse tree must begin with exactly one node
raise ValueError("Parse tree must have exactly one start node!")
return tokens
def __str__(self):
    """Return the string form of self.parsedSearch."""
    rendered = str(self.parsedSearch)
    return rendered
def __len__(self):
    """Return the number of entries in self.parsedSearch."""
    nodeCount = len(self.parsedSearch)
    return nodeCount
def getParseTree(self):
    """Return the first element of self.parsedSearch (the root of the parse tree)."""
    rootNode = self.parsedSearch[0]
    return rootNode

View File

@ -32,6 +32,12 @@ if cmdargs.target_list:
print("%10s: %s" % (backend.identifier, backend.__doc__))
sys.exit(0)
try:
backend = backends.getBackend(cmdargs.target)()
except LookupError as e:
print("Backend not found!")
sys.exit(1)
for sigmafile in cmdargs.inputs:
print_verbose("* Processing Sigma input %s" % (sigmafile))
try:
@ -43,6 +49,7 @@ for sigmafile in cmdargs.inputs:
print_debug("Condition Tokens:", condtoken)
for condparsed in parser.condparsed:
print_debug("Condition Parse Tree:", condparsed)
print(backend.generate(condparsed))
except OSError as e:
print("Failed to open Sigma file %s: %s" % (sigmafile, str(e)))
except yaml.parser.ParserError as e:
@ -50,7 +57,7 @@ for sigmafile in cmdargs.inputs:
except SigmaParseError as e:
print("Sigma parse error in %s: %s" % (sigmafile, str(e)))
except NotImplementedError as e:
print("This tool currently doesn't support the provided input: " + str(e))
print("An unsupported feature is required for this Sigma rule: " + str(e))
print("Feel free to contribute for fun and fame, this is open source :) -> https://github.com/Neo23x0/sigma")
finally:
f.close()