Added sigma-similarity tool

Also fixed a bug in the backend base class that was triggered by the way
this tool uses backends.
This commit is contained in:
Thomas Patzke 2019-10-25 21:59:03 +02:00
parent a5ec6722a1
commit 30948b9c1a
4 changed files with 173 additions and 49 deletions

View File

@ -12,6 +12,7 @@ elasticsearch = "*"
elasticsearch-async = "*"
pymisp = "*"
PyYAML = ">=3.11"
progressbar2 = "*"
[requires]
python_version = "3.6"

122
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "5e571aa1a1b4f78e71563cc30e3f457c8359c36888c76b6ed68376dadce445bb"
"sha256": "f3f1c14d8b9cfcd5608e018017012b8712a94fb7a56f633ae179bd3451d636fb"
},
"pipfile-spec": 6,
"requires": {
@ -18,30 +18,20 @@
"default": {
"aiohttp": {
"hashes": [
"sha256:00d198585474299c9c3b4f1d5de1a576cc230d562abc5e4a0e81d71a20a6ca55",
"sha256:0155af66de8c21b8dba4992aaeeabf55503caefae00067a3b1139f86d0ec50ed",
"sha256:09654a9eca62d1bd6d64aa44db2498f60a5c1e0ac4750953fdd79d5c88955e10",
"sha256:199f1d106e2b44b6dacdf6f9245493c7d716b01d0b7fbe1959318ba4dc64d1f5",
"sha256:296f30dedc9f4b9e7a301e5cc963012264112d78a1d3094cd83ef148fdf33ca1",
"sha256:368ed312550bd663ce84dc4b032a962fcb3c7cae099dbbd48663afc305e3b939",
"sha256:40d7ea570b88db017c51392349cf99b7aefaaddd19d2c78368aeb0bddde9d390",
"sha256:629102a193162e37102c50713e2e31dc9a2fe7ac5e481da83e5bb3c0cee700aa",
"sha256:6d5ec9b8948c3d957e75ea14d41e9330e1ac3fed24ec53766c780f82805140dc",
"sha256:87331d1d6810214085a50749160196391a712a13336cd02ce1c3ea3d05bcf8d5",
"sha256:9a02a04bbe581c8605ac423ba3a74999ec9d8bce7ae37977a3d38680f5780b6d",
"sha256:9c4c83f4fa1938377da32bc2d59379025ceeee8e24b89f72fcbccd8ca22dc9bf",
"sha256:9cddaff94c0135ee627213ac6ca6d05724bfe6e7a356e5e09ec57bd3249510f6",
"sha256:a25237abf327530d9561ef751eef9511ab56fd9431023ca6f4803f1994104d72",
"sha256:a5cbd7157b0e383738b8e29d6e556fde8726823dae0e348952a61742b21aeb12",
"sha256:a97a516e02b726e089cffcde2eea0d3258450389bbac48cbe89e0f0b6e7b0366",
"sha256:acc89b29b5f4e2332d65cd1b7d10c609a75b88ef8925d487a611ca788432dfa4",
"sha256:b05bd85cc99b06740aad3629c2585bda7b83bd86e080b44ba47faf905fdf1300",
"sha256:c2bec436a2b5dafe5eaeb297c03711074d46b6eb236d002c13c42f25c4a8ce9d",
"sha256:cc619d974c8c11fe84527e4b5e1c07238799a8c29ea1c1285149170524ba9303",
"sha256:d4392defd4648badaa42b3e101080ae3313e8f4787cb517efd3f5b8157eaefd6",
"sha256:e1c3c582ee11af7f63a34a46f0448fca58e59889396ffdae1f482085061a2889"
"sha256:1e984191d1ec186881ffaed4581092ba04f7c61582a177b187d3a2f07ed9719e",
"sha256:259ab809ff0727d0e834ac5e8a283dc5e3e0ecc30c4d80b3cd17a4139ce1f326",
"sha256:2f4d1a4fdce595c947162333353d4a44952a724fba9ca3205a3df99a33d1307a",
"sha256:32e5f3b7e511aa850829fbe5aa32eb455e5534eaa4b1ce93231d00e2f76e5654",
"sha256:344c780466b73095a72c616fac5ea9c4665add7fc129f285fbdbca3cccf4612a",
"sha256:460bd4237d2dbecc3b5ed57e122992f60188afe46e7319116da5eb8a9dfedba4",
"sha256:4c6efd824d44ae697814a2a85604d8e992b875462c6655da161ff18fd4f29f17",
"sha256:50aaad128e6ac62e7bf7bd1f0c0a24bc968a0c0590a726d5a955af193544bcec",
"sha256:6206a135d072f88da3e71cc501c59d5abffa9d0bb43269a6dcd28d66bfafdbdd",
"sha256:65f31b622af739a802ca6fd1a3076fd0ae523f8485c52924a89561ba10c49b48",
"sha256:ae55bac364c405caa23a4f2d6cfecc6a0daada500274ffca4a9230e7129eac59",
"sha256:b778ce0c909a2653741cb4b1ac7015b5c130ab9c897611df43ae6a58523cb965"
],
"version": "==3.5.4"
"version": "==3.6.2"
},
"async-timeout": {
"hashes": [
@ -52,17 +42,17 @@
},
"attrs": {
"hashes": [
"sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79",
"sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399"
"sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c",
"sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"
],
"version": "==19.1.0"
"version": "==19.3.0"
},
"certifi": {
"hashes": [
"sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939",
"sha256:945e3ba63a0b9f577b1395204e13c3a231f9bc0223888be653286534e5873695"
"sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50",
"sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef"
],
"version": "==2019.6.16"
"version": "==2019.9.11"
},
"chardet": {
"hashes": [
@ -118,11 +108,11 @@
},
"elasticsearch": {
"hashes": [
"sha256:cbc73831c63fa2824538df76fcb2c4be007b43dbd9e7788ae70ea6d24109925b",
"sha256:d1b176b87a7fb75dca82978c82a4023e8b21cbc98f4018cb51190fb0b8b43764"
"sha256:693935914d59a517dfffdaab547ff906712a386d9e25027517464960221cbd4c",
"sha256:7644fa0a9ae524344185bda561826a781a5c6bd4d3eb98a24515c567aab88327"
],
"index": "pypi",
"version": "==7.0.2"
"version": "==7.0.5"
},
"elasticsearch-async": {
"hashes": [
@ -146,12 +136,26 @@
"markers": "python_version < '3.7'",
"version": "==1.1.0"
},
"importlib-metadata": {
"hashes": [
"sha256:aa18d7378b00b40847790e7c27e11673d7fed219354109d0e7b9e5b25dc3ad26",
"sha256:d5f18a79777f3aa179c145737780282e27b508fc8fd688cb17c7a813e8bd39af"
],
"version": "==0.23"
},
"jsonschema": {
"hashes": [
"sha256:5f9c0a719ca2ce14c5de2fd350a64fd2d13e8539db29836a86adc990bb1a068f",
"sha256:8d4a2b7b6c2237e0199c8ea1a6d3e05bf118e289ae2b9d7ba444182a2959560d"
"sha256:2fa0684276b6333ff3c0b1b27081f4b2305f0a36cf702a23db50edb141893c3f",
"sha256:94c0a13b4a0616458b42529091624e66700a17f847453e52279e35509a5b7631"
],
"version": "==3.0.2"
"version": "==3.1.1"
},
"more-itertools": {
"hashes": [
"sha256:409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832",
"sha256:92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4"
],
"version": "==7.2.0"
},
"multidict": {
"hashes": [
@ -189,18 +193,26 @@
},
"pathspec": {
"hashes": [
"sha256:54a5eab895d89f342b52ba2bffe70930ef9f8d96e398cccf530d21fa0516a873"
"sha256:e285ccc8b0785beadd4c18e5708b12bb8fcf529a1e61215b3feff1d1e559ea5c"
],
"version": "==0.5.9"
"version": "==0.6.0"
},
"progressbar2": {
"hashes": [
"sha256:7538d02045a1fd3aa2b2834bfda463da8755bd3ff050edc6c5ddff3bc616215f",
"sha256:eb774d1e0d03ea4730f381c13c2c6ae7abb5ddfb14d8321d7a58a61aa708f0d0"
],
"index": "pypi",
"version": "==3.47.0"
},
"pymisp": {
"hashes": [
"sha256:5bff5e7705d2697fd6e7110d1f316688d6106795cba4d453eec8c78c18b0e9f7",
"sha256:85d319e0e1d4e53a901501ad74679f3802201b5e12df2da443aaae1d2443e3b1",
"sha256:a2fe66bada1186abc6237dc151473e307619685b8168aaeb31b6112528638d9e"
"sha256:1983808d9a834c26d42d52871af1f86dc9739c9f2ee22091cf4a2a62ce6a171d",
"sha256:32675ce303f9d06698eb390c5381cb1de430d355e203612264bce6cd53972b95",
"sha256:9cf1187b5d618bd2b0e631cc877586b7cd5d02b59322a509a4f5ad07496cd171"
],
"index": "pypi",
"version": "==2.4.112"
"version": "==2.4.117"
},
"pyrsistent": {
"hashes": [
@ -215,6 +227,13 @@
],
"version": "==2.8.0"
},
"python-utils": {
"hashes": [
"sha256:34aaf26b39b0b86628008f2ae0ac001b30e7986a8d303b61e1357dfcdad4f6d3",
"sha256:e25f840564554eaded56eaa395bca507b0b9e9f0ae5ecb13a8cb785305c56d25"
],
"version": "==2.3.0"
},
"pyyaml": {
"hashes": [
"sha256:0113bc0ec2ad727182326b61326afa3d1d8280ae1122493553fd6f4397f33df9",
@ -259,10 +278,10 @@
},
"urllib3": {
"hashes": [
"sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1",
"sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232"
"sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398",
"sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86"
],
"version": "==1.25.3"
"version": "==1.25.6"
},
"wrapt": {
"hashes": [
@ -272,11 +291,11 @@
},
"yamllint": {
"hashes": [
"sha256:9a4fec2d40804979de5f54453fd1551bc1f8b59a7ad4a26fd7f26aeca34a83af",
"sha256:f97cd763fe7b588444a94cc44fd3764b832a613b5250baa2bfe8b84c91e4c330"
"sha256:24f05b7ff1a604120eeb5ff7afb7ed8792253bfa96ee83db9cec6d5c20feaf64",
"sha256:d42dbb35b3d28722a8c5c25de4593add0a6215b2732eb6932d89f38482c3d01c"
],
"index": "pypi",
"version": "==1.16.0"
"version": "==1.18.0"
},
"yarl": {
"hashes": [
@ -293,6 +312,13 @@
"sha256:e060906c0c585565c718d1c3841747b61c5439af2211e185f6739a9412dfbde1"
],
"version": "==1.3.0"
},
"zipp": {
"hashes": [
"sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e",
"sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335"
],
"version": "==0.6.0"
}
},
"develop": {}

91
tools/sigma-similarity Executable file
View File

@ -0,0 +1,91 @@
#!/usr/bin/env python3
# Calculates similarity of Sigma rules by transformation into a normalized
# string form and calculation of a string distance.
import argparse
import pathlib
import itertools
import difflib
import progressbar
from sigma.parser.collection import SigmaCollectionParser
from sigma.backends.base import SingleTextQueryBackend
from sigma.configuration import SigmaConfiguration
# Command line interface of the similarity tool.
argparser = argparse.ArgumentParser(description="Calculate a similarity score between Sigma rules.")
argparser.add_argument("--recursive", "-r", action="store_true", help="Recurse into directories")
# default=0 is required here: with action="count" the attribute stays None
# when the flag is not given, and None cannot be compared with an integer
# verbosity level.
argparser.add_argument("--verbose", "-v", action="count", default=0, help="Be verbose. Use once more for debug output.")
argparser.add_argument("--top", "-t", type=int, help="Only output the n most similar rule pairs.")
argparser.add_argument("--min-similarity", "-m", type=int, help="Only output pairs with a similarity above this threshold (percent)")
argparser.add_argument("inputs", nargs="+", help="Sigma input files")
args = argparser.parse_args()
def print_verbose(level, *values, **kwargs):
    """Print ``values`` if the verbosity requested on the command line is at least ``level``.

    All positional and keyword arguments after ``level`` are forwarded to
    ``print`` unchanged.
    """
    # The variadic parameter must not be named ``args``: it would shadow the
    # module-level ``args`` namespace, turning ``args.verbose`` into an
    # attribute lookup on a tuple.
    # ``args.verbose`` may be None when --verbose was never given (count action
    # without a default), so fall back to verbosity 0.
    if (args.verbose or 0) >= level:
        print(*values, **kwargs)
class SigmaNormalizationBackend(SingleTextQueryBackend):
    """Render Sigma rules in an artificial, normalized query syntax that can express every Sigma feature."""
    # Tokens and templates of the artificial query language
    andToken = " AND "
    orToken = " OR "
    notToken = " NOT "
    subExpression = "(%s)"
    listExpression = "[%s]"
    listSeparator = ","
    valueExpression = "%s"
    typedValueExpression = {}
    nullExpression = "NULL(%s)"
    notNullExpression = "NOTNULL(%s)"
    mapExpression = "{'%s':'%s'}"
    sort_condition_lists = True

    def generateListNode(self, node):
        """Generate the list from the string forms of its items in sorted order."""
        ordered_items = sorted(map(str, node))
        return super().generateListNode(ordered_items)

    def generateTypedValueNode(self, node):
        """Generate a typed value as ``type_<identifier>(<value>)``."""
        return f"type_{node.identifier}({node!s})"

    def generateAggregation(self, agg):
        """Generate the aggregation part; "near" aggregations get their own form."""
        if agg.aggfunc_notrans == "near":
            return f" near in={agg.include!s} ex={agg.exclude!s}"
        return f" | {agg.aggfunc_notrans}({agg.aggfield}) by {agg.groupfield} {agg.cond_op} {agg.condition}"
backend = SigmaNormalizationBackend(SigmaConfiguration())

# Collect the input files: every regular file below the given paths in
# recursive mode, otherwise the given paths themselves.
if args.recursive:
    paths = [p for pathname in args.inputs for p in pathlib.Path(pathname).glob("**/*") if p.is_file()]
else:
    paths = [pathlib.Path(pathname) for pathname in args.inputs]

# Parse each file. read_text() closes the file right after reading instead of
# leaving an open handle behind for every input.
parsed = {
    str(path): SigmaCollectionParser(path.read_text())
    for path in paths
}

# Normalize every rule of each collection with the normalization backend.
converted = {
    path: list(sigma_collection.generate(backend))
    for path, sigma_collection in parsed.items()
}

# Flatten to (path, index of the rule within its file, normalized rule).
converted_flat = (
    (path, i, normalized)
    for path, nlist in converted.items()
    for i, normalized in enumerate(nlist)
)

# Compare every unordered pair of normalized rules.
converted_pairs = list(itertools.combinations(converted_flat, 2))
similarities = [
    (item1[:2], item2[:2], difflib.SequenceMatcher(None, item1[2], item2[2]).ratio())
    for item1, item2 in progressbar.progressbar(converted_pairs)
]

# Output the pairs, most similar first.
shown = 0
for similarity in sorted(similarities, key=lambda s: s[2], reverse=True):
    if args.min_similarity and similarity[2] * 100 < args.min_similarity:  # finish after similarity drops below minimum
        break
    print("{:70} | {:2} | {:70} | {:2} | {:>3.2%}".format(*similarity[0], *similarity[1], similarity[2]))
    shown += 1
    if args.top and shown >= args.top:  # end after $top pairs
        break

View File

@ -90,7 +90,7 @@ class BaseBackend:
options = tuple() # a list of tuples with following elements: option name, default value, help text, target attribute name (option name if None)
config_required = True
def __init__(self, sigmaconfig, backend_options=None):
def __init__(self, sigmaconfig, backend_options=dict()):
"""
Initialize backend. This gets a sigmaconfig object, which is notified about the used backend class by
passing the object instance to it.
@ -221,10 +221,14 @@ class SingleTextQueryBackend(RulenameCommentMixin, BaseBackend, QuoteCharMixin):
mapListsSpecialHandling = False # Same handling for map items with list values as for normal values (strings, integers) if True, generateMapItemListNode method is called with node
mapListValueExpression = None # Syntax for field/value conditions where map value is a list
sort_condition_lists = False # Sort condition items for AND and OR conditions
def generateANDNode(self, node):
    """Generate all items of an AND node and join the results with ``andToken``.

    Items whose generated form is None are dropped. The remaining parts are
    sorted first when ``sort_condition_lists`` is set. Returns None when no
    part remains.
    """
    parts = []
    for child in node:
        rendered = self.generateNode(child)
        if rendered is not None:
            parts.append(rendered)
    if not parts:
        return None
    if self.sort_condition_lists:
        parts.sort()
    return self.andToken.join(parts)
@ -233,6 +237,8 @@ class SingleTextQueryBackend(RulenameCommentMixin, BaseBackend, QuoteCharMixin):
generated = [ self.generateNode(val) for val in node ]
filtered = [ g for g in generated if g is not None ]
if filtered:
if self.sort_condition_lists:
filtered = sorted(filtered)
return self.orToken.join(filtered)
else:
return None