EmlParser analyzer 1.0

This commit is contained in:
ninsmith 2018-05-29 22:58:05 +02:00
parent 13778d7dda
commit cdaf091604
3 changed files with 125 additions and 0 deletions

View File

@ -0,0 +1,18 @@
{
"name": "Eml_Parser",
"version": "1.0",
"author": "ninsmith",
"url": "https://github.com/TheHive-Project/Cortex-Analyzers",
"license": "AGPL-V3",
"baseconfig": "Eml_Parser",
"config": {
"check_tlp": false,
"max_tlp": 3,
"service": ""
},
"description": "Parse Eml message",
"dataTypeList": [
"file"
],
"command": "EmlParser/parse.py"
}

104
analyzers/EmlParser/parse.py Executable file
View File

@ -0,0 +1,104 @@
#!/usr/bin/env python3
# encoding: utf-8
import email.parser
import eml_parser
from cortexutils.analyzer import Analyzer
import magic
import binascii
from pprint import pprint
class EmlParserAnalyzer(Analyzer):
def __init__(self):
Analyzer.__init__(self)
#filename of the observable
self.filename = self.getParam('attachment.name', 'noname.ext')
#filepath to the observable, looks like /tmp/cortex-4224850437865873235-datafile
self.filepath = self.getParam('file', None, 'File is missing')
def run(self):
if self.data_type == 'file':
try:
parsingResult = parseEml(self.filepath)
self.report(parsingResult)
except Exception as e:
self.unexpectedError(e)
else:
self.notSupported()
def summary(self, raw):
taxonomies = []
level = "info"
namespace = "EmlParser"
predicate = "Attachments"
value = "\"0\""
if "attachments" in raw:
value = len(raw["attachments"])
taxonomies.append(self.build_taxonomy(level, namespace, predicate, value))
return {"taxonomies": taxonomies}
def parseEml(filepath):
result = dict()
result['subject'] = str()
result['date'] = str()
result['receivers'] = str()
result['displayFrom'] = str()
result['sender'] = str()
result['topic'] = str()
result['bcc'] = str()
result['displayto'] = str()
result['headers'] = str()
result['body'] = str()
result['attachments'] = list()
#read the file
with open(filepath, 'r') as f:
raw_eml = f.read()
#parsing the headers with the email library
#cause eml_parser does not provide raw headers (as far as I know)
hParser = email.parser.HeaderParser()
h = hParser.parsestr(raw_eml)
result['headers'] = (str(h).split('\n\n')[0])
parsed_eml = eml_parser.eml_parser.decode_email(filepath, include_raw_body=True, include_attachment_data=True)
#parsed_eml['header'].keys() gives:
#dict_keys(['received_foremail', 'from', 'date', 'received_domain', 'to', 'header', 'received_ip', 'subject', 'received'])
result['subject'] = ', '.join(parsed_eml.get('header', '').get('header', '').get('subject', ''))
result['date'] = ', '.join(parsed_eml.get('header', '').get('header', '').get('date', ''))
result['receivers'] = ', '.join(parsed_eml.get('header', '').get('to', ''))
result['displayFrom'] = parsed_eml.get('header', '').get('from', '')
result['sender'] = ', '.join(parsed_eml.get('header', '').get('header', '').get('x-env-sender', ''))
result['topic'] = ', '.join(parsed_eml.get('header', '').get('header', '').get('thread-topic', ''))
result['bcc'] = parsed_eml.get('header', '').get('header', '').get('bcc', '')
result['displayto'] = ', '.join(parsed_eml.get('header', '').get('header', '').get('to', ''))
result['body'] = parsed_eml['body'][0]['content']
#attachments
try:
for attachment in parsed_eml['attachment']:
attachmentSumUp = dict()
attachmentSumUp['filename'] = attachment.get('filename', '')
#because of module conflict name with magic
#eml-parser does not provide the mime type
#it has to be calculated, the attachment is in base64
attachmentSumUp['mime'] = magic.from_buffer(binascii.a2b_base64(attachment['raw']))
attachmentSumUp['extension'] = attachment.get('extension', '')
result['attachments'].append(attachmentSumUp)
except KeyError as e:
pass
return result
if __name__ == '__main__':
EmlParserAnalyzer().run()

View File

@ -0,0 +1,3 @@
cortexutils-1.2.4;python_version>='3.5'
eml_parser-1.8;python_version>='3.5'
python-magic-0.4.15;python_version>='3.5'