New FileInfo ready to be tested

Jérôme Leonard 2018-06-06 11:39:47 +02:00
commit 33ac4edb5e
No known key found for this signature in database
GPG Key ID: C5D0D898D56C3D9D
19 changed files with 1014 additions and 1480 deletions

.gitignore vendored
View File

@@ -7,3 +7,11 @@ thehive-templates/*.sh
 .DS_Store
 Cortex-analyzers.iml
+# python-venv
+bin
+lib
+lib64
+pyvenv.cfg
+share

View File

@@ -1,11 +1,14 @@
 {
-"name": "File_Info",
-"version": "2.0",
-"author": "CERT-BDF",
+"name": "FileInfo",
+"version": "3.0",
+"author": "TheHive-Project",
 "url": "https://github.com/TheHive-Project/Cortex-Analyzers",
 "license": "AGPL-V3",
 "description": "Parse files in several formats such as OLE and OpenXML to detect VBA macros, extract their source code, generate useful information on PE, PDF files...",
 "dataTypeList": ["file"],
-"baseConfig": "File_Info",
-"command": "File_Info/fileinfo_analyzer.py"
+"baseConfig": "FileInfo",
+"command": "FileInfo/fileinfo_analyzer.py",
+"configurationItems": [
+]
 }

View File

@@ -0,0 +1,53 @@
#!/usr/bin/env python
import pyexifinfo
import magic
from cortexutils.analyzer import Analyzer
from submodules import available_submodules
from submodules.submodule_metadata import MetadataSubmodule
class FileInfoAnalyzer(Analyzer):
def __init__(self):
Analyzer.__init__(self)
self.filepath = self.get_param('file', None, 'File parameter is missing.')
self.filename = self.get_param('filename', None, 'Filename is missing.')
self.filetype = pyexifinfo.fileType(self.filepath)
self.mimetype = magic.Magic(mime=True).from_file(self.filepath)
def summary(self, raw):
taxonomies = []
for submodule in raw['results']:
taxonomies += submodule['summary']['taxonomies']
return {'taxonomies': taxonomies}
def run(self):
results = []
# Add metadata to result directly as it's mandatory
m = MetadataSubmodule()
metadata_results = m.analyze_file(self.filepath)
results.append({
'submodule_name': m.name,
'results': metadata_results,
'summary': m.module_summary()
})
for module in available_submodules:
if module.check_file(file=self.filepath, filetype=self.filetype, filename=self.filename,
mimetype=self.mimetype):
module_results = module.analyze_file(self.filepath)
module_summaries = module.module_summary()
results.append({
'submodule_name': module.name,
'results': module_results,
'summary': module_summaries
})
self.report({'results': results})
if __name__ == '__main__':
FileInfoAnalyzer().run()
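For reference, a minimal sketch (not part of this commit) of the dict that run() passes to self.report(); the key names mirror the code above, while the concrete values are made up:

# Hypothetical illustration of the report structure assembled by run().
example_report = {
    'results': [
        {
            'submodule_name': 'Basic properties',    # MetadataSubmodule.name
            'results': [                             # built via add_result_subsection()
                {'submodule_section_header': 'Hashes',
                 'submodule_section_content': {'md5': '...', 'sha1': '...',
                                               'sha256': '...', 'ssdeep': '...'}}
            ],
            'summary': {'taxonomies': [
                {'level': 'info', 'namespace': 'FileInfo',
                 'predicate': 'Filetype', 'value': 'PDF'}
            ]}
        }
    ]
}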

View File

@@ -0,0 +1,10 @@
cortexutils
python-magic
ssdeep
pyexifinfo
pefile
git+https://github.com/AnyMaster/pehashng
git+https://github.com/Rafiot/pdfid.git
oletools
git+https://github.com/mattgwwalker/msg-extractor.git
IMAPClient

View File

@@ -0,0 +1,11 @@
from .submodule_oletools import OLEToolsSubmodule
from .submodule_pe import PESubmodule
from .submodule_pdfid import PDFIDSubmodule
from .submodule_outlook import OutlookSubmodule
available_submodules = [
PESubmodule(),
OLEToolsSubmodule(),
PDFIDSubmodule(),
OutlookSubmodule()
]
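An additional submodule is registered by importing it here and appending an instance to available_submodules. A hedged sketch using the GZIP test submodule shipped in this commit; the module filename submodule_gzip is an assumption, since file names are not visible in this view, and nothing in the commit implies it should be enabled by default:

# Sketch: enabling the GZIP example submodule (assumed to live in submodule_gzip.py).
from .submodule_gzip import GZIPSubmodule

available_submodules = [
    PESubmodule(),
    OLEToolsSubmodule(),
    PDFIDSubmodule(),
    OutlookSubmodule(),
    GZIPSubmodule()
]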

View File

@@ -0,0 +1,81 @@
class SubmoduleBaseclass(object):
def __init__(self):
self.name = 'This is where the module name should go.'
self.results = []
self.summary = {'taxonomies': []}
def get_name(self):
"""
Returns the name of analyzer submodule.
:return: name
:rtype: str
"""
return self.name
def build_taxonomy(self, level, namespace, predicate, value):
"""
:param level: taxonomy level: info, safe, suspicious or malicious
:param namespace: name of the analyzer (here: FileInfo)
:param predicate: name of the service or submodule the value belongs to
:param value: value of the taxonomy entry
:return: dict
"""
return {
'level': level,
'namespace': namespace,
'predicate': predicate,
'value': value
}
def check_file(self, **kwargs):
"""
Check whether a file can be analyzed by the respective submodule. The check can be based on the file path, the
filetype string returned by pyexifinfo.fileType(), the original filename or the mime type; each submodule decides
for itself whether a file fits its analysis method.
If this returns True, the analyze_file() function gets called.
:param file: path to the file, used for checking the file directly.
:type file: str
:param filetype: used for checking compatibility via the filetype string of pyexifinfo.fileType().
:type filetype: str
:param filename: the original filename, not the one given by Cortex.
:type filename: str
:param mimetype: the mime type reported by python-magic.
:type mimetype: str
:return: True on success, False otherwise
:rtype: bool
"""
return False
def analyze_file(self, path):
"""
This starts the analysis process. It only gets called if check_file() returned True.
:param path: path to file
:return:
:rtype: dict
"""
return {}
def module_summary(self):
"""
Build the summary for a submodule from its results. Should be overridden by every submodule that needs to show mini-reports (taxonomies).
:return:
:rtype: dict
"""
return self.summary
def add_result_subsection(self, subsection_header, results):
"""
Add a subsection to the results section of this submodule.
:param subsection_header: header of the subsection
:param results: result dictionary or list
:return:
"""
self.results.append({
"submodule_section_header": subsection_header,
"submodule_section_content": results
})

View File

@@ -0,0 +1,18 @@
from .submodule_base import SubmoduleBaseclass
class GZIPSubmodule(SubmoduleBaseclass):
"""This is just for showing how to include a submodule. No real functionality here."""
def __init__(self):
SubmoduleBaseclass.__init__(self)
self.name = 'GZIP Test'
def check_file(self, **kwargs):
if kwargs.get('filetype') == 'GZIP':
return True
return False
def analyze_file(self, path):
self.add_result_subsection('TEST', {})
return self.results

View File

@@ -0,0 +1,81 @@
import magic
import hashlib
import io
import os
import pyexifinfo
from .submodule_base import SubmoduleBaseclass
from ssdeep import Hash
class MetadataSubmodule(SubmoduleBaseclass):
def __init__(self):
SubmoduleBaseclass.__init__(self)
self.name = 'Basic properties'
def check_file(self, **kwargs):
"""
The Metadata submodule analyzes every file, therefore this always returns True.
:return: True
"""
return True
def exif(self, path):
# Exif info
exifreport = pyexifinfo.get_json(path)
result = dict((key, value) for key, value in exifreport[0].items() if
not (key.startswith("File") or key.startswith("SourceFile")))
return result
def module_summary(self):
taxonomy = {'level': 'info', 'namespace': 'FileInfo', 'predicate': 'Filetype', 'value': ''}
taxonomies = []
for section in self.results:
if section['submodule_section_header'] == 'File information':
t = taxonomy
t['value'] = section['submodule_section_content']['Filetype']
taxonomies.append(t)
else:
pass
self.summary['taxonomies'] = taxonomies
return self.summary
def analyze_file(self, path):
# Hash the file
with io.open(path, 'rb') as fh:
buf = fh.read()
md5 = hashlib.md5()
md5.update(buf)
sha1 = hashlib.sha1()
sha1.update(buf)
sha256 = hashlib.sha256()
sha256.update(buf)
ssdeep = Hash()
ssdeep.update(buf)
self.add_result_subsection('Hashes', {
'md5': md5.hexdigest(),
'sha1': sha1.hexdigest(),
'sha256': sha256.hexdigest(),
'ssdeep': ssdeep.digest()
})
self.add_result_subsection('Exif Info', self.exif(path))
# Get libmagic info
magicliteral = magic.Magic().from_file(path)
mimetype = magic.Magic(mime=True).from_file(path)
# filetype = pyexifinfo.fileType(path)
self.add_result_subsection('File information', {
'Magic literal': magicliteral,
'MimeType': mimetype,
'Filetype': pyexifinfo.fileType(path),
'Filesize': os.path.getsize(path)}
)
return self.results

View File

@@ -0,0 +1,110 @@
"""FileInfo oletools submodule; WIP"""
from .submodule_base import SubmoduleBaseclass
from oletools.olevba3 import VBA_Parser_CLI
from oletools.msodde import process_file
from oletools.olevba3 import __version__ as olevba_version
from oletools.msodde import __version__ as msodde_version
class OLEToolsSubmodule(SubmoduleBaseclass):
"""Try to inspect files using python oletools."""
def __init__(self):
SubmoduleBaseclass.__init__(self)
self.name = 'Oletools Submodule'
def check_file(self, **kwargs):
"""Oletools accepts MS office documents."""
try:
if kwargs.get('filetype') in [
'DOC',
'DOCM',
'DOCX',
'XLS',
'XLSM',
'XLSX',
'PPT',
'PPTM',
'PPTX'
]:
return True
except KeyError:
return False
return False
def analyze_file(self, path):
# Run the analyze functions
self.analyze_vba(path)
self.analyze_dde(path)
return self.results
def module_summary(self):
taxonomies = []
level = 'info'
namespace = 'FileInfo'
predicate = ''
value = ''
for section in self.results:
if section['submodule_section_header'] == 'Olevba':
predicate = 'Olevba'
type_list = []
for a in section['submodule_section_content']['analysis']:
if a["type"] not in type_list:
type_list.append(a["type"])
if "Suspicious" in type_list:
level = 'suspicious'
if "VBA string" in type_list:
taxonomies.append(self.build_taxonomy(level, namespace, predicate, "VBA string"))
if "Base64 String" in type_list:
taxonomies.append(self.build_taxonomy(level, namespace, predicate, "Base64 string"))
if "Hex String" in type_list:
taxonomies.append(self.build_taxonomy(level, namespace, predicate, "Hex string"))
if section['submodule_section_header'] == 'DDE Analysis':
predicate = 'DDE'
if 'Info' in section['submodule_section_content']:
level = 'info'
taxonomies.append(self.build_taxonomy(level, namespace, predicate, 'None'))
else:
level = 'suspicious'
taxonomies.append(self.build_taxonomy(level, namespace, predicate, 'URL found'))
self.summary['taxonomies'] = taxonomies
self.summary['Olevba'] = olevba_version
self.summary['Msodde'] = msodde_version
return self.summary
def analyze_vba(self, path):
"""Analyze a given sample for malicious vba."""
try:
vba_parser = VBA_Parser_CLI(path, relaxed=True)
vbaparser_result = vba_parser.process_file_json(show_decoded_strings=True,
display_code=True,
hide_attributes=False,
vba_code_only=False,
show_deobfuscated_code=True,
deobfuscate=True)
self.add_result_subsection('Olevba', vbaparser_result)
except TypeError:
self.add_result_subsection('Oletools VBA Analysis failed', 'Analysis failed due to a filetype error. '
'The file does not seem to be a valid MS-Office '
'file.')
def analyze_dde(self, path):
version = {'Msodde version': msodde_version}
results = process_file(path)
if len(results) > 0:
self.add_result_subsection('DDE Analysis', {'DDEUrl': results})
else:
self.add_result_subsection('DDE Analysis', {'Info': 'No DDE URLs found.'})

View File

@@ -0,0 +1,48 @@
import hashlib
from .submodule_base import SubmoduleBaseclass
from ExtractMsg import Message, Attachment
from imapclient.imapclient import decode_utf7
class OutlookSubmodule(SubmoduleBaseclass):
"""Parse Outlook Mail and get useful information"""
def __init__(self):
SubmoduleBaseclass.__init__(self)
self.name = 'Outlook mail Information'
def check_file(self, **kwargs):
try:
if kwargs.get('mimetype') in ['application/vnd.ms-outlook', 'application/CDFV2-unknown']:
return True
except KeyError:
return False
return False
def analyze_file(self, path):
m = Message(path)
def xstr(s):
return '' if s is None else str(s)
attachments = m.attachments
a = []
for attachment in attachments:
sha256 = hashlib.sha256()
sha256.update(attachment.data)
a.append({'name': attachment.longFilename,
'sha256': sha256.hexdigest()})
email = {'header': xstr(m.header),
'from': xstr(m.sender),
'to': xstr(m.to),
'cc': xstr(m.cc),
'subject': xstr(m.subject),
'date': xstr(m.date),
'body': decode_utf7(m.body),
'attachments': a
}
self.add_result_subsection('Email details', email)
return self.results

View File

@@ -0,0 +1,54 @@
from pdfid.pdfid import *
import json
from .submodule_base import SubmoduleBaseclass
class PDFIDSubmodule(SubmoduleBaseclass):
def __init__(self):
SubmoduleBaseclass.__init__(self)
self.name = 'PDF Information'
def check_file(self, **kwargs):
"""
The PDFiD submodule analyzes PDF documents only; the check is based on the filetype string returned by pyexifinfo.
:return: True if the file is a PDF
"""
if kwargs.get('filetype') in ['PDF']:
return True
def module_summary(self):
taxonomies = []
level = 'info'
namespace = 'FileInfo'
predicate = 'PDFiD'
value = ''
pdfid_version = ''
for section in self.results:
if section['submodule_section_header'] == 'PDFiD Information':
for subsection in section['submodule_section_content']:
if subsection['pdfid']:
pdfid_version = subsection['pdfid']['version']
for keyword in subsection['pdfid']['keywords']['keyword']:
if keyword['name'] in ['/JS', '/JavaScript', '/OpenAction'] and keyword['count'] > 0:
level = 'suspicious'
taxonomies.append(self.build_taxonomy(level, namespace, predicate, keyword['name']))
self.summary['taxonomies'] = taxonomies
self.summary['pdfid'] = pdfid_version
return self.summary
def pdfid_cmd(self, path):
try:
j = json.loads(
PDFiD2JSON(PDFiD(path, allNames=True, extraData=True, disarm=False, force=True), force=True))
except Exception as e:
return e
return j
def analyze_file(self, path):
self.add_result_subsection('PDFiD Information', self.pdfid_cmd(path))
return self.results
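For orientation, a hedged sketch of the structure module_summary() expects from the pdfid_cmd() output (one list entry per analyzed file); the key names follow the code above and the bundled pdfid source elsewhere in this commit, the values are made up:

# Hypothetical shape of the parsed PDFiD2JSON output consumed by module_summary().
example_pdfid_output = [
    {'pdfid': {
        'version': '0.2.1',
        'keywords': {'keyword': [
            {'name': '/JS', 'count': 1, 'hexcodecount': 0},
            {'name': '/OpenAction', 'count': 1, 'hexcodecount': 0}
        ]},
        'header': '%PDF-1.7',
        'isPdf': 'True'
    }}
]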

View File

@@ -0,0 +1,138 @@
import pefile
import pehashng
from pefile import __version__ as pefile_version
from .submodule_base import SubmoduleBaseclass
class PESubmodule(SubmoduleBaseclass):
def __init__(self):
SubmoduleBaseclass.__init__(self)
self.name = 'PE Information'
def check_file(self, **kwargs):
"""
The PE submodule analyzes Windows executables (Win32/Win64 EXE as reported by pyexifinfo).
:return: True for PE files, False otherwise
"""
try:
if kwargs.get('filetype') in ['Win32 EXE', 'Win64 EXE']:
return True
except KeyError:
return False
return False
@staticmethod
def pe_machine(pedict):
if pedict:
machinetype = pedict.get('FILE_HEADER').get('Machine').get('Value')
mt = {'0x14c': 'x86', '0x0200': 'Itanium', '0x8664': 'x64'}
if type(machinetype) is int:
return mt.get(str(hex(machinetype)), str(machinetype) + ' => Not x86/64 or Itanium')
else:
return str(machinetype) + ' => Not x86/64 or Itanium'
@staticmethod
def pe_type(pe):
if pe.is_exe():
return "EXE"
elif pe.is_dll():
return "DLL"
elif pe.is_driver():
return "DRIVER"
else:
return "UNKNOWN"
@staticmethod
def pe_dump(pe):
return pe.dump_info()
@staticmethod
def compilation_timestamp(pedict):
if pedict:
return pedict.get('FILE_HEADER').get('TimeDateStamp').get('Value')
else:
return 'None'
@staticmethod
def pe_entrypoint(pedict):
if pedict:
return hex(pedict.get('OPTIONAL_HEADER').get('AddressOfEntryPoint').get('Value'))
else:
return 'None'
def pe_info(self, pe):
pedict = pe.dump_dict()
table = []
try:
for fileinfo in pe.FileInfo:
if fileinfo.Key.decode() == 'StringFileInfo':
for stringtable in fileinfo.StringTable:
for entry in stringtable.entries.items():
table.append({'Info': entry[0].decode(), 'Value': entry[1].decode()})
table.append({'Info': 'Compilation Timestamp',
'Value': self.compilation_timestamp(pedict)})
table.append({'Info': 'Target machine', 'Value': self.pe_machine(pedict)})
table.append({'Info': 'Entry Point', 'Value': self.pe_entrypoint(pedict)})
return table
except Exception as excp:
return 'None'
@staticmethod
def pe_iat(pe):
table = []
if pe:
try:
for entry in pe.DIRECTORY_ENTRY_IMPORT:
imp = {
'entryname': entry.dll.decode(),
'symbols': []
}
for symbol in entry.imports:
if symbol.name is not None:
imp['symbols'].append(symbol.name.decode())
table.append(imp)
except AttributeError:
pass
return table
# PE:Sections list of {Name, Size, Entropy, MD5, SHA1, SHA256, SHA512} #
@staticmethod
def pe_sections(pe):
table = []
if pe:
for entry in pe.sections:
sect = {'entryname': str(entry.Name.decode()), 'SizeOfRawData': hex(entry.SizeOfRawData),
'Entropy': entry.get_entropy(),
'MD5': entry.get_hash_md5(),
'SHA1': entry.get_hash_sha1(),
'SHA256': entry.get_hash_sha256(),
'SHA512': entry.get_hash_sha512()}
table.append(sect)
return table
def module_summary(self):
self.summary['pefile'] = pefile_version
return self.summary
def analyze_file(self, path):
try:
pe = pefile.PE(path)
except Exception:
return "Failed processing {}".format(path)
self.add_result_subsection('Headers', self.pe_info(pe))
self.add_result_subsection('Hashes', {
'imphash': pe.get_imphash(),
'pehash': pehashng.pehashng(pe)
})
self.add_result_subsection('Import Address Tables', self.pe_iat(pe))
self.add_result_subsection('Sections', self.pe_sections(pe))
self.add_result_subsection('pefile raw output', self.pe_dump(pe))
return self.results
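Since the commit message flags the analyzer as ready to be tested, a minimal sketch of exercising PESubmodule on its own may help; the import path and the sample file name are assumptions, not taken from this diff:

# Hypothetical standalone check of the PE submodule on a local sample.
import pyexifinfo
from submodules.submodule_pe import PESubmodule   # assumed module path

sample = 'sample.exe'                              # placeholder PE file
sub = PESubmodule()
if sub.check_file(filetype=pyexifinfo.fileType(sample)):
    print(sub.analyze_file(sample))                # list of result subsections
    print(sub.module_summary())                    # summary incl. pefile version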

View File

@@ -1,182 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
import sys
import json
import codecs
import magic
from lib.File_analysis import file
from cortexutils.analyzer import Analyzer
class FileAnalyzer(Analyzer):
def __init__(self):
Analyzer.__init__(self)
self.filename = self.get_param('filename', 'noname.ext')
self.filepath = self.get_param('file', None, 'File is missing')
def file_info(self, report):
result = report
f = file(self.filepath)
try:
result['Mimetype'] = f.mimetype()
except Exception as excp:
self.error(str(excp))
result['Exif'] = f.exif()
result['Magic'] = f.magic()
result['filetype'] = f.filetype()
result['Identification'] = {'MD5': f.md5(),
'SHA1': f.sha1(),
'SHA256': f.sha256(),
'ssdeep': f.ssdeep()}
return result
# PE_Info analyzer
def pe_info(self, report):
result = report
f = file(self.filepath)
result['Identification'].update({'impash': f.imphash(),
'ssdeep': f.ssdeep(),
'pehash': f.pehash(),
'OperatingSystem': f.PE_OperatingSystem(),
'Type': f.PEtype()})
result['PE'] = {}
result['PE']['BasicInformation'] = {'FileInfo': f.PE_info(),
'FileSize': f.filesize(),
'TargetMachine': f.PE_Machine(),
'CompilationTimestamp': f.PE_CompilationTimestamp(),
'EntryPoint': f.PE_EntryPoint()}
result['PE']['Sections'] = f.PE_sections()
result['PE']['ImportAdressTable'] = f.PE_iat()
return result
def pe_summary(self, report):
result = {}
detections = {}
result.update({'detections': detections})
result.update({'filetype': report['filetype']})
return result
# PDFiD results analysis -- input for full report and summary
def pdfid_analysis(self, report):
# Parse detections
detections = {}
filetype = report['filetype']
keywords = report['PDF']['pdfid'][0]['pdfid']['keywords']['keyword']
score = 0
for obj in keywords:
if obj['name'] in ['/JavaScript', '/OpenAction', '/RichMedia', '/ObjStm', '/Launch']:
score = score + obj['count']
detections[obj['name']] = obj['count']
if score > 0:
suspicious = True
else:
suspicious = False
return {'score': score, 'detections': detections, 'suspicious': suspicious, 'filetype': filetype}
# PDF_Info analyzer
def pdf_info(self, report):
result = report
f = file(self.filepath)
result['PDF'] = {}
result['PDF']['pdfid'] = f.pdfid_cmd()
result['PDF']['pdfid'][0]['pdfid']['filename'] = self.filename
result['PDF']['pdfid'][0]['detections'] = self.pdfid_analysis(result)['detections']
result['PDF']['pdfid'][0]['score'] = self.pdfid_analysis(result)['score']
result['PDF']['pdfid'][0]['suspicious'] = self.pdfid_analysis(result)['suspicious']
return result
def pdf_summary(self, report):
result = {}
result.update({'score': self.pdfid_analysis(report)['score']})
result.update({'suspicious': self.pdfid_analysis(report)['suspicious']})
result.update({'detections': self.pdfid_analysis(report)['detections']})
result.update({'filetype': self.pdfid_analysis(report)['filetype']})
return result
# Office_Info
def msoffice_info(self, report):
result = report
f = file(self.filepath)
result['MSOffice'] = {}
result['MSOffice']['olevba'] = f.olevba_info()
return result
# MSOffice_Summary
def msoffice_summary(self, report):
r = report['MSOffice']['olevba']
result = {}
detections = {}
result.update({'filetype': report['filetype']})
detections['vba'] = r['vba']
detections['Base64 Strings'] = r['Base64 Strings']
detections['Hex Strings'] = r['Hex Strings']
result.update({'detections': detections})
result.update({'suspicious': r['Suspicious']})
return result
# SUMMARY
def summary(self, full_report):
taxonomies = []
level = "info"
namespace = "FileInfo"
predicate = "Filetype"
if full_report['Mimetype'] in ['application/x-dosexec']:
pereport = self.pe_summary(full_report)
taxonomies.append(self.build_taxonomy(level, namespace, predicate, pereport['filetype']))
elif full_report['Mimetype'] in ['application/pdf']:
pdfreport = self.pdf_summary(full_report)
value = "\"{}\"".format(pdfreport['filetype'])
if pdfreport['suspicious']:
level = 'suspicious'
taxonomies.append(self.build_taxonomy(level, namespace, predicate, value))
elif (full_report['filetype'] in ['DOC', 'DOCM', 'DOCX',
'XLS', 'XLSM', 'XLSX',
'PPT', "PPTM", 'PPTX']):
msreport = self.msoffice_summary(full_report)
value = "\"{}\"".format(msreport['filetype'])
if msreport['suspicious']:
level = 'suspicious'
taxonomies.append(self.build_taxonomy(level, namespace, predicate, value))
else:
value = "\"{}\"".format(full_report['filetype'])
level = 'info'
taxonomies.append(self.build_taxonomy(level, namespace, predicate, value))
result = {'taxonomies': taxonomies}
return result
def specific_info(self, report):
# run specific program for PE
if report['Mimetype'] in ['application/x-dosexec']:
self.pe_info(report)
# run specific program for PDF
if report['Mimetype'] in ['application/pdf']:
self.pdf_info(report)
# run specific program for MSOffice
if (report['filetype'] in ['DOC', 'DOCM', 'DOCX',
'XLS', 'XLSM', 'XLSX',
'PPT', "PPTM", 'PPTX']):
self.msoffice_info(report)
def run(self):
full_report = {}
if self.data_type == 'file':
try:
self.file_info(full_report)
self.specific_info(full_report)
self.report(full_report)
except Exception as e:
self.unexpectedError(e)
else:
self.notSupported()
if __name__ == '__main__':
FileAnalyzer().run()

View File

@@ -1,268 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
import sys
import os
import json
import pefile
import hashlib
import pydeep
import magic
import pyexifinfo
import re
import pehashng
from lib.pdfid import *
from StringIO import StringIO
reload(sys)
sys.setdefaultencoding('utf-8')
class file:
def __init__(self, filepath):
self.path = filepath
self.filename = os.path.basename(filepath)
self.stream = open(filepath, 'r').read()
if magic.Magic(mime=True).from_file(filepath) == 'application/x-dosexec':
try:
self.pe = pefile.PE(filepath)
self.pedict = self.pe.dump_dict()
except Exception as excp:
print('Failed processing %s') % filepath
# Magic
def magic(self):
return magic.Magic().from_file(self.path)
def mimetype(self):
return magic.Magic(mime=True).from_file(self.path)
# FileType
def filetype(self):
return pyexifinfo.fileType(self.path)
# ExifTool
def exif(self):
exifreport=pyexifinfo.get_json(self.path)
# result = json.dumps(exifreport).decode('unicode-escape').encode('utf8')
result=dict((key,value) for key,value in exifreport[0].iteritems() if not (key.startswith("File") or key.startswith("SourceFile")))
return result
# File hash
def md5(self):
return hashlib.md5(self.stream).hexdigest();
def sha1(self):
return hashlib.sha1(self.stream).hexdigest();
def sha256(self):
return hashlib.sha256(self.stream).hexdigest();
def ssdeep(self):
return pydeep.hash_file(self.path)
# PE: impash #
def imphash(self):
return self.pe.get_imphash()
# PE: pehash #
def pehash(self):
if self.pe:
return pehashng.pehashng(self.pe)
# Fileinfo #
def filesize(self):
return os.path.getsize(self.path)
##########
# PE #
##########
# PE : info #
def PE_info(self):
table=[]
try:
for fileinfo in self.pe.FileInfo:
if fileinfo.Key == 'StringFileInfo':
for stringtable in fileinfo.StringTable:
for entry in stringtable.entries.items():
table.append({'Info':entry[0], 'Value':entry[1]})
return table
except Exception as excp:
return 'None'
# PE: type #
def PEtype(self):
if self.pe and self.pe.is_dll():
return "DLL"
if self.pe and self.pe.is_driver():
return "DRIVER"
if self.pe and self.pe.is_exe():
return "EXE"
# PE: Timestamp #
def PE_CompilationTimestamp(self):
if self.pe:
return self.pedict['FILE_HEADER']['TimeDateStamp']['Value']
# PE: OS Version #
def PE_OperatingSystem(self):
if self.pe:
return str(self.pedict['OPTIONAL_HEADER']['MajorOperatingSystemVersion']['Value']) + "." \
+ str(self.pedict['OPTIONAL_HEADER']['MinorOperatingSystemVersion']['Value'])
# PE:Machine type #
def PE_Machine(self):
if self.pe:
machinetype = self.pedict['FILE_HEADER']['Machine']['Value']
mt = {'0x14c': 'x86', '0x0200': 'Itanium', '0x8664': 'x64'}
if type(machinetype) is int:
return mt[str(hex(machinetype))]
else:
return str(machinetype) + ' => Not x86/64 or Itanium'
# PE:Entry Point #
def PE_EntryPoint(self):
if self.pe:
return hex(self.pedict['OPTIONAL_HEADER']['AddressOfEntryPoint']['Value'])
# PE:IAT list of {'entryname':'name', 'symbols':[list of symbols]}#
def PE_iat(self):
if self.pe:
table = []
for entry in self.pe.DIRECTORY_ENTRY_IMPORT:
imp = {'entryname': '', 'symbols': []}
imp['entryname']=entry.dll
for symbol in entry.imports:
imp['symbols'].append(symbol.name)
table.append(imp)
return table
# PE Resources : WORK IN PROGRESS #
def PE_resources(self):
for rsrc in self.pe.DIRECTORY_ENTRY_RESOURCE.entries:
for entry in rsrc.directory.entries:
print entry.name.__str__()
for i in entry.directory.entries:
print i.data.lang
print i.data.sublang
# PE:Sections list of {Name, Size, Entropy, MD5, SHA1, SHA256, SHA512} #
def PE_sections(self):
if self.pe:
table = []
for entry in self.pe.sections:
sect = {'entryname':str(entry.Name),'SizeOfRawData':hex(entry.SizeOfRawData),
'Entropy':entry.get_entropy(),
'MD5':entry.get_hash_md5(),
'SHA1':entry.get_hash_sha1(),
'SHA256':entry.get_hash_sha256(),
'SHA512':entry.get_hash_sha512()}
table.append(sect)
sect = {}
return table
# PE :Return dump_dict() for debug only #
def dump(self):
if self.pe:
return self.pedict
#########
# PDF #
#########
# PDFiD #
def pdfid_cmd(self):
try:
oParser = optparse.OptionParser(usage='usage: %prog [options] [pdf-file|zip-file|url|@file] ...\n')
oParser.add_option('-s', '--scan', action='store_true', default=False, help='scan the given directory')
oParser.add_option('-a', '--all', action='store_true', default=False, help='display all the names')
oParser.add_option('-e', '--extra', action='store_true', default=False, help='display extra data, like dates')
oParser.add_option('-f', '--force', action='store_true', default=False, help='force the scan of the file, even without proper %PDF header')
oParser.add_option('-d', '--disarm', action='store_true', default=False, help='disable JavaScript and auto launch')
oParser.add_option('-p', '--plugins', type=str, default='', help='plugins to load (separate plugins with a comma , ; @file supported)')
oParser.add_option('-c', '--csv', action='store_true', default=False, help='output csv data when using plugins')
oParser.add_option('-m', '--minimumscore', type=float, default=0.0, help='minimum score for plugin results output')
oParser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose (will also raise catched exceptions)')
oParser.add_option('-S', '--select', type=str, default='', help='selection expression')
oParser.add_option('-o', '--output', type=str, default='', help='output to log file')
(options, args) = oParser.parse_args()
return json.loads(PDFiD2JSON(PDFiD(self.path, options.all, options.extra, options.disarm, options.force), options.force))
except Exception as e:
traceback.print_exc(file=sys.stdout)
self.unexpectedError(e)
#############
# MS OFFICE #
#############
# Olevba #
# using https://bitbucket.org/decalage/oletools/wiki/olevba
def olevba_info(self):
try:
__import__('imp').find_module('oletools')
from oletools.olevba import VBA_Parser, VBA_Scanner
from oletools.olevba import __version__ as olevbaVersion
except ImportError:
self.error('Import Error: Module oletools not found')
# Redirect stderr to devnull in case input file is not a valid office document. When parsing a non valid
# document VBA Parser raises an error to stderr.
redir_err = sys.stderr = StringIO()
try:
vba = VBA_Parser(self.path)
result = {
'Suspicious': False,
'Base64 Strings': False,
'Hex Strings': False,
'Version': olevbaVersion
}
except TypeError:
self.error('File type error: ' + redir_err.getvalue())
# set stderr back to original __stderr__
sys.stderr = sys.__stderr__
if vba.detect_vba_macros():
result['vba'] = 'VBA Macros found'
streams = []
for (filename, stream_path, vba_filename, vba_code) in vba.extract_macros():
vba_scanner = VBA_Scanner(vba_code)
scan_results = vba_scanner.scan(include_decoded_strings=False)
vba_scan_results = []
for kw_type, keyword, description in scan_results:
vba_scan_results.append({
'type': str(kw_type).encode('utf-8'),
'keyword': str(keyword).encode('utf-8'),
'description': str(description).encode('utf-8')
})
if (kw_type == 'Suspicious'):
result['Suspicious'] = True
if (keyword == 'Base64 Strings'):
result['Base64 Strings'] = True
if (keyword == 'Hex Strings'):
result['Hex Strings'] = True
streams.append({
'Filename': self.filename,
'OLE stream': stream_path,
'VBA filename': vba_filename.decode('unicode-escape').encode('utf-8'),
'VBA code': vba_code.decode('unicode-escape').encode('utf-8'),
'scan_result': vba_scan_results
})
result['streams'] = streams
else:
result['vba'] = 'No VBA Macros found'
return result

View File

@@ -1,930 +0,0 @@
#!/usr/bin/env python
__description__ = 'Tool to test a PDF file'
__author__ = 'Didier Stevens'
__version__ = '0.2.1'
__date__ = '2014/10/18'
"""
Tool to test a PDF file
Source code put in public domain by Didier Stevens, no Copyright
https://DidierStevens.com
Use at your own risk
History:
2009/03/27: start
2009/03/28: scan option
2009/03/29: V0.0.2: xml output
2009/03/31: V0.0.3: /ObjStm suggested by Dion
2009/04/02: V0.0.4: added ErrorMessage
2009/04/20: V0.0.5: added Dates
2009/04/21: V0.0.6: added entropy
2009/04/22: added disarm
2009/04/29: finished disarm
2009/05/13: V0.0.7: added cPDFEOF
2009/07/24: V0.0.8: added /AcroForm and /RichMedia, simplified %PDF header regex, extra date format (without TZ)
2009/07/25: added input redirection, option --force
2009/10/13: V0.0.9: added detection for CVE-2009-3459; added /RichMedia to disarm
2010/01/11: V0.0.10: relaxed %PDF header checking
2010/04/28: V0.0.11: added /Launch
2010/09/21: V0.0.12: fixed cntCharsAfterLastEOF bug; fix by Russell Holloway
2011/12/29: updated for Python 3, added keyword /EmbeddedFile
2012/03/03: added PDFiD2JSON; coded by Brandon Dixon
2013/02/10: V0.1.0: added http/https support; added support for ZIP file with password 'infected'
2013/03/11: V0.1.1: fixes for Python 3
2013/03/13: V0.1.2: Added error handling for files; added /XFA
2013/11/01: V0.2.0: Added @file & plugins
2013/11/02: continue
2013/11/04: added options -c, -m, -v
2013/11/06: added option -S
2013/11/08: continue
2013/11/09: added option -o
2013/11/15: refactoring
2014/09/30: added CSV header
2014/10/16: V0.2.1: added output when plugin & file not pdf
2014/10/18: some fixes for Python 3
Todo:
- update XML example (entropy, EOF)
- code review, cleanup
"""
import optparse
import os
import re
import xml.dom.minidom
import traceback
import math
import operator
import os.path
import sys
import json
import zipfile
import collections
import glob
try:
import urllib2
urllib23 = urllib2
except:
import urllib.request
urllib23 = urllib.request
#Convert 2 Bytes If Python 3
def C2BIP3(string):
if sys.version_info[0] > 2:
return bytes([ord(x) for x in string])
else:
return string
class cBinaryFile:
def __init__(self, file):
self.file = file
if file == '':
self.infile = sys.stdin
elif file.lower().startswith('http://') or file.lower().startswith('https://'):
try:
if sys.hexversion >= 0x020601F0:
self.infile = urllib23.urlopen(file, timeout=5)
else:
self.infile = urllib23.urlopen(file)
except urllib23.HTTPError:
print('Error accessing URL %s' % file)
print(sys.exc_info()[1])
sys.exit()
elif file.lower().endswith('.zip'):
try:
self.zipfile = zipfile.ZipFile(file, 'r')
self.infile = self.zipfile.open(self.zipfile.infolist()[0], 'r', C2BIP3('infected'))
except:
print('Error opening file %s' % file)
print(sys.exc_info()[1])
sys.exit()
else:
try:
self.infile = open(file, 'rb')
except:
print('Error opening file %s' % file)
print(sys.exc_info()[1])
sys.exit()
self.ungetted = []
def byte(self):
if len(self.ungetted) != 0:
return self.ungetted.pop()
inbyte = self.infile.read(1)
if not inbyte or inbyte == '':
self.infile.close()
return None
return ord(inbyte)
def bytes(self, size):
if size <= len(self.ungetted):
result = self.ungetted[0:size]
del self.ungetted[0:size]
return result
inbytes = self.infile.read(size - len(self.ungetted))
if inbytes == '':
self.infile.close()
if type(inbytes) == type(''):
result = self.ungetted + [ord(b) for b in inbytes]
else:
result = self.ungetted + [b for b in inbytes]
self.ungetted = []
return result
def unget(self, byte):
self.ungetted.append(byte)
def ungets(self, bytes):
bytes.reverse()
self.ungetted.extend(bytes)
class cPDFDate:
def __init__(self):
self.state = 0
def parse(self, char):
if char == 'D':
self.state = 1
return None
elif self.state == 1:
if char == ':':
self.state = 2
self.digits1 = ''
else:
self.state = 0
return None
elif self.state == 2:
if len(self.digits1) < 14:
if char >= '0' and char <= '9':
self.digits1 += char
return None
else:
self.state = 0
return None
elif char == '+' or char == '-' or char == 'Z':
self.state = 3
self.digits2 = ''
self.TZ = char
return None
elif char == '"':
self.state = 0
self.date = 'D:' + self.digits1
return self.date
elif char < '0' or char > '9':
self.state = 0
self.date = 'D:' + self.digits1
return self.date
else:
self.state = 0
return None
elif self.state == 3:
if len(self.digits2) < 2:
if char >= '0' and char <= '9':
self.digits2 += char
return None
else:
self.state = 0
return None
elif len(self.digits2) == 2:
if char == "'":
self.digits2 += char
return None
else:
self.state = 0
return None
elif len(self.digits2) < 5:
if char >= '0' and char <= '9':
self.digits2 += char
if len(self.digits2) == 5:
self.state = 0
self.date = 'D:' + self.digits1 + self.TZ + self.digits2
return self.date
else:
return None
else:
self.state = 0
return None
def fEntropy(countByte, countTotal):
x = float(countByte) / countTotal
if x > 0:
return - x * math.log(x, 2)
else:
return 0.0
class cEntropy:
def __init__(self):
self.allBucket = [0 for i in range(0, 256)]
self.streamBucket = [0 for i in range(0, 256)]
def add(self, byte, insideStream):
self.allBucket[byte] += 1
if insideStream:
self.streamBucket[byte] += 1
def removeInsideStream(self, byte):
if self.streamBucket[byte] > 0:
self.streamBucket[byte] -= 1
def calc(self):
self.nonStreamBucket = map(operator.sub, self.allBucket, self.streamBucket)
allCount = sum(self.allBucket)
streamCount = sum(self.streamBucket)
nonStreamCount = sum(self.nonStreamBucket)
return (allCount, sum(map(lambda x: fEntropy(x, allCount), self.allBucket)), streamCount, sum(map(lambda x: fEntropy(x, streamCount), self.streamBucket)), nonStreamCount, sum(map(lambda x: fEntropy(x, nonStreamCount), self.nonStreamBucket)))
class cPDFEOF:
def __init__(self):
self.token = ''
self.cntEOFs = 0
def parse(self, char):
if self.cntEOFs > 0:
self.cntCharsAfterLastEOF += 1
if self.token == '' and char == '%':
self.token += char
return
elif self.token == '%' and char == '%':
self.token += char
return
elif self.token == '%%' and char == 'E':
self.token += char
return
elif self.token == '%%E' and char == 'O':
self.token += char
return
elif self.token == '%%EO' and char == 'F':
self.token += char
return
elif self.token == '%%EOF' and (char == '\n' or char == '\r' or char == ' ' or char == '\t'):
self.cntEOFs += 1
self.cntCharsAfterLastEOF = 0
if char == '\n':
self.token = ''
else:
self.token += char
return
elif self.token == '%%EOF\r':
if char == '\n':
self.cntCharsAfterLastEOF = 0
self.token = ''
else:
self.token = ''
def FindPDFHeaderRelaxed(oBinaryFile):
bytes = oBinaryFile.bytes(1024)
index = ''.join([chr(byte) for byte in bytes]).find('%PDF')
if index == -1:
oBinaryFile.ungets(bytes)
return ([], None)
for endHeader in range(index + 4, index + 4 + 10):
if bytes[endHeader] == 10 or bytes[endHeader] == 13:
break
oBinaryFile.ungets(bytes[endHeader:])
return (bytes[0:endHeader], ''.join([chr(byte) for byte in bytes[index:endHeader]]))
def Hexcode2String(char):
if type(char) == int:
return '#%02x' % char
else:
return char
def SwapCase(char):
if type(char) == int:
return ord(chr(char).swapcase())
else:
return char.swapcase()
def HexcodeName2String(hexcodeName):
return ''.join(map(Hexcode2String, hexcodeName))
def SwapName(wordExact):
return map(SwapCase, wordExact)
def UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut):
if word != '':
if slash + word in words:
words[slash + word][0] += 1
if hexcode:
words[slash + word][1] += 1
elif slash == '/' and allNames:
words[slash + word] = [1, 0]
if hexcode:
words[slash + word][1] += 1
if slash == '/':
lastName = slash + word
if slash == '':
if word == 'stream':
insideStream = True
if word == 'endstream':
if insideStream == True and oEntropy != None:
for char in 'endstream':
oEntropy.removeInsideStream(ord(char))
insideStream = False
if fOut != None:
if slash == '/' and '/' + word in ('/JS', '/JavaScript', '/AA', '/OpenAction', '/JBIG2Decode', '/RichMedia', '/Launch'):
wordExactSwapped = HexcodeName2String(SwapName(wordExact))
fOut.write(C2BIP3(wordExactSwapped))
print('/%s -> /%s' % (HexcodeName2String(wordExact), wordExactSwapped))
else:
fOut.write(C2BIP3(HexcodeName2String(wordExact)))
return ('', [], False, lastName, insideStream)
class cCVE_2009_3459:
def __init__(self):
self.count = 0
def Check(self, lastName, word):
if (lastName == '/Colors' and word.isdigit() and int(word) > 2^24): # decided to alert when the number of colors is expressed with more than 3 bytes
self.count += 1
def XMLAddAttribute(xmlDoc, name, value=None):
att = xmlDoc.createAttribute(name)
xmlDoc.documentElement.setAttributeNode(att)
if value != None:
att.nodeValue = value
def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False):
"""Example of XML output:
<PDFiD ErrorOccured="False" ErrorMessage="" Filename="test.pdf" Header="%PDF-1.1" IsPDF="True" Version="0.0.4" Entropy="4.28">
<Keywords>
<Keyword Count="7" HexcodeCount="0" Name="obj"/>
<Keyword Count="7" HexcodeCount="0" Name="endobj"/>
<Keyword Count="1" HexcodeCount="0" Name="stream"/>
<Keyword Count="1" HexcodeCount="0" Name="endstream"/>
<Keyword Count="1" HexcodeCount="0" Name="xref"/>
<Keyword Count="1" HexcodeCount="0" Name="trailer"/>
<Keyword Count="1" HexcodeCount="0" Name="startxref"/>
<Keyword Count="1" HexcodeCount="0" Name="/Page"/>
<Keyword Count="0" HexcodeCount="0" Name="/Encrypt"/>
<Keyword Count="1" HexcodeCount="0" Name="/JS"/>
<Keyword Count="1" HexcodeCount="0" Name="/JavaScript"/>
<Keyword Count="0" HexcodeCount="0" Name="/AA"/>
<Keyword Count="1" HexcodeCount="0" Name="/OpenAction"/>
<Keyword Count="0" HexcodeCount="0" Name="/JBIG2Decode"/>
</Keywords>
<Dates>
<Date Value="D:20090128132916+01'00" Name="/ModDate"/>
</Dates>
</PDFiD>
"""
word = ''
wordExact = []
hexcode = False
lastName = ''
insideStream = False
keywords = ('obj',
'endobj',
'stream',
'endstream',
'xref',
'trailer',
'startxref',
'/Page',
'/Encrypt',
'/ObjStm',
'/JS',
'/JavaScript',
'/AA',
'/OpenAction',
'/AcroForm',
'/JBIG2Decode',
'/RichMedia',
'/Launch',
'/EmbeddedFile',
'/XFA',
)
words = {}
dates = []
for keyword in keywords:
words[keyword] = [0, 0]
slash = ''
xmlDoc = xml.dom.minidom.getDOMImplementation().createDocument(None, 'PDFiD', None)
XMLAddAttribute(xmlDoc, 'Version', __version__)
XMLAddAttribute(xmlDoc, 'Filename', file)
attErrorOccured = XMLAddAttribute(xmlDoc, 'ErrorOccured', 'False')
attErrorMessage = XMLAddAttribute(xmlDoc, 'ErrorMessage', '')
oPDFDate = None
oEntropy = None
oPDFEOF = None
oCVE_2009_3459 = cCVE_2009_3459()
try:
attIsPDF = xmlDoc.createAttribute('IsPDF')
xmlDoc.documentElement.setAttributeNode(attIsPDF)
oBinaryFile = cBinaryFile(file)
if extraData:
oPDFDate = cPDFDate()
oEntropy = cEntropy()
oPDFEOF = cPDFEOF()
(bytesHeader, pdfHeader) = FindPDFHeaderRelaxed(oBinaryFile)
if disarm:
(pathfile, extension) = os.path.splitext(file)
fOut = open(pathfile + '.disarmed' + extension, 'wb')
for byteHeader in bytesHeader:
fOut.write(C2BIP3(chr(byteHeader)))
else:
fOut = None
if oEntropy != None:
for byteHeader in bytesHeader:
oEntropy.add(byteHeader, insideStream)
if pdfHeader == None and not force:
attIsPDF.nodeValue = 'False'
return xmlDoc
else:
if pdfHeader == None:
attIsPDF.nodeValue = 'False'
pdfHeader = ''
else:
attIsPDF.nodeValue = 'True'
att = xmlDoc.createAttribute('Header')
att.nodeValue = repr(pdfHeader[0:10]).strip("'")
xmlDoc.documentElement.setAttributeNode(att)
byte = oBinaryFile.byte()
while byte != None:
char = chr(byte)
charUpper = char.upper()
if charUpper >= 'A' and charUpper <= 'Z' or charUpper >= '0' and charUpper <= '9':
word += char
wordExact.append(char)
elif slash == '/' and char == '#':
d1 = oBinaryFile.byte()
if d1 != None:
d2 = oBinaryFile.byte()
if d2 != None and (chr(d1) >= '0' and chr(d1) <= '9' or chr(d1).upper() >= 'A' and chr(d1).upper() <= 'F') and (chr(d2) >= '0' and chr(d2) <= '9' or chr(d2).upper() >= 'A' and chr(d2).upper() <= 'F'):
word += chr(int(chr(d1) + chr(d2), 16))
wordExact.append(int(chr(d1) + chr(d2), 16))
hexcode = True
if oEntropy != None:
oEntropy.add(d1, insideStream)
oEntropy.add(d2, insideStream)
if oPDFEOF != None:
oPDFEOF.parse(d1)
oPDFEOF.parse(d2)
else:
oBinaryFile.unget(d2)
oBinaryFile.unget(d1)
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
if disarm:
fOut.write(C2BIP3(char))
else:
oBinaryFile.unget(d1)
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
if disarm:
fOut.write(C2BIP3(char))
else:
oCVE_2009_3459.Check(lastName, word)
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
if char == '/':
slash = '/'
else:
slash = ''
if disarm:
fOut.write(C2BIP3(char))
if oPDFDate != None and oPDFDate.parse(char) != None:
dates.append([oPDFDate.date, lastName])
if oEntropy != None:
oEntropy.add(byte, insideStream)
if oPDFEOF != None:
oPDFEOF.parse(char)
byte = oBinaryFile.byte()
(word, wordExact, hexcode, lastName, insideStream) = UpdateWords(word, wordExact, slash, words, hexcode, allNames, lastName, insideStream, oEntropy, fOut)
# check to see if file ended with %%EOF. If so, we can reset charsAfterLastEOF and add one to EOF count. This is never performed in
# the parse function because it never gets called due to hitting the end of file.
if byte == None and oPDFEOF != None:
if oPDFEOF.token == '%%EOF':
oPDFEOF.cntEOFs += 1
oPDFEOF.cntCharsAfterLastEOF = 0
oPDFEOF.token = ''
except SystemExit:
sys.exit()
except:
attErrorOccured.nodeValue = 'True'
attErrorMessage.nodeValue = traceback.format_exc()
if disarm:
fOut.close()
attEntropyAll = xmlDoc.createAttribute('TotalEntropy')
xmlDoc.documentElement.setAttributeNode(attEntropyAll)
attCountAll = xmlDoc.createAttribute('TotalCount')
xmlDoc.documentElement.setAttributeNode(attCountAll)
attEntropyStream = xmlDoc.createAttribute('StreamEntropy')
xmlDoc.documentElement.setAttributeNode(attEntropyStream)
attCountStream = xmlDoc.createAttribute('StreamCount')
xmlDoc.documentElement.setAttributeNode(attCountStream)
attEntropyNonStream = xmlDoc.createAttribute('NonStreamEntropy')
xmlDoc.documentElement.setAttributeNode(attEntropyNonStream)
attCountNonStream = xmlDoc.createAttribute('NonStreamCount')
xmlDoc.documentElement.setAttributeNode(attCountNonStream)
if oEntropy != None:
(countAll, entropyAll , countStream, entropyStream, countNonStream, entropyNonStream) = oEntropy.calc()
attEntropyAll.nodeValue = '%f' % entropyAll
attCountAll.nodeValue = '%d' % countAll
attEntropyStream.nodeValue = '%f' % entropyStream
attCountStream.nodeValue = '%d' % countStream
attEntropyNonStream.nodeValue = '%f' % entropyNonStream
attCountNonStream.nodeValue = '%d' % countNonStream
else:
attEntropyAll.nodeValue = ''
attCountAll.nodeValue = ''
attEntropyStream.nodeValue = ''
attCountStream.nodeValue = ''
attEntropyNonStream.nodeValue = ''
attCountNonStream.nodeValue = ''
attCountEOF = xmlDoc.createAttribute('CountEOF')
xmlDoc.documentElement.setAttributeNode(attCountEOF)
attCountCharsAfterLastEOF = xmlDoc.createAttribute('CountCharsAfterLastEOF')
xmlDoc.documentElement.setAttributeNode(attCountCharsAfterLastEOF)
if oPDFEOF != None:
attCountEOF.nodeValue = '%d' % oPDFEOF.cntEOFs
attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF
else:
attCountEOF.nodeValue = ''
attCountCharsAfterLastEOF.nodeValue = ''
eleKeywords = xmlDoc.createElement('Keywords')
xmlDoc.documentElement.appendChild(eleKeywords)
for keyword in keywords:
eleKeyword = xmlDoc.createElement('Keyword')
eleKeywords.appendChild(eleKeyword)
att = xmlDoc.createAttribute('Name')
att.nodeValue = keyword
eleKeyword.setAttributeNode(att)
att = xmlDoc.createAttribute('Count')
att.nodeValue = str(words[keyword][0])
eleKeyword.setAttributeNode(att)
att = xmlDoc.createAttribute('HexcodeCount')
att.nodeValue = str(words[keyword][1])
eleKeyword.setAttributeNode(att)
eleKeyword = xmlDoc.createElement('Keyword')
eleKeywords.appendChild(eleKeyword)
att = xmlDoc.createAttribute('Name')
att.nodeValue = '/Colors > 2^24'
eleKeyword.setAttributeNode(att)
att = xmlDoc.createAttribute('Count')
att.nodeValue = str(oCVE_2009_3459.count)
eleKeyword.setAttributeNode(att)
att = xmlDoc.createAttribute('HexcodeCount')
att.nodeValue = str(0)
eleKeyword.setAttributeNode(att)
if allNames:
keys = sorted(words.keys())
for word in keys:
if not word in keywords:
eleKeyword = xmlDoc.createElement('Keyword')
eleKeywords.appendChild(eleKeyword)
att = xmlDoc.createAttribute('Name')
att.nodeValue = word
eleKeyword.setAttributeNode(att)
att = xmlDoc.createAttribute('Count')
att.nodeValue = str(words[word][0])
eleKeyword.setAttributeNode(att)
att = xmlDoc.createAttribute('HexcodeCount')
att.nodeValue = str(words[word][1])
eleKeyword.setAttributeNode(att)
eleDates = xmlDoc.createElement('Dates')
xmlDoc.documentElement.appendChild(eleDates)
dates.sort(key=lambda x: x[0])
for date in dates:
eleDate = xmlDoc.createElement('Date')
eleDates.appendChild(eleDate)
att = xmlDoc.createAttribute('Value')
att.nodeValue = date[0]
eleDate.setAttributeNode(att)
att = xmlDoc.createAttribute('Name')
att.nodeValue = date[1]
eleDate.setAttributeNode(att)
return xmlDoc
def PDFiD2String(xmlDoc, force):
result = 'PDFiD %s %s\n' % (xmlDoc.documentElement.getAttribute('Version'), xmlDoc.documentElement.getAttribute('Filename'))
if xmlDoc.documentElement.getAttribute('ErrorOccured') == 'True':
return result + '***Error occured***\n%s\n' % xmlDoc.documentElement.getAttribute('ErrorMessage')
if not force and xmlDoc.documentElement.getAttribute('IsPDF') == 'False':
return result + ' Not a PDF document\n'
result += ' PDF Header: %s\n' % xmlDoc.documentElement.getAttribute('Header')
for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes:
result += ' %-16s %7d' % (node.getAttribute('Name'), int(node.getAttribute('Count')))
if int(node.getAttribute('HexcodeCount')) > 0:
result += '(%d)' % int(node.getAttribute('HexcodeCount'))
result += '\n'
if xmlDoc.documentElement.getAttribute('CountEOF') != '':
result += ' %-16s %7d\n' % ('%%EOF', int(xmlDoc.documentElement.getAttribute('CountEOF')))
if xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF') != '':
result += ' %-16s %7d\n' % ('After last %%EOF', int(xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF')))
for node in xmlDoc.documentElement.getElementsByTagName('Dates')[0].childNodes:
result += ' %-23s %s\n' % (node.getAttribute('Value'), node.getAttribute('Name'))
if xmlDoc.documentElement.getAttribute('TotalEntropy') != '':
result += ' Total entropy: %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('TotalEntropy'), xmlDoc.documentElement.getAttribute('TotalCount'))
if xmlDoc.documentElement.getAttribute('StreamEntropy') != '':
result += ' Entropy inside streams: %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('StreamEntropy'), xmlDoc.documentElement.getAttribute('StreamCount'))
if xmlDoc.documentElement.getAttribute('NonStreamEntropy') != '':
result += ' Entropy outside streams: %s (%10s bytes)\n' % (xmlDoc.documentElement.getAttribute('NonStreamEntropy'), xmlDoc.documentElement.getAttribute('NonStreamCount'))
return result
class cCount():
def __init__(self, count, hexcode):
self.count = count
self.hexcode = hexcode
class cPDFiD():
def __init__(self, xmlDoc, force):
self.version = xmlDoc.documentElement.getAttribute('Version')
self.filename = xmlDoc.documentElement.getAttribute('Filename')
self.errorOccured = xmlDoc.documentElement.getAttribute('ErrorOccured') == 'True'
self.errorMessage = xmlDoc.documentElement.getAttribute('ErrorMessage')
self.isPDF = None
if self.errorOccured:
return
self.isPDF = xmlDoc.documentElement.getAttribute('IsPDF') == 'True'
if not force and not self.isPDF:
return
self.header = xmlDoc.documentElement.getAttribute('Header')
self.keywords = {}
for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes:
self.keywords[node.getAttribute('Name')] = cCount(int(node.getAttribute('Count')), int(node.getAttribute('HexcodeCount')))
self.obj = self.keywords['obj']
self.endobj = self.keywords['endobj']
self.stream = self.keywords['stream']
self.endstream = self.keywords['endstream']
self.xref = self.keywords['xref']
self.trailer = self.keywords['trailer']
self.startxref = self.keywords['startxref']
self.page = self.keywords['/Page']
self.encrypt = self.keywords['/Encrypt']
self.objstm = self.keywords['/ObjStm']
self.js = self.keywords['/JS']
self.javascript = self.keywords['/JavaScript']
self.aa = self.keywords['/AA']
self.openaction = self.keywords['/OpenAction']
self.acroform = self.keywords['/AcroForm']
self.jbig2decode = self.keywords['/JBIG2Decode']
self.richmedia = self.keywords['/RichMedia']
self.launch = self.keywords['/Launch']
self.embeddedfile = self.keywords['/EmbeddedFile']
self.xfa = self.keywords['/XFA']
self.colors_gt_2_24 = self.keywords['/Colors > 2^24']
def Print(lines, options):
print(lines)
filename = None
if options.scan:
filename = 'PDFiD.log'
if options.output != '':
filename = options.output
if filename:
logfile = open(filename, 'a')
logfile.write(lines + '\n')
logfile.close()
def Quote(value, separator, quote):
if isinstance(value, str):
if separator in value:
return quote + value + quote
return value
def MakeCSVLine(fields, separator=';', quote='"'):
formatstring = separator.join([field[0] for field in fields])
strings = [Quote(field[1], separator, quote) for field in fields]
return formatstring % tuple(strings)
def ProcessFile(filename, options, plugins):
xmlDoc = PDFiD(filename, options.all, options.extra, options.disarm, options.force)
if plugins == [] and options.select == '':
Print(PDFiD2String(xmlDoc, options.force), options)
return
oPDFiD = cPDFiD(xmlDoc, options.force)
if options.select:
if options.force or not oPDFiD.errorOccured and oPDFiD.isPDF:
pdf = oPDFiD
try:
selected = eval(options.select)
except Exception as e:
Print('Error evaluating select expression: %s' % options.select, options)
if options.verbose:
raise e
return
if selected:
if options.csv:
Print(filename, options)
else:
Print(PDFiD2String(xmlDoc, options.force), options)
else:
for cPlugin in plugins:
if not cPlugin.onlyValidPDF or not oPDFiD.errorOccured and oPDFiD.isPDF:
try:
oPlugin = cPlugin(oPDFiD)
except Exception as e:
Print('Error instantiating plugin: %s' % cPlugin.name, options)
if options.verbose:
raise e
return
try:
score = oPlugin.Score()
except Exception as e:
Print('Error running plugin: %s' % cPlugin.name, options)
if options.verbose:
raise e
return
if options.csv:
if score >= options.minimumscore:
Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%.02f', score))), options)
else:
if score >= options.minimumscore:
Print(PDFiD2String(xmlDoc, options.force), options)
Print('%s score: %.02f' % (cPlugin.name, score), options)
else:
if options.csv:
if oPDFiD.errorOccured:
Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Error occured'))), options)
if not oPDFiD.isPDF:
Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Not a PDF document'))), options)
else:
Print(PDFiD2String(xmlDoc, options.force), options)
def Scan(directory, options, plugins):
try:
if os.path.isdir(directory):
for entry in os.listdir(directory):
Scan(os.path.join(directory, entry), options, plugins)
else:
ProcessFile(directory, options, plugins)
except Exception as e:
# print directory
print(e)
# print(sys.exc_info()[2])
# print traceback.format_exc()
#function derived from: http://blog.9bplus.com/pdfidpy-output-to-json
def PDFiD2JSON(xmlDoc, force):
#Get Top Layer Data
errorOccured = xmlDoc.documentElement.getAttribute('ErrorOccured')
errorMessage = xmlDoc.documentElement.getAttribute('ErrorMessage')
filename = xmlDoc.documentElement.getAttribute('Filename')
header = xmlDoc.documentElement.getAttribute('Header')
isPdf = xmlDoc.documentElement.getAttribute('IsPDF')
version = xmlDoc.documentElement.getAttribute('Version')
entropy = xmlDoc.documentElement.getAttribute('Entropy')
#extra data
countEof = xmlDoc.documentElement.getAttribute('CountEOF')
countChatAfterLastEof = xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF')
totalEntropy = xmlDoc.documentElement.getAttribute('TotalEntropy')
streamEntropy = xmlDoc.documentElement.getAttribute('StreamEntropy')
nonStreamEntropy = xmlDoc.documentElement.getAttribute('NonStreamEntropy')
keywords = []
dates = []
#grab all keywords
for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes:
name = node.getAttribute('Name')
count = int(node.getAttribute('Count'))
if int(node.getAttribute('HexcodeCount')) > 0:
hexCount = int(node.getAttribute('HexcodeCount'))
else:
hexCount = 0
keyword = { 'count':count, 'hexcodecount':hexCount, 'name':name }
keywords.append(keyword)
#grab all date information
for node in xmlDoc.documentElement.getElementsByTagName('Dates')[0].childNodes:
name = node.getAttribute('Name')
value = node.getAttribute('Value')
date = { 'name':name, 'value':value }
dates.append(date)
data = { 'countEof':countEof, 'countChatAfterLastEof':countChatAfterLastEof, 'totalEntropy':totalEntropy, 'streamEntropy':streamEntropy, 'nonStreamEntropy':nonStreamEntropy, 'errorOccured':errorOccured, 'errorMessage':errorMessage, 'filename':filename, 'header':header, 'isPdf':isPdf, 'version':version, 'entropy':entropy, 'keywords': { 'keyword': keywords }, 'dates': { 'date':dates} }
complete = [ { 'pdfid' : data} ]
result = json.dumps(complete)
return result
def File2Strings(filename):
try:
f = open(filename, 'r')
except:
return None
try:
return list(map(lambda line:line.rstrip('\n'), f.readlines()))
except:
return None
finally:
f.close()
def ProcessAt(argument):
if argument.startswith('@'):
strings = File2Strings(argument[1:])
if strings == None:
raise Exception('Error reading %s' % argument)
else:
return strings
else:
return [argument]
def AddPlugin(cClass):
global plugins
plugins.append(cClass)
def ExpandFilenameArguments(filenames):
return list(collections.OrderedDict.fromkeys(sum(map(glob.glob, sum(map(ProcessAt, filenames), [])), [])))
class cPluginParent():
onlyValidPDF = True
def LoadPlugins(plugins, verbose):
if plugins == '':
return
scriptPath = os.path.dirname(sys.argv[0])
for plugin in sum(map(ProcessAt, plugins.split(',')), []):
try:
if not plugin.lower().endswith('.py'):
plugin += '.py'
if os.path.dirname(plugin) == '':
if not os.path.exists(plugin):
scriptPlugin = os.path.join(scriptPath, plugin)
if os.path.exists(scriptPlugin):
plugin = scriptPlugin
exec(open(plugin, 'r').read())
except Exception as e:
print('Error loading plugin: %s' % plugin)
if verbose:
raise e
def PDFiDMain(filenames, options):
global plugins
plugins = []
LoadPlugins(options.plugins, options.verbose)
if options.csv:
if plugins != []:
Print(MakeCSVLine((('%s', 'Filename'), ('%s', 'Plugin-name'), ('%s', 'Score'))), options)
elif options.select != '':
Print('Filename', options)
for filename in filenames:
if options.scan:
Scan(filename, options, plugins)
else:
ProcessFile(filename, options, plugins)
def Main():
moredesc = '''
Arguments:
pdf-file and zip-file can be a single file, several files, and/or @file
@file: run PDFiD on each file listed in the text file specified
wildcards are supported
Source code put in the public domain by Didier Stevens, no Copyright
Use at your own risk
https://DidierStevens.com'''
oParser = optparse.OptionParser(usage='usage: %prog [options] [pdf-file|zip-file|url|@file] ...\n' + __description__ + moredesc, version='%prog ' + __version__)
oParser.add_option('-s', '--scan', action='store_true', default=False, help='scan the given directory')
oParser.add_option('-a', '--all', action='store_true', default=False, help='display all the names')
oParser.add_option('-e', '--extra', action='store_true', default=False, help='display extra data, like dates')
oParser.add_option('-f', '--force', action='store_true', default=False, help='force the scan of the file, even without proper %PDF header')
oParser.add_option('-d', '--disarm', action='store_true', default=False, help='disable JavaScript and auto launch')
oParser.add_option('-p', '--plugins', type=str, default='', help='plugins to load (separate plugins with a comma , ; @file supported)')
oParser.add_option('-c', '--csv', action='store_true', default=False, help='output csv data when using plugins')
oParser.add_option('-m', '--minimumscore', type=float, default=0.0, help='minimum score for plugin results output')
    oParser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose (will also raise caught exceptions)')
oParser.add_option('-S', '--select', type=str, default='', help='selection expression')
oParser.add_option('-o', '--output', type=str, default='', help='output to log file')
(options, args) = oParser.parse_args()
if len(args) == 0:
if options.disarm:
print('Option disarm not supported with stdin')
options.disarm = False
if options.scan:
print('Option scan not supported with stdin')
options.scan = False
filenames = ['']
else:
try:
filenames = ExpandFilenameArguments(args)
except Exception as e:
print(e)
return
PDFiDMain(filenames, options)
if __name__ == '__main__':
Main()
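
The bundled pdfid code above is normally driven from Python rather than through Main(). Below is a minimal sketch of that call path, assuming the PDFiD() parser defined earlier in this file keeps the upstream signature (file, allNames, extraData, disarm, force), that the JSON helper whose tail appears above is upstream's PDFiD2JSON(), and that the module is importable as pdfid — all assumptions, since the calling submodule is not part of this hunk:

# Illustrative only: drive the bundled pdfid module programmatically.
# PDFiD(), its keyword arguments and PDFiD2JSON() are assumed from the
# upstream tool; the import path 'pdfid' is a placeholder.
import json

from pdfid import PDFiD, PDFiD2JSON

def pdfid_report(path):
    # PDFiD() returns an XML document; the JSON helper above turns it into
    # the [{'pdfid': {...}}] structure that the report template iterates over.
    xml_doc = PDFiD(path, allNames=True, extraData=True, disarm=False, force=True)
    return json.loads(PDFiD2JSON(xml_doc, True))

# Example:
# report = pdfid_report('/tmp/sample.pdf')
# report[0]['pdfid']['keywords']['keyword']  # list of {'name', 'count', 'hexcodecount'}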

View File

@ -1,95 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
peHashNG, Portable Executable hash of structural properties
@author: AnyMaster
https://github.com/AnyMaster/pehashng
"""
from __future__ import print_function
import logging
from bz2 import compress
from hashlib import sha256
from struct import pack
from pefile import PE, PEFormatError
__version__ = '1.0.1'
__author__ = 'AnyMaster'
def pehashng(pe_file):
""" Return pehashng for PE file, sha256 of PE structural properties.
:param pe_file: file name or instance of pefile.PE() class
:return: SHA256 in hexdigest format, None in case of pefile.PE() error
:rtype: str
"""
if isinstance(pe_file, PE):
exe = pe_file
else:
try:
exe = PE(pe_file, fast_load=True)
except PEFormatError as exc:
logging.error("Exception in pefile.PE('%s') - %s", pe_file, exc)
return
def align_down_p2(number):
return 1 << (number.bit_length() - 1) if number else 0
def align_up(number, boundary_p2):
assert not boundary_p2 & (boundary_p2 - 1), \
"Boundary '%d' is not a power of 2" % boundary_p2
boundary_p2 -= 1
return (number + boundary_p2) & ~ boundary_p2
def get_dirs_status():
dirs_status = 0
for idx in range(min(exe.OPTIONAL_HEADER.NumberOfRvaAndSizes, 16)):
if exe.OPTIONAL_HEADER.DATA_DIRECTORY[idx].VirtualAddress:
dirs_status |= (1 << idx)
return dirs_status
def get_complexity():
complexity = 0
if section.SizeOfRawData:
complexity = (len(compress(section.get_data())) *
7.0 /
section.SizeOfRawData)
complexity = 8 if complexity > 7 else int(round(complexity))
return complexity
characteristics_mask = 0b0111111100100011
data_directory_mask = 0b0111111001111111
data = [
pack('> H', exe.FILE_HEADER.Characteristics & characteristics_mask),
pack('> H', exe.OPTIONAL_HEADER.Subsystem),
pack("> I", align_down_p2(exe.OPTIONAL_HEADER.SectionAlignment)),
pack("> I", align_down_p2(exe.OPTIONAL_HEADER.FileAlignment)),
pack("> Q", align_up(exe.OPTIONAL_HEADER.SizeOfStackCommit, 4096)),
pack("> Q", align_up(exe.OPTIONAL_HEADER.SizeOfHeapCommit, 4096)),
pack('> H', get_dirs_status() & data_directory_mask)]
for section in exe.sections:
data += [
pack('> I', align_up(section.VirtualAddress, 512)),
pack('> I', align_up(section.SizeOfRawData, 512)),
pack('> B', section.Characteristics >> 24),
pack("> B", get_complexity())]
if not isinstance(pe_file, PE):
exe.close()
data_sha256 = sha256(b"".join(data)).hexdigest()
return data_sha256
if __name__ == '__main__':
import sys
if len(sys.argv) < 2:
print("Usage: pehashng.py path_to_file")
sys.exit(0)
print(pehashng(sys.argv[1]), sys.argv[1])
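
The pehashng implementation above is dropped by this commit; the hash is presumably taken from the pip-installable package published at the project URL in the docstring instead. A minimal sketch of computing it that way, assuming the package exports the same pehashng() helper as the deleted copy:

# Illustrative only: compute the structural hash via the installed pehashng
# package, assuming it exposes the same pehashng() function as the file
# removed above. The sample path is a placeholder.
import pefile
from pehashng import pehashng

pe = pefile.PE('/tmp/sample.exe', fast_load=True)
print(pehashng(pe))  # SHA256 hexdigest of the PE's structural properties
pe.close()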

View File

@ -0,0 +1,391 @@
<div class="report-FILEInfo" ng-if="success" >
<style>
.report-FILEInfo dl {
margin-bottom: 2px;
}
</style>
<br>
<uib-tabset active="active">
<uib-tab index="$index" ng-repeat="result in content.results" heading="{{result.submodule_name}}" disable="tab.disabled">
<br>
<!-- Basic properties -->
<div ng-if="result.submodule_name=='Basic properties'">
<div class="panel-body">
<div ng-repeat="r in result.results" class="panel panel-primary">
<div class="panel-heading">
<h4 class="panel-title">
{{r.submodule_section_header}}
</h4>
</div>
<div class="panel-body">
<dl class="dl-horizontal" ng-repeat="(k,v) in r.submodule_section_content">
<dt>{{k}}</dt>
<dd class="wrap">{{v}}</dd>
</dl>
</div>
</div>
</div>
</div>
<!-- Oletools -->
<div ng-if="result.submodule_name=='Oletools Submodule'">
<div class="panel-body">
<!-- summary -->
<div>
<div ng-if="result['summary']" class="panel panel-primary">
<div class="panel-heading">
<h4 class="panel-title">Summary</h4>
</div>
<div class="panel-body" >
<dl class="dl-horizontal">
<dt>Olevba version</dt>
<dd class="wrap">{{result['summary']['Olevba']}}</dd>
</dl>
<dl class="dl-horizontal">
<dt>Msodde version</dt>
<dd class="wrap">{{result['summary']['Msodde']}}</dd>
</dl>
<dl class="dl-horizontal">
<dt>Oletools scanner</dt>
<dd class="wrap">
<span class="label mr-xxxs" ng-repeat="t in result['summary']['taxonomies']" ng-class="{'info': 'label-info', 'safe': 'label-success', 'suspicious': 'label-warning', 'malicious':'label-danger'}[t.level]">
{{t.namespace}}:{{t.predicate}}={{t.value}}
</span>
</dd>
</dl>
</div>
</div>
</div>
<div ng-repeat="r in result.results">
<uib-accordion ng-if="r.submodule_section_header=='Olevba'">
<!--Olevba analysis -->
<div uib-accordion-group class="panel-primary" heading="Olevba analysis" is-open="true" is-disabled="status.isFirstDisabled">
<div ng-if="r.submodule_section_content.analysis" class="table-responsive">
<table class="table">
<thead>
<tr>
<th >Type</th>
<th >Keyword</th>
<th >Description</th>
</tr>
</thead>
<tbody ng-repeat="l in r.submodule_section_content.analysis">
<tr>
<td>{{l.type}}</td>
<td>{{l.keyword}}</td>
<td>{{l.description}}</td>
</tr>
</tbody>
</table>
</div>
</div>
<!-- Macros -->
<div uib-accordion-group class="panel-primary" heading="Macros" is-open="false">
<div ng-if="r.submodule_section_content.macros != ''" ng-repeat="m in r.submodule_section_content.macros">
<dl class="dl-horizontal">
<dt>vba_filename</dt>
<dd class="wrap">{{m.vba_filename}}</dd>
</dl>
<dl class="dl-horizontal">
<dt>ole_stream</dt>
<dd class="wrap">{{m.ole_stream}}</dd>
</dl>
<dl class="dl-horizontal">
<dt>code</dt>
<dd class="wrap"><pre>{{m.code}}</pre></dd>
</dl>
</div>
</div>
<!--Deobfuscated code -->
<div uib-accordion-group ng-if="r.submodule_section_content.code_deobfuscated != ''" class="panel-primary" heading="Deobfuscated code" is-open="false">
<pre>
{{r.submodule_section_content.code_deobfuscated}}
</pre>
</div>
</uib-accordion>
<div class="panel panel-primary" ng-if="r.submodule_section_header=='Oletools DDE Analysis'">
<div class="panel-heading">
<h4 class="panel-title">{{r.submodule_section_header}}</h4>
</div>
<div class="panel-body">
<dl class="dl-horizontal" ng-repeat="(k,v) in r.submodule_section_content">
<dt>{{k}}</dt>
<dd class="wrap"><pre>{{v}}</pre></dd>
</dl>
</div>
</div>
</div>
</div>
</div>
<!-- Outlook message parser -->
<div ng-if="result.submodule_name=='Outlook mail Information'">
<div class="panel-body">
<div ng-repeat="r in result.results" class="panel panel-primary">
<div class="panel-heading">
<h4 class="panel-title">
{{r.submodule_section_header}}
</h4>
</div>
<div class="panel-body">
<dl class="dl-horizontal">
<dt>From</dt>
<dd class="wrap">{{r.submodule_section_content.from}}</dd>
</dl>
<dl class="dl-horizontal">
<dt>To</dt>
<dd class="wrap">{{r.submodule_section_content.to}}</dd>
</dl>
<dl class="dl-horizontal">
<dt>Cc</dt>
<dd class="wrap">{{r.submodule_section_content.cc}}</dd>
</dl>
<dl class="dl-horizontal">
<dt>Date</dt>
<dd class="wrap">{{r.submodule_section_content.date}}</dd>
</dl>
<dl class="dl-horizontal">
<dt>Subject</dt>
<dd class="wrap">{{r.submodule_section_content.subject}}</dd>
</dl>
<dl class="dl-horizontal">
<dt>Body</dt>
<dd class="wrap"><pre>{{r.submodule_section_content.body}}</pre></dd>
</dl>
<dl class="dl-horizontal">
<dt>Attachments</dt>
<dd class="wrap">
<table class="table">
<thead>
<tr>
<th >Filename</th>
<th >SHA256</th>
</tr>
</thead>
<tbody ng-repeat="a in r.submodule_section_content.attachments">
<tr>
<td>{{a.name}}</td>
<td>{{a.sha256}}</td>
</tr>
</tbody>
</table>
</dd>
</dl>
<dl class="dl-horizontal">
<dt>All Headers</dt>
<dd class="wrap"><pre>{{r.submodule_section_content.header}}</pre></dd>
</dl>
</div>
</div>
</div>
</div>
<!-- PDF Information (PDFiD)-->
<div ng-if="result.submodule_name=='PDF Information'">
<div class="panel-body">
<!-- summary -->
<div class="panel panel-primary">
<div class="panel-heading" ng-if="result.summary">
<h4 class="panel-title">
Summary
</h4>
</div>
<div class="panel panel-body">
<dl class="dl-horizontal" ng-if="result.summary.pdfid">
<dt>PDFiD</dt>
<dd class="wrap">{{result.summary.pdfid}}</dd>
</dl>
<dl class="dl-horizontal" ng-if="result.summary.taxonomies != []">
<dt>PDFiD report</dt>
<dd class="wrap">
<span class="label mr-xxxs" ng-repeat="t in result['summary']['taxonomies']" ng-class="{'info': 'label-info', 'safe': 'label-success', 'suspicious': 'label-warning', 'malicious':'label-danger'}[t.level]">
{{t.namespace}}:{{t.predicate}}={{t.value}}
</span>
</dd>
</dl>
</div>
</div>
<!-- PDFiD -->
<div ng-repeat="r in result.results" class="panel panel-primary">
<div class="panel-heading" ng-if="r.submodule_section_header=='PDFiD Information'">
<h4 class="panel-title">
{{r.submodule_section_header}}
</h4>
</div>
<div class="panel-body">
<div ng-repeat="(k,v) in r.submodule_section_content[0].pdfid">
<dl class="dl-horizontal" ng-if="k!='keywords'">
<dt>{{k}}</dt>
<dd class="wrap">{{v}}</dd>
</dl>
<dl class="dl-horizontal" ng-if="k=='keywords'">
<dt>{{k}}</dt>
<dd class="wrap">
<table class="table">
<thead>
<tr>
<th >name</th>
<th >hexcodecount</th>
<th >count</th>
</tr>
</thead>
<tbody ng-repeat="a in v.keyword">
<tr>
<td>{{a.name}}</td>
<td>{{a.hexcodecount}}</td>
<td>{{a.count}}</td>
</tr>
</tbody>
</table>
</dd>
</dl>
</div>
</div>
</div>
</div>
</div>
<!-- PE Information submodule-->
<div ng-if="result.submodule_name=='PE Information'">
<div class="panel-body">
<div class="panel panel-primary">
<div class="panel-heading">
<h4 class="panel-title">
Summary
</h4>
</div>
<div class="panel-body">
<dl class="dl-horizontal">
<dt>pefile version</dt>
<dd class="wrap">{{result['summary']['pefile']}}</dd>
</dl>
</div>
</div>
<div ng-repeat="r in result.results">
<uib-accordion>
<div uib-accordion-group ng-if="r.submodule_section_header=='Headers'" class="panel-primary" heading="{{r.submodule_section_header}}" is-open="true" is-disabled="status.isFirstDisabled">
<div ng-repeat="h in r.submodule_section_content">
<dl class="dl-horizontal">
<dt>{{h.Info}}</dt>
<dd class="wrap">{{h.Value}}</dd>
</dl>
</div>
</div>
<div uib-accordion-group ng-if="r.submodule_section_header=='Hashes'" class="panel-primary" heading="{{r.submodule_section_header}}" is-open="true" is-disabled="status.isFirstDisabled">
<dl class="dl-horizontal" data-ng-repeat="(k,v) in r.submodule_section_content">
<dt>{{k}}</dt>
<dd class="wrap">{{v}}</dd>
</dl>
</div>
<div uib-accordion-group ng-if="r.submodule_section_header=='Import Adress Tables'" class="panel-primary" heading="{{r.submodule_section_header}}" is-open="true" is-disabled="status.isFirstDisabled">
<div ng-repeat="table in r.submodule_section_content" ng-init="table.showRows=false;">
<dl class="dl-horizontal">
<dt>
<a href="" ng-click="table.showRows = !table.showRows">
<i class="fa" ng-class="{ true:'fa-minus-square-o', false:'fa-plus-square-o' }[table.showRows]"></i>
</a>
{{table.entryname}}
</dt>
<dd class="wrap" >
<ul class="list-unstyled">
<div ng-if="!table.showRows">
{{table.symbols.length}}
items
</div>
<div ng-if="table.showRows" ng-repeat="s in table.symbols">{{s}}</div>
</ul>
</dd>
</dl>
</div>
</div>
<div uib-accordion-group ng-if="r.submodule_section_header=='Sections'" class="panel-primary" heading="{{r.submodule_section_header}}" is-open="true" is-disabled="status.isFirstDisabled">
<table class="table">
<thead>
<th>Section</th>
<th>SizeOfRawData</th>
<th>Entropy</th>
</thead>
<tbody ng-repeat="section in r.submodule_section_content">
<tr>
<td>
<b>{{section.entryname}}</b>
</td>
<td>{{section.SizeOfRawData}}</td>
<td>{{section.Entropy}}</td>
</tr>
<tr>
<td colspan=3>
<dl class="dl-horizontal">
<dt>
<small>MD5</small>
</dt>
<dd>
<small>{{section.MD5}}</small>
</dd>
</dl>
<dl class="dl-horizontal">
<dt>
<small>SHA1</small>
</dt>
<dd>
<small>{{section.SHA1}}</small>
</dd>
</dl>
<dl class="dl-horizontal">
<dt>
<small>SHA256</small>
</dt>
<dd>
<small>{{section.SHA256}}</small>
</dd>
</dl>
</td>
</tr>
</tbody>
</table>
</div>
<div uib-accordion-group ng-if="r.submodule_section_header=='pefile raw output'" class="panel-primary" heading="pefile raw output" is-open="false" is-disabled="status.isFirstDisabled">
<pre>
{{r.submodule_section_content}}
</pre>
</div>
</uib-accordion>
</div>
</div>
</div>
<!--Next submodule here -->
</uib-tab>
</uib-tabset>
</div>
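
Each tab in the template above is selected by an exact match on result.submodule_name ('Basic properties', 'Oletools Submodule', 'Outlook mail Information', 'PDF Information', 'PE Information'), and every panel binds to submodule_section_header / submodule_section_content pairs. Below is an illustrative sketch of one such entry, as the 'Sections' table of the PE tab consumes it; the Python shape is inferred from the ng-repeat bindings, not taken from the submodule code, and all values are placeholders:

# Illustrative only: the per-section shape the 'PE Information' tab binds to.
# Keys mirror the expressions used in long.html; values are placeholders.
sections_entry = {
    'submodule_section_header': 'Sections',
    'submodule_section_content': [
        {
            'entryname': '.text',             # first column, rendered in bold
            'SizeOfRawData': 4096,            # second column
            'Entropy': 6.3,                   # third column
            'MD5': '<md5 of section data>',
            'SHA1': '<sha1 of section data>',
            'SHA256': '<sha256 of section data>',
        },
    ],
}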

View File

@ -0,0 +1,3 @@
<span class="label" ng-repeat="t in content.taxonomies" ng-class="{'info': 'label-info', 'safe': 'label-success', 'suspicious': 'label-warning', 'malicious':'label-danger'}[t.level]">
{{t.namespace}}:{{t.predicate}}={{t.value}}
</span>