osquery-1/tools/codegen/genapi.py

#!/usr/bin/env python
# Copyright (c) 2014-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed in accordance with the terms specified in
# the LICENSE file found in the root directory of this source tree.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import ast
import json
import logging
import os
import sys
import uuid
import subprocess
from gentable import *
from utils import platform
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
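# Make helper modules in the sibling tests directory importable.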
sys.path.append(SCRIPT_DIR + "/../tests")
# the log format for the logging module
LOG_FORMAT = "%(levelname)s [Line %(lineno)d]: %(message)s"
CANONICAL_PLATFORMS = {
    "specs": "All Platforms",
    "darwin": "Darwin (Apple OS X)",
    "linux": "Ubuntu, CentOS",
    "freebsd": "FreeBSD",
    "posix": "POSIX-compatible Platforms",
    "windows": "Microsoft Windows",
    "utility": "Utility",
    "yara": "YARA",
    "smart": "SMART",
    "lldpd": "LLDPD",
    "sleuthkit": "The Sleuth Kit",
    "macwin": "MacOS and Windows",
    "linwin": "Linux and Windows"
}
TEMPLATE_API_DEFINITION = """
{
"tables": %s,
"events": [
]
}
"""


class NoIndent(object):
    """Wrapper marking a value the encoder should emit without json newlines."""
def __init__(self, value):
self.value = value
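        # gentable column types are DataType instances; store their string
        # form so the json encoder can serialize them.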
if 'type' in self.value and isinstance(self.value['type'], DataType):
self.value['type'] = str(self.value['type'])


class Encoder(json.JSONEncoder):
    """
    Newlines are such a pain in json-generated output.
    Use this custom encoder to produce pretty json while keeping values
    wrapped in NoIndent compact on a single line.
    """
def __init__(self, *args, **kwargs):
super(Encoder, self).__init__(*args, **kwargs)
self.kwargs = dict(kwargs)
del self.kwargs['indent']
self._replacement_map = {}
def default(self, o):
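        # Render NoIndent values as compact json now and return a unique
        # placeholder token; encode() later swaps the quoted token back for
        # the raw json string, keeping those values on a single line.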
if isinstance(o, NoIndent):
key = uuid.uuid4().hex
self._replacement_map[key] = json.dumps(o.value, **self.kwargs)
return "@@%s@@" % (key,)
else:
return super(Encoder, self).default(o)
def encode(self, o):
result = super(Encoder, self).encode(o)
        for k, v in self._replacement_map.items():
result = result.replace('"@@%s@@"' % (k,), v)
return result


def gen_api_json(api):
"""Apply the api literal object to the template."""
api = json.dumps(
api, cls=Encoder, sort_keys=True, indent=1, separators=(',', ': ')
)
return TEMPLATE_API_DEFINITION % (api)


def gen_spec(tree):
"""Given a table tree, produce a literal of the table representation."""
exec(compile(tree, "<string>", "exec"))
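    # The exec above populates the table object pulled in by the gentable
    # star-import; read the resulting columns and metadata from it.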
columns = [NoIndent({
"name": column.name,
"type": column.type,
"description": column.description,
"options": column.options,
}) for column in table.columns()]
foreign_keys = [NoIndent({"column": key.column, "table": key.table})
for key in table.foreign_keys()]
return {
"name": table.table_name,
"columns": columns,
"foreign_keys": foreign_keys,
"function": table.function,
"description": table.description,
"attributes": table.attributes,
"examples": table.examples,
}


def gen_diff(api_old_path, api_new_path):
"""Quick and dirty way to view table API changes."""
with open(api_old_path, 'r') as fh:
api_old = json.loads(fh.read())
with open(api_new_path, 'r') as fh:
api_new = json.loads(fh.read())
# Prune table lists into maps
old_tables = {}
new_tables = {}
for category in api_new["tables"]:
for table in category["tables"]:
new_tables["%s:%s" % (category["name"], table["name"])] = table
for category in api_old["tables"]:
for table in category["tables"]:
old_tables["%s:%s" % (category["name"], table["name"])] = table
# Iterate backwards then forward to detect added/removed.
tables_added = []
tables_removed = []
columns_added = []
columns_removed = []
    for name, table in new_tables.items():
if name not in old_tables:
tables_added.append(name)
continue
for column in table["columns"]:
old_columns = [c["name"] for c in old_tables[name]["columns"]]
if column["name"] not in old_columns:
                columns_added.append("%s:%s:%s:%s" % (name.split(":")[0],
                    table["name"], column["name"], column["type"]))
    for name, table in old_tables.items():
if name not in new_tables:
tables_removed.append(name)
continue
for column in table["columns"]:
new_columns = [c["name"] for c in new_tables[name]["columns"]]
if column["name"] not in new_columns:
                columns_removed.append("%s:%s:%s:%s" % (name.split(":")[0],
                    table["name"], column["name"], column["type"]))
# Sort then pretty print (md) the changes.
tables_added.sort()
for name in tables_added:
print("Added table `%s` to %s" % tuple(name.split(":")[::-1]))
columns_added.sort()
for name in columns_added:
column = name.split(":")
print("Added column `%s` (`%s`) to table `%s`" % (column[2], column[3],
column[1]))
tables_removed.sort()
for name in tables_removed:
print("Removed table `%s` from %s" % tuple(name.split(":")[::-1]))
columns_removed.sort()
for name in columns_removed:
column = name.split(":")
print("Removed column `%s` (`%s`) from table `%s`" % (column[2],
column[3], column[1]))


def gen_api(tables_path, profile={}):
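    """Walk the spec directory and build per-platform table definitions."""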
blacklist = None
blacklist_path = os.path.join(tables_path, "blacklist")
if os.path.exists(blacklist_path):
with open(blacklist_path, "r") as fh:
blacklist = fh.read()
categories = {}
for base, _, files in os.walk(tables_path):
for spec_file in files:
if spec_file[0] == '.' or spec_file.find("example") == 0:
continue
# Exclude blacklist specific file
if spec_file == 'blacklist':
continue
platform = os.path.basename(base)
# Exclude kernel tables
if platform in ['kernel']:
continue
platform_name = CANONICAL_PLATFORMS[platform]
name = spec_file.split(".table", 1)[0]
if platform not in categories.keys():
categories[platform] = {"name": platform_name, "tables": []}
            with open(os.path.join(base, spec_file), "r") as fh:
tree = ast.parse(fh.read())
table_spec = gen_spec(tree)
table_profile = profile.get("%s.%s" % (platform, name), {})
table_spec["profile"] = NoIndent(table_profile)
table_spec["blacklisted"] = is_blacklisted(table_spec["name"],
blacklist=blacklist)
categories[platform]["tables"].append(table_spec)
categories = [{"key": k, "name": v["name"], "tables": v["tables"]}
                  for k, v in categories.items()]
return categories


def main(argc, argv):
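    """Parse arguments and emit the table API as json (or an API diff)."""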
parser = argparse.ArgumentParser("Generate API documentation.")
parser.add_argument(
"--debug", default=False, action="store_true",
help="Output debug messages (when developing)"
)
parser.add_argument(
"--tables", default="specs",
help="Path to osquery table specs"
)
parser.add_argument(
"--profile", default=None,
help="Add the results of a profile summary to the API."
)
parser.add_argument(
"--diff", default=False, action="store_true",
help="Compare API changes API_PREVIOUS API_CURRENT"
)
parser.add_argument(
"--output", default=False, action="store_true",
help="Create output file as the version tagged."
)
parser.add_argument(
"--directory", default=".",
help="Directory to use for the output file."
)
parser.add_argument("vars", nargs="*")
args = parser.parse_args()
if args.debug:
logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG)
else:
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
if args.diff:
if len(args.vars) < 2:
            logging.error("If using --diff you must supply API_PREVIOUS API_CURRENT")
exit(1)
gen_diff(args.vars[0], args.vars[1])
exit(0)
if not os.path.exists(args.tables):
logging.error("Cannot find path: %s" % (args.tables))
exit(1)
profile = {}
if args.profile is not None:
if not os.path.exists(args.profile):
logging.error("Cannot find path: %s" % (args.profile))
exit(1)
with open(args.profile, "r") as fh:
try:
profile = json.loads(fh.read())
except Exception as e:
logging.error("Cannot parse profile data: %s" % (str(e)))
exit(2)
# Read in the optional list of blacklisted tables, then generate
# categories.
api = gen_api(args.tables, profile)
# Output file will be the version with json extension, otherwise stdout
if args.output:
print('[+] creating tables json')
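        # Name the output file after the current git tag (git describe).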
cmd = ['git', 'describe', '--tags', 'HEAD']
proc = subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
out, err = proc.communicate()
        output_file = out.decode("utf-8").split("\n")[0] + ".json"
if args.directory[-1:] == '/':
output_path = args.directory + output_file
else:
output_path = args.directory + '/' + output_file
with open(output_path, 'w') as f:
print(gen_api_json(api), file=f)
print('[+] tables json file created at %s' % (output_path))
else:
print(gen_api_json(api))


if __name__ == "__main__":
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
main(len(sys.argv), sys.argv)