[tools] Performance monitoring tooling

This commit is contained in:
Teddy Reed 2014-11-06 17:12:40 -08:00
parent be020aa4c2
commit 131dca2673
6 changed files with 358 additions and 66 deletions

View File

@ -1,3 +1,3 @@
# Jinja2 is needed for gentable.py; psutil is needed for tools/profile.py
Jinja2
psutil

145
tools/genapi.py Normal file → Executable file
View File

@ -3,95 +3,136 @@
from __future__ import absolute_import
from __future__ import division
#from __future__ import print_function
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import ast
import json
import logging
import os
import sys
import uuid
from gentable import Column, table_name, schema, implementation, table
from gentable import Column, ForeignKey, \
table_name, schema, implementation, description, table
# the log format for the logging module
LOG_FORMAT = "%(levelname)s [Line %(lineno)d]: %(message)s"
CANONICAL_PLATFORMS = {
"x": "All Platforms",
"darwin": "Darwin (Apple OS X)",
"linux": "Ubuntu, CentOS",
"x": "All Platforms",
"darwin": "Darwin (Apple OS X)",
"linux": "Ubuntu, CentOS",
}
TEMPLATE_API_DEFINITION = """
/** @jsx React.DOM */
/** This page is automatically generated by genapi.py, do not edit! */
'use strict';
var API = [
%s
];
var API = %s;
module.exports = API;
"""
TEMPLATE_CATEGORY = """
class NoIndent(object):
"""Special instance checked object for removing json newlines."""
def __init__(self, value):
self.value = value
{name: "%s", tables: [%s
]}"""
class Encoder(json.JSONEncoder):
"""
Newlines are such a pain in json-generated output.
Use this custom encoder to produce pretty json multiplexed with a more
raw json output within.
"""
def __init__(self, *args, **kwargs):
super(Encoder, self).__init__(*args, **kwargs)
self.kwargs = dict(kwargs)
del self.kwargs['indent']
self._replacement_map = {}
TEMPLATE_TABLE = """
def default(self, o):
if isinstance(o, NoIndent):
key = uuid.uuid4().hex
self._replacement_map[key] = json.dumps(o.value, **self.kwargs)
return "@@%s@@" % (key,)
else:
return super(Encoder, self).default(o)
{name: "%s", columns: [%s
]}"""
TEMPLATE_COLUMN = """
{name: "%s", type: "%s", description: "%s", tables: "%s"}"""
def encode(self, o):
result = super(Encoder, self).encode(o)
for k, v in self._replacement_map.iteritems():
result = result.replace('"@@%s@@"' % (k,), v)
return result
def gen_api(api):
categories = []
for category, tables in api.iteritems():
tables_output = []
for table in tables:
columns_output = []
for column in table[1]:
columns_output.append(TEMPLATE_COLUMN % (
column[0], column[1], "", ""))
tables_output.append(TEMPLATE_TABLE % (
table[0], ", ".join(columns_output)))
categories.append(TEMPLATE_CATEGORY % (
category, ", ".join(tables_output)))
return TEMPLATE_API_DEFINITION % (", ".join(categories))
"""Apply the api literal object to the template."""
api = json.dumps(api,
cls=Encoder, sort_keys=True, indent=1, separators=(',', ': '))
return TEMPLATE_API_DEFINITION % (api)
def gen_spec(tree):
exec(compile(tree, "<string>", "exec"))
schema = [(column.name, column.type) for column in table.schema]
return (table.table_name, schema, table.function)
"""Given a table tree, produce a literal of the table representation."""
exec(compile(tree, "<string>", "exec"))
columns = [NoIndent({"name": column.name, "type": column.type})
for column in table.columns()]
foreign_keys = [NoIndent({"column": key.column, "table": key.table})
for key in table.foreign_keys()]
return {
"name": table.table_name,
"columns": columns,
"foreign_keys": foreign_keys,
"function": table.function,
"description": table.description,
}
def main(argc, argv):
parser = argparse.ArgumentParser("Generate API documentation.")
parser.add_argument("--tables", default="osquery/tables/specs",
help="Path to osquery table specs")
args = parser.parse_args()
parser = argparse.ArgumentParser("Generate API documentation.")
parser.add_argument("--tables", default="osquery/tables/specs",
help="Path to osquery table specs")
parser.add_argument("--profile", default=None,
help="Add the results of a profile summary to the API.")
args = parser.parse_args()
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
if not os.path.exists(args.tables):
logging.error("Cannot find path: %s" % args.table)
exit(1)
if not os.path.exists(args.tables):
logging.error("Cannot find path: %s" % (args.tables))
exit(1)
categories = {}
for base, folders, files in os.walk(args.tables):
for spec in files:
platform = CANONICAL_PLATFORMS[os.path.basename(base)]
name = spec.split(".table", 1)[0]
if platform not in categories.keys():
categories[platform] = []
with open(os.path.join(base, spec), "rU") as fh:
tree = ast.parse(fh.read())
categories[platform].append(gen_spec(tree))
print gen_api(categories)
profile = {}
if args.profile is not None:
if not os.path.exists(args.profile):
logging.error("Cannot find path: %s" % (args.profile))
exit(1)
with open(args.profile, "r") as fh:
try:
profile = json.loads(fh.read())
except Exception as e:
logging.error("Cannot parse profile data: %s" % (str(e)))
exit(2)
categories = {}
for base, folders, files in os.walk(args.tables):
for spec_file in files:
platform = os.path.basename(base)
platform_name = CANONICAL_PLATFORMS[platform]
name = spec_file.split(".table", 1)[0]
if platform not in categories.keys():
categories[platform] = {"name": platform_name, "tables": []}
with open(os.path.join(base, spec_file), "rU") as fh:
tree = ast.parse(fh.read())
table_spec = gen_spec(tree)
table_profile = profile.get("%s.%s" % (platform, name), {})
table_spec["profile"] = NoIndent(table_profile)
categories[platform]["tables"].append(table_spec)
categories = [{"key": k, "name": v["name"], "tables": v["tables"]}
for k, v in categories.iteritems()]
print(gen_api(categories))
if __name__ == "__main__":

View File

@ -255,6 +255,13 @@ class TableState(Singleton):
self.impl = ""
self.function = ""
self.class_name = ""
self.description = ""
def columns(self):
return [i for i in self.schema if isinstance(i, Column)]
def foreign_keys(self):
return [i for i in self.schema if isinstance(i, ForeignKey)]
def generate(self, path):
"""Generate the virtual table files"""
@ -262,7 +269,7 @@ class TableState(Singleton):
self.impl_content = jinja2.Template(IMPL_TEMPLATE).render(
table_name=self.table_name,
table_name_cc=to_camel_case(self.table_name),
schema=self.schema,
schema=self.columns(),
header=self.header,
impl=self.impl,
function=self.function,
@ -284,12 +291,23 @@ table = TableState()
class Column(object):
"""
A Column object to get around that fact that list literals in Python are
ordered but dictionaries aren't
Part of an osquery table schema.
Define a column by name and type with an optional description to assist
documentation generation and reference.
"""
def __init__(self, **kwargs):
self.name = kwargs.get("name", "")
self.type = kwargs.get("type", "")
self.description = kwargs.get("description", "")
class ForeignKey(object):
"""
Part of an osquery table schema.
Loosely define a column in a table spec as a Foreign key in another table.
"""
def __init__(self, **kwargs):
self.column = kwargs.get("column", "")
self.table = kwargs.get("table", "")
def table_name(name):
"""define the virtual table name"""
@ -303,8 +321,11 @@ def schema(schema_list):
table
"""
logging.debug("- schema")
for col in schema_list:
logging.debug(" - %s (%s)" % (col.name, col.type))
for it in schema_list:
if isinstance(it, Column):
logging.debug(" - column: %s (%s)" % (it.name, it.type))
if isinstance(it, ForeignKey):
logging.debug(" - foreign_key: %s (%s)" % (it.column, it.table))
table.schema = schema_list
def implementation(impl_string):
@ -329,6 +350,9 @@ def implementation(impl_string):
table.function = function
table.class_name = class_name
def description(text):
table.description = text
def main(argc, argv):
if DEVELOPING:
logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG)

211
tools/profile.py Executable file
View File

@ -0,0 +1,211 @@
#!/usr/bin/env python
# Copyright 2004-present Facebook. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import json
import os
import psutil
import tempfile
import shutil
import subprocess
import sys
import time
def red(msg):
    """Render *msg* on an ANSI red background with bold black text."""
    text = str(msg)
    return "\033[41m\033[1;30m %s \033[0m" % text


def yellow(msg):
    """Render *msg* on an ANSI yellow background with bold black text."""
    text = str(msg)
    return "\033[43m\033[1;30m %s \033[0m" % text


def green(msg):
    """Render *msg* on an ANSI green background with bold black text."""
    text = str(msg)
    return "\033[42m\033[1;30m %s \033[0m" % text


def blue(msg):
    """Render *msg* on an ANSI blue background with bold black text."""
    text = str(msg)
    return "\033[46m\033[1;30m %s \033[0m" % text
# NOTE(review): despite the name, KB is 1024 * 1024 (a mebibyte), so the
# "memory" thresholds below read as 8/12/24 MB of RSS — confirm intent.
KB = 1024 * 1024
# Threshold tuples consumed by summary(): a measured value is ranked by the
# index of the first threshold it falls below; that rank then indexes
# "colors" (past the last threshold means the worst rank, red).
RANGES = {
    "colors": (blue, green, yellow, red),
    "utilization": (8, 20, 50),            # average CPU percent
    "cpu_time": (0.4, 1, 10),              # user + system seconds
    "memory": (8 * KB, 12 * KB, 24 * KB),  # resident set size, bytes
    "fds": (6, 12, 50),                    # open file descriptors
    "duration": (0.8, 1, 3),               # wall-clock seconds (minus delay)
}
def queries_from_tables(path, restrict, target_platform=None):
    """Construct "SELECT * FROM ..." queries from every table spec.

    Args:
        path: Root directory of the osquery table specs; each immediate
            subdirectory names a platform ("x" means all platforms).
        restrict: Comma-separated "platform.table" keys to exclude.
        target_platform: Platform directory to include alongside "x".
            Defaults to the module-level ``platform`` set in ``__main__``
            (previously this was read implicitly from a global, which
            raised NameError when the function was imported and called
            standalone), falling back to a normalized sys.platform.

    Returns:
        dict mapping "platform.table" keys to SELECT-all query strings.
    """
    if target_platform is None:
        # Preserve the historical behavior of reading the module global,
        # but do not crash when it has not been set yet.
        target_platform = globals().get("platform")
        if target_platform is None:
            target_platform = (
                "linux" if sys.platform.startswith("linux") else sys.platform)
    # Let the caller limit the tables.
    restrict_tables = {t.strip() for t in restrict.split(",")}
    tables = []
    for base, _folders, files in os.walk(path):
        spec_platform = os.path.basename(base)
        # Hoisted out of the inner loop: the platform is per-directory.
        if spec_platform not in ("x", target_platform):
            continue
        for spec in files:
            table_name = spec.split(".table", 1)[0]
            # Generate all tables to select from, with abandon.
            tables.append("%s.%s" % (spec_platform, table_name))
    return {t: "SELECT * FROM %s;" % t.split(".", 1)[1]
            for t in tables if t not in restrict_tables}
def get_stats(p, interval=1):
    """Sample a psutil process handle and return the interesting counters.

    Note: ``cpu_percent`` blocks for *interval* seconds while sampling.
    """
    cpu = p.cpu_percent(interval=interval)
    # io_counters is not implemented by psutil on OS X.
    counters = None if sys.platform == "darwin" else p.io_counters()
    stats = {}
    stats["utilization"] = cpu
    stats["counters"] = counters
    stats["fds"] = p.num_fds()
    stats["cpu_times"] = p.cpu_times()
    stats["memory"] = p.memory_info_ex()
    return stats
def run_query(shell, query, timeout=0, count=1):
    """Execute the osquery run wrapper and profile the child process.

    Args:
        shell: Path to the "run" helper binary.
        query: SQL statement forwarded via --query.
        timeout: Max seconds the query may run (the wrapper adds a 1s
            delay before and after, hence the +2 below); 0 = no timeout.
        count: Number of --iterations the helper runs the query.

    Returns:
        dict with the averaged CPU utilization, wall duration, and the
        last sampled memory rss / cpu times / fd count.
    """
    start_time = time.time()
    proc = subprocess.Popen(
        [shell, "--query", query, "--iterations", str(count),
         "--delay", "1"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    p = psutil.Process(pid=proc.pid)

    delay = 0
    step = 0.5
    percents = []
    stats = {}
    # Calculate the CPU utilization in intervals of `step` seconds.
    while p.is_running():
        try:
            stats = get_stats(p, step)
            percents.append(stats["utilization"])
        except (psutil.AccessDenied, psutil.NoSuchProcess):
            # The process exited (or we lost access) between samples.
            break
        delay += step
        if timeout > 0 and delay >= timeout + 2:
            proc.kill()
            break

    # Subtract the 2 seconds of setup/teardown delay from the wall time.
    duration = time.time() - start_time - 2
    # Ignore idle samples; guard against dividing by zero when every
    # sample was 0 (very short queries) or sampling failed immediately.
    utilization = [percent for percent in percents if percent != 0]
    avg_utilization = (
        sum(utilization) / len(utilization) if utilization else 0)
    if not stats:
        # No sample succeeded; report an empty profile rather than
        # raising UnboundLocalError as the previous version did.
        return {
            "utilization": avg_utilization,
            "duration": duration,
            "memory": 0,
            "user_time": 0,
            "system_time": 0,
            "cpu_time": 0,
            "fds": 0,
        }
    return {
        "utilization": avg_utilization,
        "duration": duration,
        "memory": stats["memory"].rss,
        "user_time": stats["cpu_times"].user,
        "system_time": stats["cpu_times"].system,
        "cpu_time": stats["cpu_times"].user + stats["cpu_times"].system,
        "fds": stats["fds"],
    }
def summary(results, display=False):
    """Map raw profile results onto the RANGES thresholds.

    Args:
        results: dict of {name: {metric: value}} as produced by run_query.
        display: When True, also print one color-coded line per result
            using the RANGES["colors"] functions.

    Returns:
        dict of {name: {metric: rank}} where rank indexes RANGES["colors"].
    """
    def rank(value, ranges):
        # Index of the first threshold the value is below; falling past
        # the end of the tuple yields the worst rank.
        for i, r in enumerate(ranges):
            if value < r:
                return i
        return len(ranges)

    summary_results = {}
    # .items() instead of the Python-2-only .iteritems(): the file already
    # imports print_function/division for forward compatibility.
    for name, result in results.items():
        summary_result = {}
        for key in RANGES:
            if key == "colors":
                continue
            summary_result[key] = rank(result[key], RANGES[key])
        if display:
            print("%s:" % name, end=" ")
            for key, v in summary_result.items():
                print(RANGES["colors"][v](
                    "%s: %s (%s)" % (key, v, result[key])), end=" ")
            print("")
        summary_results[name] = summary_result
    return summary_results
def profile(shell, queries, timeout=0, count=10, rounds=1):
    """Profile every query and report per-metric averages across rounds.

    Args:
        shell: Path to the osquery run wrapper binary.
        queries: dict of {name: SQL} to profile.
        timeout: Per-run timeout forwarded to run_query (0 = none).
        count: --iterations per run.
        rounds: Number of runs to average over.

    Returns:
        dict of {name: {metric: average value across rounds}}.
    """
    report = {}
    # .items() instead of the Python-2-only .iteritems() for py3 safety.
    for name, query in queries.items():
        print("Profiling query: %s" % query)
        results = {}
        for i in range(rounds):
            result = run_query(shell, query, timeout=timeout, count=count)
            summary({"%s (%d/%d)" % (name, i + 1, rounds): result},
                    display=True)
            # Store each round's metrics so we can average them below.
            for k, v in result.items():
                results.setdefault(k, []).append(v)
        report[name] = {k: sum(v) / len(v) for k, v in results.items()}
        summary({"%s avg" % name: report[name]}, display=True)
    return report
if __name__ == "__main__":
    # Normalize the platform name to match the table spec directory layout.
    platform = sys.platform
    if platform == "linux2":
        platform = "linux"

    parser = argparse.ArgumentParser(description=("Profile osquery, "
        "individual tables, or a set of osqueryd config queries."))
    parser.add_argument("--restrict", default="",
        help="Limit to a list of comma-separated tables.")
    parser.add_argument("--tables", default="./osquery/tables/specs",
        help="Path to the osquery table specs.")
    parser.add_argument("--config", default=None,
        help="Use scheduled queries from a config.")
    parser.add_argument("--output", default=None,
        help="Write JSON output to file.")
    parser.add_argument("--summary", default=False, action="store_true",
        help="Write a summary instead of stats.")
    parser.add_argument("--query", default=None,
        help="Profile a single query.")
    parser.add_argument("--timeout", default=0, type=int,
        help="Max seconds a query may run --count times.")
    parser.add_argument("--count", default=10, type=int,
        help="Number of times to run each query.")
    parser.add_argument("--rounds", default=1, type=int,
        help="Run the profile for multiple rounds and use the average.")
    parser.add_argument("--shell",
        default="./build/%s/tools/run" % (platform),
        help="Path to osquery run wrapper.")
    args = parser.parse_args()

    if not os.path.exists(args.shell):
        print("Cannot find --daemon: %s" % (args.shell))
        exit(1)
    if args.config is None and not os.path.exists(args.tables):
        print("Cannot find --tables: %s" % (args.tables))
        exit(1)

    queries = {}
    if args.config is not None:
        if not os.path.exists(args.config):
            print("Cannot find --config: %s" % (args.config))
            exit(1)
        print("--config is not yet supported.")
        exit(2)
    elif args.query is not None:
        queries["manual"] = args.query
    else:
        queries = queries_from_tables(args.tables, args.restrict)

    # Start the profiling!
    results = profile(args.shell, queries,
        timeout=args.timeout, count=args.count, rounds=args.rounds)

    if args.summary:
        # BUG FIX: "--summary" without "--output" previously crashed with
        # open(None); fall back to printing the summary on stdout instead.
        summary_json = json.dumps(summary(results), indent=1, sort_keys=True)
        if args.output is not None:
            with open(args.output, "w") as fh:
                fh.write(summary_json)
        else:
            print(summary_json)
    elif args.output is not None:
        with open(args.output, "w") as fh:
            fh.write(json.dumps(results, indent=1, sort_keys=True))

View File

@ -7,11 +7,16 @@
DEFINE_string(query, "", "query to execute");
DEFINE_int32(iterations, 1, "times to run the query in question");
DEFINE_int32(delay, 0, "delay before and after the query");
int main(int argc, char* argv[]) {
osquery::initOsquery(argc, argv);
if (FLAGS_query != "") {
if (FLAGS_delay != 0) {
::sleep(FLAGS_delay);
}
for (int i = 0; i < FLAGS_iterations; ++i) {
int err;
LOG(INFO) << "Executing: " << FLAGS_query;
@ -22,6 +27,10 @@ int main(int argc, char* argv[]) {
}
LOG(INFO) << "Query succedded";
}
if (FLAGS_delay != 0) {
::sleep(FLAGS_delay);
}
} else {
LOG(ERROR) << "Usage: run --query=\"<query>\"";
return 1;

23
tools/stress.py Normal file → Executable file
View File

@ -1,4 +1,10 @@
#!/usr/bin/env python
# Copyright 2004-present Facebook. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import time
import argparse
@ -14,7 +20,8 @@ def red(msg):
def green(msg):
return "\033[42m\033[1;37m %s \033[0m" % str(msg)
def main(args):
def stress(args):
"""Small utility to run unittests several times."""
times = []
test = args["run"] if args["run"] is not None else ["make", "test"]
for i in xrange(args["num"]):
@ -24,13 +31,13 @@ def main(args):
stdout, stderr = proc.communicate()
times.append(time.time() - start_time)
if proc.returncode is not 0:
print stdout
print lightred(stderr)
print "%s Test %d failed. (total %6.4fs)" % (
red("FAILED"), i+1, sum(times))
print (stdout)
print (lightred(stderr))
print ("%s Test %d failed. (total %6.4fs)" % (
red("FAILED"), i+1, sum(times)))
return proc.returncode
print "%s Tests passed (%d/%d) rounds. (average %6.4fs) " % (
green("PASSED"), i+1, args["num"], sum(times)/len(times))
print ("%s Tests passed (%d/%d) rounds. (average %6.4fs) " % (
green("PASSED"), i+1, args["num"], sum(times)/len(times)))
return 0
if __name__ == "__main__":
@ -40,4 +47,4 @@ if __name__ == "__main__":
parser.add_argument("run", nargs="?", help="Run specific test binary")
args = parser.parse_args()
exit(main(vars(args)))
exit(stress(vars(args)))