osquery-1/include/osquery/config.h

397 lines
13 KiB
C
Raw Normal View History

/*
* Copyright (c) 2014, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
*/
2014-07-31 00:35:19 +00:00
#pragma once
2014-07-31 00:35:19 +00:00
2014-12-01 09:05:46 +00:00
#include <map>
2014-07-31 00:35:19 +00:00
#include <memory>
#include <vector>
#include <boost/noncopyable.hpp>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <boost/thread/shared_mutex.hpp>
#include <osquery/database/results.h>
2015-01-02 05:55:10 +00:00
#include <osquery/flags.h>
2015-01-30 18:44:25 +00:00
#include <osquery/registry.h>
#include <osquery/status.h>
2014-07-31 00:35:19 +00:00
/// Shorthand alias for Boost.PropertyTree; used below as `pt::ptree`.
namespace pt = boost::property_tree;
2014-08-15 07:25:30 +00:00
namespace osquery {
2014-07-31 00:35:19 +00:00
2015-01-02 05:55:10 +00:00
/// Declaration of the `config_plugin` string flag (via DECLARE_string).
/// The builder or invoker may change the default config plugin.
DECLARE_string(config_plugin);
2015-01-02 05:55:10 +00:00
/**
* @brief A native representation of osquery configuration data.
2014-09-15 18:09:33 +00:00
*
* When you use osquery::Config::getInstance(), you are getting a singleton
* handle to interact with the data stored in an instance of this struct.
2014-09-15 18:09:33 +00:00
*/
struct ConfigData {
2014-09-15 18:09:33 +00:00
/// A vector of all of the queries that are scheduled to execute.
std::map<std::string, ScheduledQuery> schedule;
2014-12-01 09:05:46 +00:00
std::map<std::string, std::string> options;
std::map<std::string, std::vector<std::string> > files;
/// All data catches optional/plugin-parsed configuration keys.
pt::ptree all_data;
2014-07-31 00:35:19 +00:00
};
/// Forward declaration so ConfigPluginRef can be named before the class body.
class ConfigParserPlugin;

/// Shared-ownership handle to a ConfigParserPlugin (see Config::getParser).
using ConfigPluginRef = std::shared_ptr<ConfigParserPlugin>;
/**
* @brief A singleton that exposes accessors to osquery's configuration data.
2014-09-15 18:09:33 +00:00
*
* osquery has two types on configurations. Things that don't change during
* the execution of the process should be configured as command-line
* arguments. Things that can change during the lifetime of program execution
* should be defined using the osquery::config::Config class and the pluggable
* plugin interface that is included with it.
2014-09-15 18:09:33 +00:00
*/
class Config : private boost::noncopyable {
2014-08-15 07:25:30 +00:00
public:
/**
* @brief The primary way to access the Config singleton.
2014-09-15 18:09:33 +00:00
*
* osquery::config::Config::getInstance() provides access to the Config
* singleton
2014-09-15 18:09:33 +00:00
*
* @code{.cpp}
* auto config = osquery::config::Config::getInstance();
* @endcode
2014-09-15 18:09:33 +00:00
*
* @return a singleton instance of Config.
2014-09-15 18:09:33 +00:00
*/
static Config& getInstance() {
static Config cfg;
return cfg;
}
2014-07-31 00:35:19 +00:00
2015-01-02 05:55:10 +00:00
/**
* @brief Call the genConfig method of the config retriever plugin.
*
* This may perform a resource load such as TCP request or filesystem read.
*/
static Status load();
2015-01-02 05:55:10 +00:00
/**
* @brief Update the internal config data.
*
* @param config A map of domain or namespace to config data.
* @return If the config changes were applied.
*/
static Status update(const std::map<std::string, std::string>& config);
/**
2014-12-11 01:35:21 +00:00
* @brief Calculate the has of the osquery config
*
* @return The MD5 of the osquery config
*/
static Status getMD5(std::string& hashString);
2014-12-11 01:35:21 +00:00
2015-05-14 04:19:54 +00:00
/**
* @brief Adds a new query to the schedule queries
*
*/
static void addScheduledQuery(const std::string name, const std::string query, const int interval);
/**
* @brief Checks if the query is already added to the schedule
*
*/
static bool checkScheduledQuery(const std::string query);
/**
* @brief Checks if the query name is already added to the schedule
*
*/
static bool checkScheduledQueryName(const std::string query_name);
/**
* @brief Check to ensure that the config is accessible and properly
* formatted
*
* @return an instance of osquery::Status, indicating the success or failure
* of the operation.
*/
static Status checkConfig();
2014-08-15 07:25:30 +00:00
private:
/**
* @brief Default constructor.
2014-09-15 18:09:33 +00:00
*
* Since instances of Config should only be created via getInstance(),
* Config's constructor is private
*/
Config() : force_merge_success_(false) {}
~Config(){}
Config(Config const&);
void operator=(Config const&);
2014-12-11 01:35:21 +00:00
/**
* @brief Uses the specified config retriever to populate a string with the
* config JSON.
*
* Internally, genConfig checks to see if there was a config retriever
* specified on the command-line. If there was, it checks to see if that
* config retriever actually exists. If it does, it gets used to generate
* configuration data. If it does not, an error is logged.
*
* @return status indicating the success or failure of the operation.
2014-12-11 01:35:21 +00:00
*/
static Status genConfig();
2015-02-16 02:15:06 +00:00
/// Merge a retrieved config source JSON into a working ConfigData.
static Status mergeConfig(const std::string& source, ConfigData& conf);
public:
/**
* @brief Record performance (monitoring) information about a scheduled query.
*
* The daemon and query scheduler will optionally record process metadata
* before and after executing each query. This can be compared and reported
* on an interval or within the osquery_schedule table.
*
* The config consumes and calculates the optional performance differentials.
* It would also be possible to store this in the RocksDB backing store or
* report directly to a LoggerPlugin sink. The Config is the most appropriate
* as the metrics are transient to the process running the schedule and apply
* to the updates/changes reflected in the schedule, from the config.
*
* @param name The unique name of the scheduled item
* @param delay Number of seconds (wall time) taken by the query
* @param size Number of characters generated by query
* @param t0 the process row before the query
* @param t1 the process row after the query
2014-09-15 18:09:33 +00:00
*/
static void recordQueryPerformance(const std::string& name,
size_t delay,
size_t size,
const Row& t0,
const Row& t1);
private:
/// The raw osquery config data in a native format
ConfigData data_;
/// The raw JSON source map from the config plugin.
std::map<std::string, std::string> raw_;
/// The reader/writer config data mutex.
boost::shared_mutex mutex_;
/// Enforce merge success.
bool force_merge_success_;
private:
static const pt::ptree& getParsedData(const std::string& parser);
Major YARA refactor and enhancements 1. Rename yara_matches to yara_events. 2. Add support for Config::getParser(). - This returns a ConfigPluginRef, which is the ConfigParser for the given key. - Being able to get the parser is useful because the YARAConfigParserPlugin uses it to store the compiled rules as an attribute. 3. Finish rename and use ConfigParserPlugin. - Finish the table rename to yara_events. - Use the new ConfigParserPlugin interface to parse the YARA configuration. The file_paths and signatures are stored in the ConfigParserPlugin named "yara" under the key "yara". The rules are compiled and stored as a private attribute of the same ConfigParserPlugin object. Here is an example config using this new structure: { // Description of the YARA feature. "yara": { "signatures": { // Each key is an arbitrary group name to give the signatures listed "sig_group_1": [ "/Users/wxs/foo.sig", "/Users/wxs//bar.sig" ], "sig_group_2": [ "/Users/wxs/baz.sig" ] }, "file_paths": { // Each key is a key from file_paths // The value is a list of signature groups to run when an event fires // These will be watched for and scanned when the event framework // fire off an event to yara_events table "system_binaries": [ "sig_group_1" ], "tmp": [ "sig_group_1", "sig_group_2" ] } }, // Paths to watch for filesystem events "file_paths": { "system_binaries": [ "/usr/bin/%", "/usr/sbin/%" ], "tmp": [ "/Users/wxs/tmp/%%" ] } } - Currently the signature file must be an absolute path. 3. Move common YARA code to yara_utils. - In preparation for the yara table (different from yara_events) I'm moving the common YARA code into a separate place which is shared between the two tables. 4. Add yara table. - This allows you to do things like: ```sql select * from yara where path="/bin/ls" and sigfile="/tmp/foo.sig"; select * from yara where path="/bin/ls" and sig_group="sig_group_1"; ``` - The latter will use the signature grouping from the config. 5. Check for keys not existing.
2015-04-17 20:03:43 +00:00
static const ConfigPluginRef getParser(const std::string& parser);
/// A default, empty property tree used when a missing parser is requested.
pt::ptree empty_data_;
private:
/// Config accessors, `ConfigDataInstance`, are the forced use of the config
/// data. This forces the caller to use a shared read lock.
friend class ConfigDataInstance;
private:
FRIEND_TEST(ConfigTests, test_locking);
};
/**
* @brief All accesses to the Config's data must request a ConfigDataInstance.
*
* This class will request a read-only lock of the config's changeable internal
* data structures such as query schedule, options, monitored files, etc.
*
* Since a variable config plugin may implement `update` calls, internal uses
* of config data needs simple read and write locking.
*/
class ConfigDataInstance {
public:
ConfigDataInstance() : lock_(Config::getInstance().mutex_) {}
~ConfigDataInstance() { lock_.unlock(); }
/// Helper accessor for Config::data_.schedule.
const std::map<std::string, ScheduledQuery> schedule() const {
return Config::getInstance().data_.schedule;
}
/// Helper accessor for Config::data_.options.
const std::map<std::string, std::string>& options() const {
return Config::getInstance().data_.options;
}
/// Helper accessor for Config::data_.files.
const std::map<std::string, std::vector<std::string> >& files() const {
return Config::getInstance().data_.files;
}
const pt::ptree& getParsedData(const std::string& parser) const {
return Config::getParsedData(parser);
Implement YARA table. Currently only for OS X, will port to others soon. Also need to add tests. Remove old comment and add loading message. Implement YARA table for Linux. Use mask properly. Use the various masks to specify the kinds of events we are interested in. This removes the need to do the dirty "DELETED" check when the event fires. Make getYARAFiles return a const map. Switch to LOG(WARNING) and emit error number. Add vim .swp files to .gitignore. Add yara_utils.(c|h). Start to condense common code between the Linux and Darwin YARA tables into a yara_utils.h. Right now it includes a function to compile rules and store the results back in the map, indexed by category. It also has the callback used by YARA when a rule is processed. I can not move much more than that for the row creation code because the structures used in the event callback are slightly different. Include a better error message. The errors are still printed by the compiler callback, but this will allow my future work to return a Status from the event initialization to print a useful message in summary. Make Subscriber init() return Status. Each EventSubscriber::init() now returns a Status. If the init() fails for any reason the EventSubscriber is still stored but the failure is tracked. EventSubscribers now have a state member, which represents the current state of the subscriber. The current supported states are: uninitialized, running, paused, failed. Currently the only meaningful ones are running and failed, but I put paused in there as a forward-looking feature. Subscriptions now have a subscriber_name member. This is used in EventPublisherPlugin::fire() as a lookup to get the EventSubscriber and check the state. If the EventSubscriber is not running the event will not fire. Only the EventSubscribers on OS X are using this. I'll do the Linux implementation next. Chase the init() changes to Linux. This brings the Linux YARA table in line with the OS X one. 
Require a EventSubscriberID when creating a subscription. Now that Subscriptions are "tied" to EventSubscribers you must create a Subscription with the name of the Subscriber it is for. This is because when the event fires the list of Subscriptions is walked and the name is used to lookup the EventSubscriber and make sure it is in the running state. Fix various tests. Some tests would fire an event with only a Subscription, which is no longer a valid thing to do. For these tests an EventSubscription is created and registered in the EventFactory. When Subscriptions are created pass the name of the EventSubscriber to them. In some cases where no event is ever fired it is fine to pass a bogus name. Fix inotify tests. Move a test down so the class is defined and make sure to create an EventSubscriber and use it properly. Add support for yara to provision.sh. Right now this grabs yara 3.3.0 and applies the patch to fix min() and max(), which is commit fc4696c8b725be1ac099d340359c8d550d116041 in the yara repo. This has been tested under Ubuntu 14.04 only. Remove NOMINMAX. This is no longer necessary after the patch was backported to 3.3.0. Revert "Add support for yara to provision.sh." This reverts commit a8bd371498c0979f070adeff23d05571882ac3f1. Use vendored YARA code in third-party. This switches to using the YARA code contained in third-party, including the patch to fix min/max macros. Fix mismerge. Remove unused function after merge. Well, soon to be unused as soon as I fix up the Linux YARA table. ;) Chase config changes. Make the Linux YARA table use ConfigDataInstance along with files() and yaraFiles().
2015-03-10 13:22:16 +00:00
}
Major YARA refactor and enhancements 1. Rename yara_matches to yara_events. 2. Add support for Config::getParser(). - This returns a ConfigPluginRef, which is the ConfigParser for the given key. - Being able to get the parser is useful because the YARAConfigParserPlugin uses it to store the compiled rules as an attribute. 3. Finish rename and use ConfigParserPlugin. - Finish the table rename to yara_events. - Use the new ConfigParserPlugin interface to parse the YARA configuration. The file_paths and signatures are stored in the ConfigParserPlugin named "yara" under the key "yara". The rules are compiled and stored as a private attribute of the same ConfigParserPlugin object. Here is an example config using this new structure: { // Description of the YARA feature. "yara": { "signatures": { // Each key is an arbitrary group name to give the signatures listed "sig_group_1": [ "/Users/wxs/foo.sig", "/Users/wxs//bar.sig" ], "sig_group_2": [ "/Users/wxs/baz.sig" ] }, "file_paths": { // Each key is a key from file_paths // The value is a list of signature groups to run when an event fires // These will be watched for and scanned when the event framework // fire off an event to yara_events table "system_binaries": [ "sig_group_1" ], "tmp": [ "sig_group_1", "sig_group_2" ] } }, // Paths to watch for filesystem events "file_paths": { "system_binaries": [ "/usr/bin/%", "/usr/sbin/%" ], "tmp": [ "/Users/wxs/tmp/%%" ] } } - Currently the signature file must be an absolute path. 3. Move common YARA code to yara_utils. - In preparation for the yara table (different from yara_events) I'm moving the common YARA code into a separate place which is shared between the two tables. 4. Add yara table. - This allows you to do things like: ```sql select * from yara where path="/bin/ls" and sigfile="/tmp/foo.sig"; select * from yara where path="/bin/ls" and sig_group="sig_group_1"; ``` - The latter will use the signature grouping from the config. 5. Check for keys not existing.
2015-04-17 20:03:43 +00:00
const ConfigPluginRef getParser(const std::string& parser) const {
return Config::getParser(parser);
}
/// Helper accessor for Config::data_.all_data.
const pt::ptree& data() const { return Config::getInstance().data_.all_data; }
private:
/// A read lock on the reader/writer config data accessor/update mutex.
boost::shared_lock<boost::shared_mutex> lock_;
2014-07-31 00:35:19 +00:00
};
2015-01-30 18:44:25 +00:00
/**
 * @brief Superclass for the pluggable config component.
 *
 * In order to ease the distribution of configurations to hosts running
 * osquery, we take advantage of a plugin interface which allows you to
 * integrate osquery with your internal configuration distribution mechanisms.
 * You may use ZooKeeper, files on disk, a custom solution, etc. In order to
 * use your specific configuration distribution system, one simply needs to
 * create a custom subclass of ConfigPlugin. That subclass should implement
 * the ConfigPlugin::genConfig method.
 *
 * Consider the following example:
 *
 * @code{.cpp}
 *   class TestConfigPlugin : public ConfigPlugin {
 *    public:
 *     virtual Status genConfig(std::map<std::string, std::string>& config) {
 *       std::string content;
 *       auto status = getMyConfig(content);
 *       // Store the retrieved data under a key of the plugin's choosing.
 *       config["test"] = content;
 *       return status;
 *     }
 *   };
 *
 *   REGISTER(TestConfigPlugin, "config", "test");
 * @endcode
 */
class ConfigPlugin : public Plugin {
 public:
  /**
   * @brief Virtual method which should implement custom config retrieval.
   *
   * ConfigPlugin::genConfig should be implemented by subclasses of
   * ConfigPlugin which need to retrieve config data in a custom way.
   *
   * @param config Output map to be filled with the retrieved config data.
   * NOTE(review): Config::update documents its map as "domain or namespace
   * to config data"; presumably the same shape is expected here -- confirm.
   *
   * @return an osquery::Status instance which indicates the success or
   * failure of config retrieval.
   */
  virtual Status genConfig(std::map<std::string, std::string>& config) = 0;

  /// Plugin call entry point; the definition is not part of this header.
  Status call(const PluginRequest& request, PluginResponse& response);
};
/// Alias for the merged, parsed property tree handed to config parsers.
using ConfigTree = pt::ptree;
/**
 * @brief A pluggable configuration parser.
 *
 * A ConfigPlugin populates the osquery config instance from JSON. Such a
 * plugin may update the config data asynchronously and may read from several
 * sources, as the "filesystem" plugin does when reading multiple files.
 *
 * A ConfigParserPlugin is handed the merged configuration when osquery
 * starts, and the updated (still merged) configuration whenever a
 * ConfigPlugin updates the instance asynchronously. Every parser names the
 * set of top-level JSON keys it wants to receive. Values for a key coming
 * from multiple sources are auto-merged by the config instance when they are
 * dictionaries or lists:
 *
 * - Dictionary: the dictionary keys of every source carrying the top-level
 *   key are merged and replaced following the lexical order of the sources
 *   (for the "filesystem" config plugin, the lexical sorting of filenames).
 * - List: the contents from every source carrying the top-level key are
 *   appended.
 *
 * A config parser plugin lives alongside the config instance for the life of
 * the osquery process. It may act at config load and config update "time",
 * keep its own data members, and be reached through the Config class API.
 */
class ConfigParserPlugin : public Plugin {
 protected:
  /**
   * @brief List the top-level config keys this parser wants in updates.
   *
   * ::update is given a map of these keys, each with a JSON-parsed property
   * tree of configuration data.
   *
   * @return A list of string top-level JSON keys.
   */
  virtual std::vector<std::string> keys() = 0;

  /**
   * @brief Accept a merged property tree for every requested top-level key.
   *
   * Invoked when the Config instance is first loaded with data from the
   * active config plugin, and again when an async ConfigPlugin update
   * changes it. Each parser gets a map of merged data for every key it
   * requested through keys().
   *
   * @param config A JSON-parsed property tree map.
   * @return Failure if the parser should no longer receive updates.
   */
  virtual Status update(const std::map<std::string, ConfigTree>& config) = 0;

  /// Lets the config parser hold on to some global state.
  pt::ptree data_;

 private:
  Status setUp();

  /// Config::update will call all appropriate parser updates.
  friend class Config;

  /// A config data instance implements a read/write lock around data_ access.
  friend class ConfigDataInstance;
};
/**
 * @brief Calculate a splayed integer based on a variable splay percentage.
 *
 * The value of splayPercent must be between 1 and 100. If it is not, the
 * value of original will be returned.
 *
 * @param original The original value to be modified.
 * @param splayPercent The percentage by which to splay the original value.
 *
 * @return The modified version of original.
 */
int splayValue(int original, int splayPercent);
2015-01-30 18:44:25 +00:00
/**
 * @brief Config plugin registry.
 *
 * This creates an osquery registry for "config" which may implement
 * ConfigPlugin. A ConfigPlugin's call API should make use of genConfig
 * after reading JSON data in the plugin implementation.
 */
CREATE_REGISTRY(ConfigPlugin, "config");
/**
 * @brief ConfigParser plugin registry.
 *
 * This creates an osquery registry for "config_parser" which may implement
 * ConfigParserPlugin. A ConfigParserPlugin should not export any call actions
 * but rather have a simple property tree-accessor API through Config.
 */
CREATE_LAZY_REGISTRY(ConfigParserPlugin, "config_parser");
2014-08-15 07:25:30 +00:00
}