/* * Copyright (c) 2014-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. An additional grant * of patent rights can be found in the PATENTS file in the same directory. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "osquery/core/conversions.h" namespace pt = boost::property_tree; namespace osquery { /// The config plugin must be known before reading options. CLI_FLAG(string, config_plugin, "filesystem", "Config plugin name"); CLI_FLAG(bool, config_check, false, "Check the format of an osquery config and exit"); CLI_FLAG(bool, config_dump, false, "Dump the contents of the configuration"); DECLARE_string(config_plugin); DECLARE_string(pack_delimiter); DECLARE_bool(disable_events); /** * @brief The backing store key name for the executing query. * * The config maintains schedule statistics and tracks failed executions. * On process or worker resume an initializer or config may check if the * resume was the result of a failure during an executing query. */ const std::string kExecutingQuery = "executing_query"; const std::string kFailedQueries = "failed_queries"; // The config may be accessed and updated asynchronously; use mutexes. Mutex config_schedule_mutex_; Mutex config_performance_mutex_; Mutex config_files_mutex_; Mutex config_hash_mutex_; Mutex config_valid_mutex_; using PackRef = std::shared_ptr; /** * The schedule is an iterable collection of Packs. When you iterate through * a schedule, you only get the packs that should be running on the host that * you're currently operating on. */ class Schedule : private boost::noncopyable { public: /// Under the hood, the schedule is just a list of the Pack objects using container = std::list; /** * @brief Create a schedule maintained by the configuration. * * This will check for previously executing queries. If any query was * executing it is considered in a 'dirty' state and should generate logs. * The schedule may also choose to blacklist this query. */ Schedule(); /** * @brief This class' iteration function * * Our step operation will be called on each element in packs_. It is * responsible for determining if that element should be returned as the * next iterator element or skipped. */ struct Step { bool operator()(PackRef& pack) { return pack->shouldPackExecute(); } }; /// Add a pack to the schedule void add(PackRef&& pack) { remove(pack->getName(), pack->getSource()); packs_.push_back(pack); } /// Remove a pack, by name. void remove(const std::string& pack) { remove(pack, ""); } /// Remove a pack by name and source. void remove(const std::string& pack, const std::string& source) { packs_.remove_if([pack, source](PackRef& p) { if (p->getName() == pack && (p->getSource() == source || source == "")) { Config::getInstance().removeFiles(source + FLAGS_pack_delimiter + p->getName()); return true; } return false; }); } /// Remove all packs by source. void removeAll(const std::string& source) { packs_.remove_if(([source](PackRef& p) { if (p->getSource() == source) { Config::getInstance().removeFiles(source + FLAGS_pack_delimiter + p->getName()); return true; } return false; })); } /// Boost gives us a nice template for maintaining the state of the iterator using iterator = boost::filter_iterator; iterator begin() { return iterator(packs_.begin(), packs_.end()); } iterator end() { return iterator(packs_.end(), packs_.end()); } PackRef& last() { return packs_.back(); } private: /// Underlying storage for the packs container packs_; /** * @brief The schedule will check and record previously executing queries. * * If a query is found on initialization, the name will be recorded, it is * possible to skip previously failed queries. */ std::string failed_query_; /** * @brief List of blacklisted queries. * * A list of queries that are blacklisted from executing due to prior * failures. If a query caused a worker to fail it will be recorded during * the next execution and saved to the blacklist. */ std::map blacklist_; private: friend class Config; }; void restoreScheduleBlacklist(std::map& blacklist) { std::string content; getDatabaseValue(kPersistentSettings, kFailedQueries, content); auto blacklist_pairs = osquery::split(content, ":"); if (blacklist_pairs.size() == 0 || blacklist_pairs.size() % 2 != 0) { // Nothing in the blacklist, or malformed data. return; } size_t current_time = getUnixTime(); for (size_t i = 0; i < blacklist_pairs.size() / 2; i++) { // Fill in a mapping of query name to time the blacklist expires. long int expire = 0; safeStrtol(blacklist_pairs[(i * 2) + 1], 10, expire); if (expire > 0 && current_time < (size_t)expire) { blacklist[blacklist_pairs[(i * 2)]] = (size_t)expire; } } } void saveScheduleBlacklist(const std::map& blacklist) { std::string content; for (const auto& query : blacklist) { if (!content.empty()) { content += ":"; } content += query.first + ":" + std::to_string(query.second); } setDatabaseValue(kPersistentSettings, kFailedQueries, content); } Schedule::Schedule() { if (Registry::external()) { // Extensions should not restore or save schedule details. return; } // Parse the schedule's query blacklist from backing storage. restoreScheduleBlacklist(blacklist_); // Check if any queries were executing when the tool last stopped. getDatabaseValue(kPersistentSettings, kExecutingQuery, failed_query_); if (!failed_query_.empty()) { LOG(WARNING) << "Scheduled query may have failed: " << failed_query_; setDatabaseValue(kPersistentSettings, kExecutingQuery, ""); // Add this query name to the blacklist and save the blacklist. blacklist_[failed_query_] = getUnixTime() + 86400; saveScheduleBlacklist(blacklist_); } } Config::Config() : schedule_(std::make_shared()), valid_(false), start_time_(std::time(nullptr)) {} void Config::addPack(const std::string& name, const std::string& source, const pt::ptree& tree) { WriteLock wlock(config_schedule_mutex_); try { schedule_->add(std::make_shared(name, source, tree)); if (schedule_->last()->shouldPackExecute()) { applyParsers(source + FLAGS_pack_delimiter + name, tree, true); } } catch (const std::exception& e) { LOG(WARNING) << "Error adding pack: " << name << ": " << e.what(); } } void Config::removePack(const std::string& pack) { WriteLock wlock(config_schedule_mutex_); return schedule_->remove(pack); } void Config::addFile(const std::string& source, const std::string& category, const std::string& path) { WriteLock wlock(config_files_mutex_); files_[source][category].push_back(path); } void Config::removeFiles(const std::string& source) { WriteLock wlock(config_files_mutex_); if (files_.count(source)) { FileCategories().swap(files_[source]); } } void Config::scheduledQueries( std::function predicate) { WriteLock lock(config_schedule_mutex_); for (const PackRef& pack : *schedule_) { for (const auto& it : pack->getSchedule()) { std::string name = it.first; // The query name may be synthetic. if (pack->getName() != "main" && pack->getName() != "legacy_main") { name = "pack" + FLAGS_pack_delimiter + pack->getName() + FLAGS_pack_delimiter + it.first; } // They query may have failed and been added to the schedule's blacklist. if (schedule_->blacklist_.count(name) > 0) { auto blacklisted_query = schedule_->blacklist_.find(name); if (getUnixTime() > blacklisted_query->second) { // The blacklisted query passed the expiration time (remove). schedule_->blacklist_.erase(blacklisted_query); saveScheduleBlacklist(schedule_->blacklist_); } else { // The query is still blacklisted. continue; } } // Call the predicate. predicate(name, it.second); } } } void Config::packs(std::function predicate) { WriteLock lock(config_schedule_mutex_); for (PackRef& pack : schedule_->packs_) { predicate(pack); } } Status Config::load() { valid_ = false; auto& config_plugin = Registry::getActive("config"); if (!Registry::exists("config", config_plugin)) { return Status(1, "Missing config plugin " + config_plugin); } PluginResponse response; auto status = Registry::call("config", {{"action", "genConfig"}}, response); if (!status.ok()) { return status; } // if there was a response, parse it and update internal state valid_ = true; if (response.size() > 0) { if (FLAGS_config_dump) { // If config checking is enabled, debug-write the raw config data. for (const auto& content : response[0]) { fprintf(stdout, "{\"%s\": %s}\n", content.first.c_str(), content.second.c_str()); } // Instead of forcing the shutdown, request one since the config plugin // may have started services. Initializer::requestShutdown(); } status = update(response[0]); } loaded_ = true; return status; } /** * @brief Boost's 1.59 property tree based JSON parser does not accept comments. * * For semi-compatibility with existing configurations we will attempt to strip * hash and C++ style comments. It is OK for the config update to be latent * as it is a single event. But some configuration plugins may update running * configurations. */ inline void stripConfigComments(std::string& json) { std::string sink; for (auto& line : osquery::split(json, "\n")) { boost::trim(line); if (line.size() > 0 && line[0] == '#') { continue; } if (line.size() > 1 && line[0] == '/' && line[1] == '/') { continue; } sink += line + '\n'; } json = sink; } Status Config::updateSource(const std::string& source, const std::string& json) { // Compute a 'synthesized' hash using the content before it is parsed. hashSource(source, json); // Remove all packs from this source. schedule_->removeAll(source); // Remove all files from this source. removeFiles(source); // load the config (source.second) into a pt::ptree pt::ptree tree; try { auto clone = json; stripConfigComments(clone); std::stringstream json_stream; json_stream << clone; pt::read_json(json_stream, tree); } catch (const pt::json_parser::json_parser_error& e) { return Status(1, "Error parsing the config JSON"); } // extract the "schedule" key and store it as the main pack if (tree.count("schedule") > 0 && !Registry::external()) { auto& schedule = tree.get_child("schedule"); pt::ptree main_pack; main_pack.add_child("queries", schedule); addPack("main", source, main_pack); } if (tree.count("scheduledQueries") > 0 && !Registry::external()) { auto& scheduled_queries = tree.get_child("scheduledQueries"); pt::ptree queries; for (const std::pair& query : scheduled_queries) { auto query_name = query.second.get("name", ""); if (query_name.empty()) { return Status(1, "Error getting name from legacy scheduled query"); } queries.add_child(query_name, query.second); } pt::ptree legacy_pack; legacy_pack.add_child("queries", queries); addPack("legacy_main", source, legacy_pack); } // extract the "packs" key into additional pack objects if (tree.count("packs") > 0 && !Registry::external()) { auto& packs = tree.get_child("packs"); for (const auto& pack : packs) { auto value = packs.get(pack.first, ""); if (value.empty()) { // The pack is a JSON object, treat the content as pack data. addPack(pack.first, source, pack.second); } else { genPack(pack.first, source, value); } } } applyParsers(source, tree, false); return Status(0, "OK"); } Status Config::genPack(const std::string& name, const std::string& source, const std::string& target) { // If the pack value is a string (and not a JSON object) then it is a // resource to be handled by the config plugin. PluginResponse response; PluginRequest request = { {"action", "genPack"}, {"name", name}, {"value", target}}; Registry::call("config", request, response); if (response.size() == 0 || response[0].count(name) == 0) { return Status(1, "Invalid plugin response"); } try { pt::ptree pack_tree; std::stringstream pack_stream; pack_stream << response[0][name]; pt::read_json(pack_stream, pack_tree); addPack(name, source, pack_tree); } catch (const pt::json_parser::json_parser_error& e) { LOG(WARNING) << "Error parsing the pack JSON: " << name; } return Status(0); } void Config::applyParsers(const std::string& source, const pt::ptree& tree, bool pack) { // Iterate each parser. for (const auto& plugin : Registry::all("config_parser")) { std::shared_ptr parser = nullptr; try { parser = std::dynamic_pointer_cast(plugin.second); } catch (const std::bad_cast& e) { LOG(ERROR) << "Error casting config parser plugin: " << plugin.first; } if (parser == nullptr || parser.get() == nullptr) { continue; } // For each key requested by the parser, add a property tree reference. std::map parser_config; for (const auto& key : parser->keys()) { if (tree.count(key) > 0) { parser_config[key] = tree.get_child(key); } else { parser_config[key] = pt::ptree(); } } // The config parser plugin will receive a copy of each property tree for // each top-level-config key. The parser may choose to update the config's // internal state parser->update(source, parser_config); } } Status Config::update(const std::map& config) { // A config plugin may call update from an extension. This will update // the config instance within the extension process and the update must be // reflected in the core. if (Registry::external()) { for (const auto& source : config) { PluginRequest request = { {"action", "update"}, {"source", source.first}, {"data", source.second}, }; // A "update" registry item within core should call the core's update // method. The config plugin call action handling must also know to // update. Registry::call("config", "update", request); } } // Iterate though each source and overwrite config data. // This will add/overwrite pack data, append to the schedule, change watched // files, set options, etc. // Before this occurs, take an opportunity to purge stale state. purge(); for (const auto& source : config) { auto status = updateSource(source.first, source.second); if (!status.ok()) { return status; } } if (loaded_) { // The config has since been loaded. // This update call is most likely a response to an async update request // from a config plugin. This request should request all plugins to update. for (const auto& registry : Registry::all()) { if (registry.first == "event_publisher" || registry.first == "event_subscriber") { continue; } registry.second->configure(); } // If events are enabled configure the subscribers before publishers. if (!FLAGS_disable_events) { Registry::registry("event_subscriber")->configure(); Registry::registry("event_publisher")->configure(); } } return Status(0, "OK"); } void Config::purge() { // The first use of purge is removing expired query results. std::vector saved_queries; scanDatabaseKeys(kQueries, saved_queries); const auto& schedule = this->schedule_; auto queryExists = [&schedule](const std::string& query_name) { for (const auto& pack : schedule->packs_) { const auto& pack_queries = pack->getSchedule(); if (pack_queries.count(query_name)) { return true; } } return false; }; WriteLock lock(config_schedule_mutex_); // Iterate over each result set in the database. for (const auto& saved_query : saved_queries) { if (queryExists(saved_query)) { continue; } std::string content; getDatabaseValue(kPersistentSettings, "timestamp." + saved_query, content); if (content.empty()) { // No timestamp is set for this query, perhaps this is the first time // query results expiration is applied. setDatabaseValue(kPersistentSettings, "timestamp." + saved_query, std::to_string(getUnixTime())); continue; } // Parse the timestamp and compare. size_t last_executed = 0; try { last_executed = boost::lexical_cast(content); } catch (const boost::bad_lexical_cast& e) { // Erase the timestamp as is it potentially corrupt. deleteDatabaseValue(kPersistentSettings, "timestamp." + saved_query); continue; } if (last_executed < getUnixTime() - 592200) { // Query has not run in the last week, expire results and interval. deleteDatabaseValue(kQueries, saved_query); deleteDatabaseValue(kPersistentSettings, "interval." + saved_query); deleteDatabaseValue(kPersistentSettings, "timestamp." + saved_query); VLOG(1) << "Expiring results for scheduled query: " << saved_query; } } } void Config::reset() { schedule_ = std::make_shared(); std::map().swap(performance_); std::map().swap(files_); std::map().swap(hash_); valid_ = false; loaded_ = false; start_time_ = 0; } void Config::recordQueryPerformance(const std::string& name, size_t delay, size_t size, const Row& r0, const Row& r1) { WriteLock lock(config_performance_mutex_); if (performance_.count(name) == 0) { performance_[name] = QueryPerformance(); } // Grab access to the non-const schedule item. auto& query = performance_.at(name); BIGINT_LITERAL diff = 0; if (!r1.at("user_time").empty() && !r0.at("user_time").empty()) { diff = AS_LITERAL(BIGINT_LITERAL, r1.at("user_time")) - AS_LITERAL(BIGINT_LITERAL, r0.at("user_time")); if (diff > 0) { query.user_time += diff; } } if (!r1.at("system_time").empty() && !r0.at("system_time").empty()) { diff = AS_LITERAL(BIGINT_LITERAL, r1.at("system_time")) - AS_LITERAL(BIGINT_LITERAL, r0.at("system_time")); if (diff > 0) { query.system_time += diff; } } if (!r1.at("resident_size").empty() && !r0.at("resident_size").empty()) { diff = AS_LITERAL(BIGINT_LITERAL, r1.at("resident_size")) - AS_LITERAL(BIGINT_LITERAL, r0.at("resident_size")); if (diff > 0) { // Memory is stored as an average of RSS changes between query executions. query.average_memory = (query.average_memory * query.executions) + diff; query.average_memory = (query.average_memory / (query.executions + 1)); } } query.wall_time += delay; query.output_size += size; query.executions += 1; query.last_executed = getUnixTime(); // Clear the executing query (remove the dirty bit). setDatabaseValue(kPersistentSettings, kExecutingQuery, ""); } void Config::recordQueryStart(const std::string& name) { // There should only ever be a single executing query in the schedule. setDatabaseValue(kPersistentSettings, kExecutingQuery, name); // Store the time this query name last executed for later results eviction. // When configuration updates occur the previous schedule is searched for // 'stale' query names, aka those that have week-old or longer last execute // timestamps. Offending queries have their database results purged. setDatabaseValue( kPersistentSettings, "timestamp." + name, std::to_string(getUnixTime())); } void Config::getPerformanceStats( const std::string& name, std::function predicate) { if (performance_.count(name) > 0) { WriteLock lock(config_performance_mutex_); predicate(performance_.at(name)); } } void Config::hashSource(const std::string& source, const std::string& content) { WriteLock wlock(config_hash_mutex_); hash_[source] = hashFromBuffer(HASH_TYPE_MD5, &(content.c_str())[0], content.size()); } Status Config::getMD5(std::string& hash) { if (!valid_) { return Status(1, "Current config is not valid"); } WriteLock lock(config_hash_mutex_); std::vector buffer; buffer.reserve(hash_.size() * 32); auto add = [&buffer](const std::string& text) { for (const auto& c : text) { buffer.push_back(c); } }; for (const auto& it : hash_) { add(it.second); } hash = hashFromBuffer(HASH_TYPE_MD5, &buffer[0], buffer.size()); return Status(0, "OK"); } const std::shared_ptr Config::getParser( const std::string& parser) { if (!Registry::exists("config_parser", parser, true)) { return nullptr; } auto plugin = Registry::get("config_parser", parser); return std::dynamic_pointer_cast(plugin); } void Config::files( std::function& files)> predicate) { WriteLock lock(config_files_mutex_); for (const auto& it : files_) { for (const auto& category : it.second) { predicate(category.first, category.second); } } } Status ConfigPlugin::genPack(const std::string& name, const std::string& value, std::string& pack) { return Status(1, "Not implemented"); } Status ConfigPlugin::call(const PluginRequest& request, PluginResponse& response) { if (request.count("action") == 0) { return Status(1, "Config plugins require an action in PluginRequest"); } if (request.at("action") == "genConfig") { std::map config; auto stat = genConfig(config); response.push_back(config); return stat; } else if (request.at("action") == "genPack") { if (request.count("name") == 0 || request.count("value") == 0) { return Status(1, "Missing name or value"); } std::string pack; auto stat = genPack(request.at("name"), request.at("value"), pack); response.push_back({{request.at("name"), pack}}); return stat; } else if (request.at("action") == "update") { if (request.count("source") == 0 || request.count("data") == 0) { return Status(1, "Missing source or data"); } return Config::getInstance().update( {{request.at("source"), request.at("data")}}); } return Status(1, "Config plugin action unknown: " + request.at("action")); } Status ConfigParserPlugin::setUp() { for (const auto& key : keys()) { data_.put(key, ""); } return Status(0, "OK"); } }