mirror of
https://github.com/valitydev/osquery-1.git
synced 2024-11-07 18:08:53 +00:00
558 lines
17 KiB
C++
558 lines
17 KiB
C++
/**
|
|
* Copyright (c) 2014-present, Facebook, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This source code is licensed under both the Apache 2.0 license (found in the
|
|
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
* in the COPYING file in the root directory of this source tree).
|
|
* You may select, at your option, one of the above-listed licenses.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <map>
|
|
#include <set>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include <boost/property_tree/ptree.hpp>
|
|
|
|
#include <osquery/core.h>
|
|
#include <osquery/status.h>
|
|
|
|
#include "osquery/core/json.h"
|
|
|
|
namespace osquery {
|
|
|
|
/**
|
|
* @brief A variant type for the SQLite type affinities.
|
|
*/
|
|
using RowData = std::string;
|
|
|
|
/**
|
|
* @brief A single row from a database query
|
|
*
|
|
* Row is a simple map where individual column names are keys, which map to
|
|
* the Row's respective value
|
|
*/
|
|
using Row = std::map<std::string, RowData>;
|
|
|
|
/**
|
|
* @brief A vector of column names associated with a query
|
|
*
|
|
* ColumnNames is a vector of the column names, in order, returned by a query.
|
|
*/
|
|
using ColumnNames = std::vector<std::string>;
|
|
|
|
/**
|
|
* @brief Serialize a Row into a property tree
|
|
*
|
|
* @param r the Row to serialize
|
|
* @param tree the output property tree
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeRow(const Row& r, boost::property_tree::ptree& tree);
|
|
Status serializeRowRJ(const Row& r, rapidjson::Document& d);
|
|
|
|
/**
|
|
* @brief Serialize a Row object into a JSON string
|
|
*
|
|
* @param r the Row to serialize
|
|
* @param json the output JSON string
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeRowJSON(const Row& r, std::string& json);
|
|
Status serializeRowJSONRJ(const Row& r, std::string& json);
|
|
|
|
/**
|
|
* @brief Deserialize a Row object from a property tree
|
|
*
|
|
* @param tree the input property tree
|
|
* @param r the output Row structure
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status deserializeRow(const boost::property_tree::ptree& tree, Row& r);
|
|
Status deserializeRowRJ(const rapidjson::Value& v, Row& r);
|
|
|
|
/**
|
|
* @brief Deserialize a Row object from a JSON string
|
|
*
|
|
* @param json the input JSON string
|
|
* @param r the output Row structure
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status deserializeRowJSON(const std::string& json, Row& r);
|
|
Status deserializeRowJSONRJ(const std::string& json, Row& r);
|
|
|
|
/**
|
|
* @brief The result set returned from a osquery SQL query
|
|
*
|
|
* QueryData is the canonical way to represent the results of SQL queries in
|
|
* osquery. It's just a vector of Rows.
|
|
*/
|
|
using QueryData = std::vector<Row>;
|
|
|
|
/**
|
|
* @brief Set representation result returned from a osquery SQL query
|
|
*
|
|
* QueryDataSet - It's set of Rows for fast search of a specific row.
|
|
*/
|
|
using QueryDataSet = std::multiset<Row>;
|
|
|
|
/**
|
|
* @brief Serialize a QueryData object into a property tree
|
|
*
|
|
* @param q the QueryData to serialize
|
|
* @param tree the output property tree
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeQueryData(const QueryData& q,
|
|
boost::property_tree::ptree& tree);
|
|
Status serializeQueryDataRJ(const QueryData& q, rapidjson::Document& d);
|
|
|
|
/**
|
|
* @brief Serialize a QueryData object into a property tree
|
|
*
|
|
* @param q the QueryData to serialize
|
|
* @param cols the TableColumn vector indicating column order
|
|
* @param tree the output property tree
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeQueryData(const QueryData& q,
|
|
const ColumnNames& cols,
|
|
boost::property_tree::ptree& tree);
|
|
Status serializeQueryDataRJ(const QueryData& q,
|
|
const ColumnNames& cols,
|
|
rapidjson::Document& d);
|
|
|
|
/**
|
|
* @brief Serialize a QueryData object into a JSON string
|
|
*
|
|
* @param q the QueryData to serialize
|
|
* @param json the output JSON string
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeQueryDataJSON(const QueryData& q, std::string& json);
|
|
Status serializeQueryDataJSONRJ(const QueryData& q, std::string& json);
|
|
|
|
/// Inverse of serializeQueryData, convert property tree to QueryData.
|
|
Status deserializeQueryData(const boost::property_tree::ptree& tree,
|
|
QueryData& qd);
|
|
/// Inverse of serializeQueryData, convert property tree to QueryDataSet.
|
|
Status deserializeQueryData(const boost::property_tree::ptree& tree,
|
|
QueryDataSet& qd);
|
|
|
|
Status deserializeQueryDataRJ(const rapidjson::Value& v, QueryData& qd);
|
|
|
|
/// Inverse of serializeQueryDataJSON, convert a JSON string to QueryData.
|
|
Status deserializeQueryDataJSON(const std::string& json, QueryData& qd);
|
|
/// Inverse of serializeQueryDataJSON, convert a JSON string to QueryDataSet.
|
|
|
|
Status deserializeQueryDataJSON(const std::string& json, QueryDataSet& qd);
|
|
|
|
/**
|
|
* @brief Data structure representing the difference between the results of
|
|
* two queries
|
|
*
|
|
* The representation of two diffed QueryData result sets. Given and old and
|
|
* new QueryData, DiffResults indicates the "added" subset of rows and the
|
|
* "removed" subset of rows.
|
|
*/
|
|
struct DiffResults {
|
|
/// vector of added rows
|
|
QueryData added;
|
|
|
|
/// vector of removed rows
|
|
QueryData removed;
|
|
|
|
/// equals operator
|
|
bool operator==(const DiffResults& comp) const {
|
|
return (comp.added == added) && (comp.removed == removed);
|
|
}
|
|
|
|
/// not equals operator
|
|
bool operator!=(const DiffResults& comp) const {
|
|
return !(*this == comp);
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Serialize a DiffResults object into a property tree
|
|
*
|
|
* @param d the DiffResults to serialize
|
|
* @param tree the output property tree
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeDiffResults(const DiffResults& d,
|
|
boost::property_tree::ptree& tree);
|
|
|
|
/**
|
|
* @brief Serialize a DiffResults object into a JSON string
|
|
*
|
|
* @param d the DiffResults to serialize
|
|
* @param json the output JSON string
|
|
*
|
|
* @return an instance of osquery::Status, indicating the success or failure
|
|
* of the operation
|
|
*/
|
|
Status serializeDiffResultsJSON(const DiffResults& d, std::string& json);
|
|
|
|
/**
|
|
* @brief Diff QueryDataSet object and QueryData object
|
|
* and create a DiffResults object
|
|
*
|
|
* @param old_ the "old" set of results
|
|
* @param new_ the "new" set of results
|
|
*
|
|
* @return a DiffResults object which indicates the change from old_ to new_
|
|
*
|
|
* @see DiffResults
|
|
*/
|
|
DiffResults diff(QueryDataSet& old_, QueryData& new_);
|
|
|
|
/**
|
|
* @brief Add a Row to a QueryData if the Row hasn't appeared in the QueryData
|
|
* already
|
|
*
|
|
* Note that this function will iterate through the QueryData list until a
|
|
* given Row is found (or not found). This shouldn't be that significant of an
|
|
* overhead for most use-cases, but it's worth keeping in mind before you use
|
|
* this in it's current state.
|
|
*
|
|
* @param q the QueryData list to append to
|
|
* @param r the Row to add to q
|
|
*
|
|
* @return true if the Row was added to the QueryData, false if it was not
|
|
*/
|
|
bool addUniqueRowToQueryData(QueryData& q, const Row& r);
|
|
|
|
/**
|
|
* @brief Construct a new QueryData from an existing one, replacing all
|
|
* non-ASCII characters with their \\u encoding.
|
|
*
|
|
* This function is intended as a workaround for
|
|
* https://svn.boost.org/trac/boost/ticket/8883,
|
|
* and will allow rows containing data with non-ASCII characters to be stored in
|
|
* the database and parsed back into a property tree.
|
|
*
|
|
* @param oldData the old QueryData to copy
|
|
* @param newData the new escaped QueryData object
|
|
*/
|
|
void escapeQueryData(const QueryData& oldData, QueryData& newData);
|
|
|
|
/**
|
|
* @brief performance statistics about a query
|
|
*/
|
|
struct QueryPerformance {
|
|
/// Number of executions.
|
|
size_t executions{0};
|
|
|
|
/// Last UNIX time in seconds the query was executed successfully.
|
|
size_t last_executed{0};
|
|
|
|
/// Total wall time taken
|
|
unsigned long long int wall_time{0};
|
|
|
|
/// Total user time (cycles)
|
|
unsigned long long int user_time{0};
|
|
|
|
/// Total system time (cycles)
|
|
unsigned long long int system_time{0};
|
|
|
|
/// Average memory differentials. This should be near 0.
|
|
unsigned long long int average_memory{0};
|
|
|
|
/// Total characters, bytes, generated by query.
|
|
unsigned long long int output_size{0};
|
|
};
|
|
|
|
/**
|
|
* @brief Represents the relevant parameters of a scheduled query.
|
|
*
|
|
* Within the context of osqueryd, a scheduled query may have many relevant
|
|
* attributes. Those attributes are represented in this data structure.
|
|
*/
|
|
struct ScheduledQuery {
|
|
/// The SQL query.
|
|
std::string query;
|
|
|
|
/// How often the query should be executed, in second.
|
|
size_t interval{0};
|
|
|
|
/// A temporary splayed internal.
|
|
size_t splayed_interval{0};
|
|
|
|
/**
|
|
* @brief Queries are blacklisted based on logic in the configuration.
|
|
*
|
|
* Most calls to inspect scheduled queries will abstract away the blacklisting
|
|
* concept and only return non-blacklisted queries. The config may be asked
|
|
* to return all queries, thus it is important to capture this optional data.
|
|
*/
|
|
bool blacklisted{false};
|
|
|
|
/// Set of query options.
|
|
std::map<std::string, bool> options;
|
|
|
|
ScheduledQuery() = default;
|
|
|
|
/// equals operator
|
|
bool operator==(const ScheduledQuery& comp) const {
|
|
return (comp.query == query) && (comp.interval == interval);
|
|
}
|
|
|
|
/// not equals operator
|
|
bool operator!=(const ScheduledQuery& comp) const {
|
|
return !(*this == comp);
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Query results from a schedule, snapshot, or ad-hoc execution.
|
|
*
|
|
* When a scheduled query yields new results, we need to log that information
|
|
* to our upstream logging receiver. A QueryLogItem contains metadata and
|
|
* results in potentially-differential form for a logger.
|
|
*/
|
|
struct QueryLogItem {
|
|
/// Differential results from the query.
|
|
DiffResults results;
|
|
|
|
/// Optional snapshot results, no differential applied.
|
|
QueryData snapshot_results;
|
|
|
|
/// The name of the scheduled query.
|
|
std::string name;
|
|
|
|
/// The identifier (hostname, or uuid) of the host.
|
|
std::string identifier;
|
|
|
|
/// The time that the query was executed, seconds as UNIX time.
|
|
size_t time{0};
|
|
|
|
/// The epoch at the time the query was executed
|
|
uint64_t epoch{};
|
|
|
|
/// Query execution counter for current epoch
|
|
uint64_t counter{0};
|
|
|
|
/// The time that the query was executed, an ASCII string.
|
|
std::string calendar_time;
|
|
|
|
/// A set of additional fields to emit with the log line.
|
|
std::map<std::string, std::string> decorations;
|
|
|
|
/// equals operator
|
|
bool operator==(const QueryLogItem& comp) const {
|
|
return (comp.results == results) && (comp.name == name);
|
|
}
|
|
|
|
/// not equals operator
|
|
bool operator!=(const QueryLogItem& comp) const {
|
|
return !(*this == comp);
|
|
}
|
|
};
|
|
|
|
/**
|
|
* @brief Serialize a QueryLogItem object into a property tree
|
|
*
|
|
* @param item the QueryLogItem to serialize
|
|
* @param tree the output property tree
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeQueryLogItem(const QueryLogItem& item,
|
|
boost::property_tree::ptree& tree);
|
|
|
|
/**
|
|
* @brief Serialize a QueryLogItem object into a JSON string
|
|
*
|
|
* @param item the QueryLogItem to serialize
|
|
* @param json the output JSON string
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeQueryLogItemJSON(const QueryLogItem& item, std::string& json);
|
|
|
|
/// Inverse of serializeQueryLogItem, convert property tree to QueryLogItem.
|
|
Status deserializeQueryLogItem(const boost::property_tree::ptree& tree,
|
|
QueryLogItem& item);
|
|
|
|
/// Inverse of serializeQueryLogItem, convert a JSON string to QueryLogItem.
|
|
Status deserializeQueryLogItemJSON(const std::string& json, QueryLogItem& item);
|
|
|
|
/**
|
|
* @brief Serialize a QueryLogItem object into a property tree
|
|
* of events, a list of actions.
|
|
*
|
|
* @param item the QueryLogItem to serialize
|
|
* @param tree the output property tree
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeQueryLogItemAsEvents(const QueryLogItem& item,
|
|
boost::property_tree::ptree& tree);
|
|
|
|
/**
|
|
* @brief Serialize a QueryLogItem object into a JSON string of events,
|
|
* a list of actions.
|
|
*
|
|
* @param i the QueryLogItem to serialize
|
|
* @param items vector of JSON output strings
|
|
*
|
|
* @return Status indicating the success or failure of the operation
|
|
*/
|
|
Status serializeQueryLogItemAsEventsJSON(const QueryLogItem& i,
|
|
std::vector<std::string>& items);
|
|
|
|
/**
|
|
* @brief Interact with the historical on-disk storage for a given query.
|
|
*/
|
|
class Query {
|
|
public:
|
|
/**
|
|
* @brief Constructor which sets up necessary parameters of a Query object.
|
|
*
|
|
* Given a query, this constructor calculates the value of columnFamily_,
|
|
* which can be accessed via the getColumnFamilyName getter method.
|
|
*
|
|
* @param name The query name.
|
|
* @param q a SheduledQuery struct.
|
|
*/
|
|
explicit Query(std::string name, ScheduledQuery q)
|
|
: query_(std::move(q)), name_(std::move(name)) {}
|
|
|
|
/**
|
|
* @brief Serialize the data in RocksDB into a useful data structure
|
|
*
|
|
* This method retrieves the data from RocksDB and returns the data in a
|
|
* std::multiset, in-order to apply binary search in diff function.
|
|
*
|
|
* @param results the output QueryDataSet struct.
|
|
*
|
|
* @return the success or failure of the operation.
|
|
*/
|
|
Status getPreviousQueryResults(QueryDataSet& results) const;
|
|
|
|
/**
|
|
* @brief Get the epoch associated with the previous query results.
|
|
*
|
|
* This method retrieves the epoch associated with the results data that was
|
|
* was stored in rocksdb.
|
|
*
|
|
* @return the epoch associated with the previous query results.
|
|
*/
|
|
uint64_t getPreviousEpoch() const;
|
|
|
|
/**
|
|
* @brief Get the query invocation counter.
|
|
*
|
|
* This method returns query invocation counter. If the query is a new query,
|
|
* 0 is returned. Otherwise the counter associated with the query is retrieved
|
|
* from database and incremented by 1.
|
|
*
|
|
* @param new_query Whether or not the query is new.
|
|
*
|
|
* @return the query invocation counter.
|
|
*/
|
|
uint64_t getQueryCounter(bool new_query) const;
|
|
|
|
/**
|
|
* @brief Check if a given scheduled query exists in the database.
|
|
*
|
|
* @return true if the scheduled query already exists in the database.
|
|
*/
|
|
bool isQueryNameInDatabase() const;
|
|
|
|
/**
|
|
* @brief Check if a query (not query name) is 'new' or altered.
|
|
*
|
|
* @return true if the scheduled query has not been altered.
|
|
*/
|
|
bool isNewQuery() const;
|
|
|
|
/**
|
|
* @brief Add a new set of results to the persistent storage.
|
|
*
|
|
* Given the results of the execution of a scheduled query, add the results
|
|
* to the database using addNewResults.
|
|
*
|
|
* @param qd the QueryData object, which has the results of the query.
|
|
* @param epoch the epoch associatted with QueryData
|
|
* @param counter the output that holds the query execution counter.
|
|
*
|
|
* @return the success or failure of the operation.
|
|
*/
|
|
Status addNewResults(QueryData qd, uint64_t epoch, uint64_t& counter) const;
|
|
|
|
/**
|
|
* @brief Add a new set of results to the persistent storage and get back
|
|
* the differential results.
|
|
*
|
|
* Given the results of an execution of a scheduled query, add the results
|
|
* to the database using addNewResults and get back a data structure
|
|
* indicating what rows in the query's results have changed.
|
|
*
|
|
* @param qd the QueryData object containing query results to store.
|
|
* @param epoch the epoch associated with QueryData
|
|
* @param counter the output that holds the query execution counter.
|
|
* @param dr an output to a DiffResults object populated based on last run.
|
|
* @param calculate_diff default true to populate dr.
|
|
*
|
|
* @return the success or failure of the operation.
|
|
*/
|
|
Status addNewResults(QueryData qd,
|
|
uint64_t epoch,
|
|
uint64_t& counter,
|
|
DiffResults& dr,
|
|
bool calculate_diff = true) const;
|
|
|
|
/**
|
|
* @brief The most recent result set for a scheduled query.
|
|
*
|
|
* @param qd the output QueryData object.
|
|
*
|
|
* @return the success or failure of the operation.
|
|
*/
|
|
Status getCurrentResults(QueryData& qd);
|
|
|
|
public:
|
|
/**
|
|
* @brief Get the names of all historical queries.
|
|
*
|
|
* If you'd like to perform some database maintenance, getStoredQueryNames()
|
|
* allows you to get a vector of the names of all queries which are
|
|
* currently stored in RocksDB
|
|
*
|
|
* @return a vector containing the string names of all scheduled queries.
|
|
*/
|
|
static std::vector<std::string> getStoredQueryNames();
|
|
|
|
private:
|
|
/// The scheduled query and internal
|
|
ScheduledQuery query_;
|
|
|
|
/// The scheduled query name.
|
|
std::string name_;
|
|
|
|
private:
|
|
FRIEND_TEST(QueryTests, test_private_members);
|
|
FRIEND_TEST(QueryTests, test_add_and_get_current_results);
|
|
FRIEND_TEST(QueryTests, test_is_query_name_in_database);
|
|
FRIEND_TEST(QueryTests, test_get_stored_query_names);
|
|
FRIEND_TEST(QueryTests, test_get_executions);
|
|
FRIEND_TEST(QueryTests, test_get_query_results);
|
|
FRIEND_TEST(QueryTests, test_query_name_not_found_in_db);
|
|
};
|
|
|
|
} // namespace osquery
|