Mirror of https://github.com/valitydev/osquery-1.git
Fix performance issue with the disk serializer
This is the issue noted in #76. Keeping every historical result of a query in the HistoricalQueryResults struct makes serializing and deserializing those structs slower and slower as time goes on. By storing only the last execution of the query, we keep performance constant, but we lose the feature where osquery can rebuild timelines without accessing logs. After talking it over, we decided this isn't actually a big deal: if you really want to rebuild the old data, you can process the logs, similar to binlog replication in MySQL.
parent debdb56616
commit 66a2a6fdec
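For context, a minimal sketch of why the old on-disk layout degrades while the new one stays flat (hypothetical struct names, not the actual osquery types): the full-history record grows by one diff per scheduled execution, so every serialize/deserialize pass touches O(n) entries, whereas a most-recent-only record has constant size.

// Sketch only: simplified stand-ins for osquery's result types.
#include <map>
#include <string>
#include <vector>

using Row = std::map<std::string, std::string>;
using QueryData = std::vector<Row>;

// Old shape: grows with every scheduled execution, so the disk
// serializer does O(n) work on each run.
struct FullHistory {
  std::vector<int> executions;          // timestamp of every past run
  std::map<int, QueryData> pastResults; // per-run diffs, kept forever
};

// New shape: constant size, so (de)serialization cost stays flat.
struct MostRecentOnly {
  int lastExecution = 0; // timestamp of the latest run only
  QueryData lastResults; // results of the latest run only
};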
@@ -10,5 +10,7 @@ INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}")
INCLUDE_DIRECTORIES("/usr/local/include")
LINK_DIRECTORIES("/usr/local/lib")

ENABLE_TESTING()

ADD_SUBDIRECTORY(osquery)
ADD_SUBDIRECTORY(tools)
@@ -161,30 +161,15 @@ getSerializedHistoricalQueryResults() {
  auto qd = getSerializedQueryData();
  auto dr = getSerializedDiffResults();
  HistoricalQueryResults r;
  r.executions = std::deque<int>{2, 1};
  r.mostRecentResults.first = 2;
  r.mostRecentResults.second = qd.second;
  r.pastResults[1] = dr.second;

  pt::ptree root;

  pt::ptree executions;
  pt::ptree item1;
  item1.put("", 2);
  executions.push_back(std::make_pair("", item1));
  pt::ptree item2;
  item2.put("", 1);
  executions.push_back(std::make_pair("", item2));
  root.add_child("executions", executions);

  pt::ptree mostRecentResults;
  mostRecentResults.add_child("2", qd.first);
  root.add_child("mostRecentResults", mostRecentResults);

  pt::ptree pastResults;
  pastResults.add_child("1", dr.first);
  root.add_child("pastResults", pastResults);

  return std::make_pair(root, r);
}
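The empty-key push_back pattern in this test helper is how Boost.PropertyTree represents JSON arrays: unnamed children of a node are emitted as array elements by write_json. A self-contained sketch of that mechanism (illustrative only, not part of the change):

#include <boost/property_tree/json_parser.hpp>
#include <boost/property_tree/ptree.hpp>
#include <iostream>
#include <sstream>

namespace pt = boost::property_tree;

int main() {
  pt::ptree executions;
  for (int ts : {2, 1}) {
    pt::ptree item;
    item.put("", ts);                               // value-only node
    executions.push_back(std::make_pair("", item)); // empty key => array element
  }
  pt::ptree root;
  root.add_child("executions", executions);

  std::ostringstream ss;
  pt::write_json(ss, root, false);
  std::cout << ss.str(); // {"executions":["2","1"]} (Boost writes values as JSON strings)
  return 0;
}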
@@ -68,20 +68,6 @@ bool Query::isQueryNameInDatabase(std::shared_ptr<DBHandle> db) {
  return std::find(names.begin(), names.end(), query_.name) != names.end();
}

Status Query::getExecutions(std::deque<int>& results) {
  return getExecutions(results, DBHandle::getInstance());
}

Status Query::getExecutions(std::deque<int>& results,
                            std::shared_ptr<DBHandle> db) {
  HistoricalQueryResults hQR;
  auto s = getHistoricalQueryResults(hQR, db);
  if (s.ok()) {
    results = hQR.executions;
  }
  return s;
}

Status Query::addNewResults(const osquery::db::QueryData& qd, int unix_time) {
  return addNewResults(qd, unix_time, DBHandle::getInstance());
}
@@ -112,10 +98,8 @@ osquery::Status Query::addNewResults(const osquery::db::QueryData& qd,
  if (calculate_diff) {
    dr = diff(hQR.mostRecentResults.second, qd);
  }
  hQR.pastResults[hQR.mostRecentResults.first] = dr;
  hQR.mostRecentResults.first = unix_time;
  hQR.mostRecentResults.second = qd;
  hQR.executions.push_front(unix_time);
  std::string json;
  auto serialize_status = serializeHistoricalQueryResultsJSON(hQR, json);
  if (!serialize_status.ok()) {
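addNewResults compares the incoming rows against mostRecentResults and keeps only the difference. A naive sketch of what a row-level diff produces (illustrative only, not osquery's actual diff() implementation): rows present in the new results but not the old are "added", and rows present in the old but not the new are "removed".

// Sketch of a row-level diff, assuming rows are maps of
// column name -> value; not the actual osquery algorithm.
#include <algorithm>
#include <map>
#include <string>
#include <vector>

using Row = std::map<std::string, std::string>;
using QueryData = std::vector<Row>;

struct DiffResultsSketch {
  QueryData added;   // rows in new_ that are not in old_
  QueryData removed; // rows in old_ that are not in new_
};

DiffResultsSketch naiveDiff(const QueryData& old_, const QueryData& new_) {
  DiffResultsSketch d;
  for (const auto& row : new_) {
    if (std::find(old_.begin(), old_.end(), row) == old_.end()) {
      d.added.push_back(row);
    }
  }
  for (const auto& row : old_) {
    if (std::find(new_.begin(), new_.end(), row) == new_.end()) {
      d.removed.push_back(row);
    }
  }
  return d;
}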
@@ -75,16 +75,6 @@ class Query {
 private:
  bool isQueryNameInDatabase(std::shared_ptr<DBHandle> db);

  // getExecutions() returns a deque of timestamps of previous query
  // executions. These timestamp values are used as the RocksDB sub-keys which
  // represent the data stored as a result of those executions.
 public:
  osquery::Status getExecutions(std::deque<int>& results);

 private:
  osquery::Status getExecutions(std::deque<int>& results,
                                std::shared_ptr<DBHandle> db);

  // addNewResults adds a new result set to the local data store. If you
  // want the diff of the results you've just added, pass a reference to a
  // diffResults struct
@@ -114,22 +114,6 @@ TEST_F(QueryTests, test_get_stored_query_names) {
  EXPECT_NE(in_vector, names.end());
}

TEST_F(QueryTests, test_get_executions) {
  auto hQR = getSerializedHistoricalQueryResultsJSON();
  auto query = getOsqueryScheduledQuery();
  auto db = DBHandle::getInstanceAtPath("/tmp/rocksdb-osquery-test14");
  auto put_status = db->Put(kQueries, query.name, hQR.first);
  EXPECT_TRUE(put_status.ok());
  EXPECT_EQ(put_status.toString(), "OK");
  auto cf = Query(query);
  std::deque<int> results;
  std::deque<int> expected = {2, 1};
  auto s = cf.getExecutions(results, db);
  EXPECT_TRUE(s.ok());
  EXPECT_EQ(s.toString(), "OK");
  EXPECT_EQ(results, expected);
}

TEST_F(QueryTests, test_get_current_results) {
  auto hQR = getSerializedHistoricalQueryResultsJSON();
  auto query = getOsqueryScheduledQuery();
@@ -157,16 +157,7 @@ Status serializeHistoricalQueryResultsJSON(const HistoricalQueryResults& r,
Status serializeHistoricalQueryResults(const HistoricalQueryResults& r,
                                       pt::ptree& tree) {
  try {
    pt::ptree executions;
    pt::ptree mostRecentResults;
    pt::ptree pastResults;

    for (const auto& e : r.executions) {
      pt::ptree item;
      item.put("", e);
      executions.push_back(std::make_pair("", item));
    }
    tree.add_child("executions", executions);

    pt::ptree most_recent_serialized;
    auto mrr_status =
@@ -178,17 +169,6 @@ Status serializeHistoricalQueryResults(const HistoricalQueryResults& r,
        boost::lexical_cast<std::string>(r.mostRecentResults.first),
        most_recent_serialized);
    tree.add_child("mostRecentResults", mostRecentResults);

    for (const auto& i : r.pastResults) {
      pt::ptree serialized_diff_results;
      auto dr_status = serializeDiffResults(i.second, serialized_diff_results);
      if (!dr_status.ok()) {
        return dr_status;
      }
      pastResults.add_child(boost::lexical_cast<std::string>(i.first),
                            serialized_diff_results);
    }
    tree.add_child("pastResults", pastResults);
  }
  catch (const std::exception& e) {
    return Status(1, e.what());
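The ptree built here still has to become a string before it can be written to RocksDB; presumably serializeHistoricalQueryResultsJSON layers Boost's JSON writer on top of this function. A minimal sketch of that final step (illustrative, not the exact osquery code):

#include <boost/property_tree/json_parser.hpp>
#include <boost/property_tree/ptree.hpp>
#include <sstream>
#include <string>

namespace pt = boost::property_tree;

// Turn an already-populated ptree into a compact JSON string,
// the form in which results would be stored under the query's key.
std::string toJson(const pt::ptree& tree) {
  std::ostringstream ss;
  pt::write_json(ss, tree, false); // false = no pretty-printing
  return ss.str();
}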
@@ -199,17 +179,6 @@ Status serializeHistoricalQueryResults(const HistoricalQueryResults& r,
Status deserializeHistoricalQueryResults(const pt::ptree& tree,
                                         HistoricalQueryResults& r) {
  try {
    for (const auto& v : tree.get_child("executions")) {
      try {
        int execution =
            boost::lexical_cast<int>(v.second.get_value<std::string>());
        r.executions.push_back(execution);
      }
      catch (const boost::bad_lexical_cast& e) {
        return Status(1, e.what());
      }
    }

    for (const auto& v : tree.get_child("mostRecentResults")) {
      try {
        int execution = boost::lexical_cast<int>(v.first);
@@ -218,6 +187,7 @@ Status deserializeHistoricalQueryResults(const pt::ptree& tree,
      catch (const boost::bad_lexical_cast& e) {
        return Status(1, e.what());
      }

      QueryData q;
      for (const auto& each : v.second) {
        Row row_;
@@ -229,32 +199,6 @@ Status deserializeHistoricalQueryResults(const pt::ptree& tree,
      r.mostRecentResults.second = q;
    }

    for (const auto& v : tree.get_child("pastResults")) {
      int execution;
      try {
        execution = boost::lexical_cast<int>(v.first);
      }
      catch (const boost::bad_lexical_cast& e) {
        return Status(1, e.what());
      }
      DiffResults dr;
      for (const auto& a : v.second.get_child("added")) {
        Row row_;
        for (const auto& each : a.second) {
          row_[each.first] = each.second.get_value<std::string>();
        }
        dr.added.push_back(row_);
      }
      for (const auto& r : v.second.get_child("removed")) {
        Row row_;
        for (const auto& each : r.second) {
          row_[each.first] = each.second.get_value<std::string>();
        }
        dr.removed.push_back(row_);
      }
      r.pastResults[execution] = dr;
    }

    return Status(0, "OK");
  }
  catch (const std::exception& e) {
@@ -98,25 +98,14 @@ DiffResults diff(const QueryData& old_, const QueryData& new_);
// HistoricalQueryResults is a struct which represents a scheduled query's
// historical results on disk
struct HistoricalQueryResults {
  // a vector of timestamps, sorted by time, starting with the most recent
  std::deque<int> executions;

  // mostRecentResults->first is the timestamp of the most recent results and
  // mostRecentResults->second is the query result data of the most recent
  // query
  std::pair<int, QueryData> mostRecentResults;

  // pastResults is a map of timestamps to data diffs. All timestamps can be
  // found pre-sorted in executions. By taking mostRecentResults->second and
  // applying the diffs from pastResults (in time order), you can reconstruct
  // the entire history of a query's results
  std::map<int, DiffResults> pastResults;

  // equals operator
  bool operator==(const HistoricalQueryResults& comp) const {
    return (comp.executions == executions) &&
           (comp.mostRecentResults == mostRecentResults) &&
           (comp.pastResults == pastResults);
    return (comp.mostRecentResults == mostRecentResults);
  }

  // not equals operator
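The pastResults comment above describes the feature this commit gives up: starting from mostRecentResults and applying the stored diffs in time order to recover older snapshots. A rough sketch of one reconstruction step (illustrative only, using the naive diff types from the earlier sketch, not osquery's), which after this change would have to be driven from logged results instead of the on-disk struct:

// Roll a result set back one execution by undoing a stored diff:
// rows the diff marked as added are dropped, rows it marked as
// removed are restored.
#include <algorithm>
#include <map>
#include <string>
#include <vector>

using Row = std::map<std::string, std::string>;
using QueryData = std::vector<Row>;

struct DiffResultsSketch {
  QueryData added;
  QueryData removed;
};

QueryData applyDiffBackwards(QueryData current, const DiffResultsSketch& d) {
  for (const auto& row : d.added) {
    current.erase(std::remove(current.begin(), current.end(), row),
                  current.end());
  }
  for (const auto& row : d.removed) {
    current.push_back(row);
  }
  return current;
}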
@@ -90,9 +90,7 @@ TEST_F(ResultsTests, test_deserialize_historical_query_results) {
  HistoricalQueryResults r;
  auto s = deserializeHistoricalQueryResults(results.first, r);
  EXPECT_EQ(results.second, r);
  EXPECT_EQ(results.second.executions, r.executions);
  EXPECT_EQ(results.second.mostRecentResults, r.mostRecentResults);
  EXPECT_EQ(results.second.pastResults, r.pastResults);
  EXPECT_TRUE(s.ok());
  EXPECT_EQ(s.toString(), "OK");
}
@@ -102,9 +100,7 @@ TEST_F(ResultsTests, test_deserialize_historical_query_results_json) {
  HistoricalQueryResults r;
  auto s = deserializeHistoricalQueryResultsJSON(results.first, r);
  EXPECT_EQ(results.second, r);
  EXPECT_EQ(results.second.executions, r.executions);
  EXPECT_EQ(results.second.mostRecentResults, r.mostRecentResults);
  EXPECT_EQ(results.second.pastResults, r.pastResults);
  EXPECT_TRUE(s.ok());
  EXPECT_EQ(s.toString(), "OK");
}
@@ -87,17 +87,18 @@ Row parseLaunchdItem(const std::string& path, const pt::ptree& tree) {
  r["name"] = bits[bits.size() - 1];

  for (const auto& it : kLaunchdTopLevelStringKeys) {
    std::string item;
    try {
      std::string item = tree.get<std::string>(it.first);
      item = tree.get<std::string>(it.first);
      if (it.first == "Program") {
        boost::replace_all(item, " ", "\\ ");
      }
      r[it.second] = item;
    }
    catch (const pt::ptree_error& e) {
      VLOG(1) << "Error parsing " << it.first << " from " << path << ": "
      VLOG(3) << "Error parsing " << it.first << " from " << path << ": "
              << e.what();
    }
    r[it.second] = item;
  }

  for (const auto& it : kLaunchdTopLevelArrayKeys) {
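This last hunk appears to move the item declaration out of the try block so the row key is still assigned (as an empty string) when a plist key is missing or malformed, rather than only when the lookup succeeds. A self-contained sketch of that pattern (hypothetical helper, not the actual parseLaunchdItem):

#include <boost/property_tree/ptree.hpp>
#include <iostream>
#include <map>
#include <string>

namespace pt = boost::property_tree;

// Hypothetical helper showing the declare-outside-try pattern:
// the destination key is populated even when the source key is
// absent or has the wrong type (it just ends up empty).
void copyKey(const pt::ptree& tree,
             const std::string& from,
             const std::string& to,
             std::map<std::string, std::string>& row) {
  std::string item; // lives beyond the try block
  try {
    item = tree.get<std::string>(from);
  } catch (const pt::ptree_error& e) {
    std::cerr << "Error parsing " << from << ": " << e.what() << "\n";
  }
  row[to] = item; // always set, possibly empty
}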