query: Force query results into proper order (#2947)

This commit is contained in:
Teddy Reed 2018-01-21 01:20:48 -05:00 committed by GitHub
parent 90a737ead7
commit 483fbbb594
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 97 additions and 97 deletions

View File

@ -47,12 +47,16 @@ using ColumnNames = std::vector<std::string>;
* @brief Serialize a Row into a JSON document.
*
* @param r the Row to serialize.
* @param cols the ColumnNames vector indicating column order; an empty
*   vector serializes every entry of the Row in the Row map's own order.
* @param doc the managed JSON document.
* @param obj [output] the JSON object to assign values.
*
* @return Status indicating the success or failure of the operation.
*/
Status serializeRow(const Row& r, JSON& doc, rapidjson::Value& obj);
Status serializeRow(const Row& r,
const ColumnNames& cols,
JSON& doc,
rapidjson::Value& obj);
/**
* @brief Serialize a Row object into a JSON string.
@ -99,19 +103,6 @@ using QueryData = std::vector<Row>;
*/
using QueryDataSet = std::multiset<Row>;
/**
* @brief Serialize a QueryData object into a JSON array.
*
* @param q the QueryData to serialize.
* @param doc the managed JSON document.
* @param arr [output] the output JSON array.
*
* @return Status indicating the success or failure of the operation.
*/
Status serializeQueryData(const QueryData& q,
JSON& doc,
rapidjson::Document& arr);
/**
* @brief Serialize a QueryData object into a JSON array.
*
@ -157,13 +148,18 @@ Status deserializeQueryDataJSON(const std::string& json, QueryDataSet& qd);
* new QueryData, DiffResults indicates the "added" subset of rows and the
* "removed" subset of rows.
*/
struct DiffResults {
struct DiffResults : private only_movable {
public:
/// vector of added rows
QueryData added;
/// vector of removed rows
QueryData removed;
DiffResults() {}
DiffResults(DiffResults&&) = default;
DiffResults& operator=(DiffResults&&) = default;
/// equals operator
bool operator==(const DiffResults& comp) const {
return (comp.added == added) && (comp.removed == removed);
@ -181,12 +177,14 @@ struct DiffResults {
* The object JSON will contain two new keys: added and removed.
*
* @param d the DiffResults to serialize.
* @param cols the ColumnNames vector indicating column order.
* @param doc the managed JSON document.
* @param obj [output] the output JSON object.
*
* @return Status indicating the success or failure of the operation.
*/
Status serializeDiffResults(const DiffResults& d,
const ColumnNames& cols,
JSON& doc,
rapidjson::Document& obj);
@ -275,7 +273,8 @@ struct QueryPerformance {
* Within the context of osqueryd, a scheduled query may have many relevant
* attributes. Those attributes are represented in this data structure.
*/
struct ScheduledQuery {
struct ScheduledQuery : private only_movable {
public:
/// The SQL query.
std::string query;
@ -298,6 +297,8 @@ struct ScheduledQuery {
std::map<std::string, bool> options;
ScheduledQuery() = default;
ScheduledQuery(ScheduledQuery&&) = default;
ScheduledQuery& operator=(ScheduledQuery&&) = default;
/// equals operator
bool operator==(const ScheduledQuery& comp) const {
@ -318,6 +319,7 @@ struct ScheduledQuery {
* results in potentially-differential form for a logger.
*/
struct QueryLogItem {
public:
/// Differential results from the query.
DiffResults results;
@ -345,6 +347,9 @@ struct QueryLogItem {
/// A set of additional fields to emit with the log line.
std::map<std::string, std::string> decorations;
/// The ordered list of column names from the query.
ColumnNames columns;
/// equals operator
bool operator==(const QueryLogItem& comp) const {
return (comp.results == results) && (comp.name == name);
@ -419,8 +424,8 @@ class Query {
* @param name The query name.
* @param q a ScheduledQuery struct.
*/
explicit Query(std::string name, ScheduledQuery q)
: query_(std::move(q)), name_(std::move(name)) {}
explicit Query(std::string name, const ScheduledQuery& q)
: query_(q.query), name_(std::move(name)) {}
/**
* @brief Serialize the data in RocksDB into a useful data structure
@ -529,8 +534,8 @@ class Query {
static std::vector<std::string> getStoredQueryNames();
private:
/// The scheduled query and internal
ScheduledQuery query_;
/// The scheduled query's query string.
std::string query_;
/// The scheduled query name.
std::string name_;

View File

@ -218,7 +218,7 @@ void Pack::initialize(const std::string& name,
? q.value["blacklist"].GetBool()
: true;
schedule_[q.name.GetString()] = query;
schedule_.emplace(std::make_pair(q.name.GetString(), std::move(query)));
}
}

View File

@ -268,18 +268,15 @@ TEST_F(ConfigTests, test_content_update) {
TEST_F(ConfigTests, test_get_scheduled_queries) {
std::vector<std::string> query_names;
std::vector<ScheduledQuery> queries;
get().addPack("unrestricted_pack", "", getUnrestrictedPack().doc());
get().scheduledQueries(
([&queries, &query_names](const std::string& name,
const ScheduledQuery& query) {
([&query_names](const std::string& name, const ScheduledQuery& query) {
query_names.push_back(name);
queries.push_back(query);
}));
auto expected_size = getUnrestrictedPack().doc()["queries"].MemberCount();
EXPECT_EQ(queries.size(), expected_size)
<< "The number of queries in the schedule (" << queries.size()
EXPECT_EQ(query_names.size(), expected_size)
<< "The number of queries in the schedule (" << query_names.size()
<< ") should equal " << expected_size;
ASSERT_FALSE(query_names.empty());
@ -306,21 +303,20 @@ TEST_F(ConfigTests, test_get_scheduled_queries) {
// Try again, this time requesting scheduled queries.
query_names.clear();
queries.clear();
bool blacklisted = false;
get().scheduledQueries(
([&queries, &query_names, &query_name](const std::string& name,
const ScheduledQuery& query) {
([&blacklisted, &query_names, &query_name](const std::string& name,
const ScheduledQuery& query) {
if (name == query_name) {
// Only populate the query we've blacklisted.
query_names.push_back(name);
queries.push_back(query);
blacklisted = query.blacklisted;
}
}),
true);
ASSERT_EQ(query_names.size(), 1_sz);
EXPECT_EQ(query_names[0], query_name);
ASSERT_EQ(queries.size(), 1_sz);
EXPECT_TRUE(queries[0].blacklisted);
EXPECT_TRUE(blacklisted);
}
TEST_F(ConfigTests, test_nonblacklist_query) {
@ -331,16 +327,16 @@ TEST_F(ConfigTests, test_nonblacklist_query) {
get().reset();
get().addPack("unrestricted_pack", "", getUnrestrictedPack().doc());
std::map<std::string, ScheduledQuery> queries;
std::map<std::string, bool> blacklisted;
get().scheduledQueries(
([&queries](const std::string& name, const ScheduledQuery& query) {
queries[name] = query;
([&blacklisted](const std::string& name, const ScheduledQuery& query) {
blacklisted[name] = query.blacklisted;
}));
// This query cannot be blacklisted.
auto query = queries.find(kConfigTestNonBlacklistQuery);
ASSERT_NE(query, queries.end());
EXPECT_FALSE(query->second.blacklisted);
auto query = blacklisted.find(kConfigTestNonBlacklistQuery);
ASSERT_NE(query, blacklisted.end());
EXPECT_FALSE(query->second);
}
class TestConfigParserPlugin : public ConfigParserPlugin {

View File

@ -82,7 +82,7 @@ static inline void saveQuery(const std::string& name,
bool Query::isNewQuery() const {
std::string query;
getDatabaseValue(kQueries, "query." + name_, query);
return (query != query_.query);
return (query != query_);
}
Status Query::addNewResults(QueryData qd,
@ -104,7 +104,7 @@ Status Query::addNewResults(QueryData current_qd,
// This is the first encounter of the scheduled query.
fresh_results = true;
LOG(INFO) << "Storing initial results for new scheduled query: " << name_;
saveQuery(name_, query_.query);
saveQuery(name_, query_);
} else if (getPreviousEpoch() != current_epoch) {
fresh_results = true;
LOG(INFO) << "New Epoch " << current_epoch << " for scheduled query "
@ -113,7 +113,7 @@ Status Query::addNewResults(QueryData current_qd,
// This query is 'new' in that the previous results may be invalid.
new_query = true;
LOG(INFO) << "Scheduled query has been updated: " + name_;
saveQuery(name_, query_.query);
saveQuery(name_, query_);
}
// Use a 'target' to avoid copying the query data when serializing and saving.
@ -167,30 +167,32 @@ Status Query::addNewResults(QueryData current_qd,
return Status(0, "OK");
}
Status serializeRow(const Row& r, JSON& doc, rj::Value& obj) {
for (auto& i : r) {
doc.addRef(i.first, i.second, obj);
}
return Status();
}
Status serializeRow(const Row& r,
const ColumnNames& cols,
JSON& doc,
rj::Document& obj) {
try {
for (auto& c : cols) {
doc.addRef(c, r.at(c), obj);
rj::Value& obj) {
if (cols.empty()) {
for (const auto& i : r) {
doc.addRef(i.first, i.second, obj);
}
} else {
for (const auto& c : cols) {
auto i = r.find(c);
if (i != r.end()) {
doc.addRef(c, i->second, obj);
}
}
} catch (const std::exception& e) {
return Status(1, e.what());
}
return Status();
}
Status serializeRowJSON(const Row& r, std::string& json) {
auto doc = JSON::newObject();
auto status = serializeRow(r, doc, doc.doc());
// An empty column list will traverse the row map.
ColumnNames cols;
auto status = serializeRow(r, cols, doc, doc.doc());
if (!status.ok()) {
return status;
}
@ -221,7 +223,9 @@ Status deserializeRowJSON(const std::string& json, Row& r) {
Status serializeQueryDataJSON(const QueryData& q, std::string& json) {
auto doc = JSON::newArray();
auto status = serializeQueryData(q, doc, doc.doc());
ColumnNames cols;
auto status = serializeQueryData(q, cols, doc, doc.doc());
if (!status.ok()) {
return status;
}
@ -278,6 +282,7 @@ Status deserializeQueryDataJSON(const std::string& json, QueryDataSet& qd) {
}
Status serializeDiffResults(const DiffResults& d,
const ColumnNames& cols,
JSON& doc,
rj::Document& obj) {
// Serialize and add "removed" first.
@ -285,14 +290,14 @@ Status serializeDiffResults(const DiffResults& d,
// the logger plugins and their aggregations, allowing them to parse chunked
// lines. Note that the chunking is opaque to the database functions.
auto removed_arr = doc.getArray();
auto status = serializeQueryData(d.removed, doc, removed_arr);
auto status = serializeQueryData(d.removed, cols, doc, removed_arr);
if (!status.ok()) {
return status;
}
doc.add("removed", removed_arr, obj);
auto added_arr = doc.getArray();
status = serializeQueryData(d.added, doc, added_arr);
status = serializeQueryData(d.added, cols, doc, added_arr);
if (!status.ok()) {
return status;
}
@ -323,7 +328,9 @@ Status deserializeDiffResults(const rj::Value& doc, DiffResults& dr) {
Status serializeDiffResultsJSON(const DiffResults& d, std::string& json) {
auto doc = JSON::newObject();
auto status = serializeDiffResults(d, doc, doc.doc());
ColumnNames cols;
auto status = serializeDiffResults(d, cols, doc, doc.doc());
if (!status.ok()) {
return status;
}
@ -394,7 +401,7 @@ inline void getLegacyFieldsAndDecorations(const JSON& doc, QueryLogItem& item) {
Status serializeQueryLogItem(const QueryLogItem& item, JSON& doc) {
if (item.results.added.size() > 0 || item.results.removed.size() > 0) {
auto obj = doc.getObject();
auto status = serializeDiffResults(item.results, doc, obj);
auto status = serializeDiffResults(item.results, item.columns, doc, obj);
if (!status.ok()) {
return status;
}
@ -402,7 +409,8 @@ Status serializeQueryLogItem(const QueryLogItem& item, JSON& doc) {
doc.add("diffResults", obj);
} else {
auto arr = doc.getArray();
auto status = serializeQueryData(item.snapshot_results, doc, arr);
auto status =
serializeQueryData(item.snapshot_results, item.columns, doc, arr);
if (!status.ok()) {
return status;
}
@ -434,13 +442,14 @@ Status serializeEvent(const QueryLogItem& item,
Status serializeQueryLogItemAsEvents(const QueryLogItem& item, JSON& doc) {
auto temp_doc = JSON::newObject();
if (!item.results.added.empty() || !item.results.removed.empty()) {
auto status = serializeDiffResults(item.results, temp_doc, temp_doc.doc());
auto status = serializeDiffResults(
item.results, item.columns, temp_doc, temp_doc.doc());
if (!status.ok()) {
return status;
}
} else if (!item.snapshot_results.empty()) {
auto arr = doc.getArray();
auto status = serializeQueryData(item.snapshot_results, temp_doc, arr);
auto status = serializeQueryData(item.snapshot_results, {}, temp_doc, arr);
if (!status.ok()) {
return status;
}
@ -461,9 +470,9 @@ Status serializeQueryLogItemAsEvents(const QueryLogItem& item, JSON& doc) {
return Status();
}
Status serializeQueryLogItemJSON(const QueryLogItem& i, std::string& json) {
Status serializeQueryLogItemJSON(const QueryLogItem& item, std::string& json) {
auto doc = JSON::newObject();
auto status = serializeQueryLogItem(i, doc);
auto status = serializeQueryLogItem(item, doc);
if (!status.ok()) {
return status;
}
@ -503,10 +512,10 @@ Status deserializeQueryLogItemJSON(const std::string& json,
return deserializeQueryLogItem(doc, item);
}
Status serializeQueryLogItemAsEventsJSON(const QueryLogItem& i,
Status serializeQueryLogItemAsEventsJSON(const QueryLogItem& item,
std::vector<std::string>& items) {
auto doc = JSON::newArray();
auto status = serializeQueryLogItemAsEvents(i, doc);
auto status = serializeQueryLogItemAsEvents(item, doc);
if (!status.ok()) {
return status;
}
@ -521,18 +530,6 @@ Status serializeQueryLogItemAsEventsJSON(const QueryLogItem& i,
return Status();
}
Status serializeQueryData(const QueryData& q, JSON& doc, rj::Document& arr) {
for (const auto& r : q) {
auto row_obj = doc.getObject();
auto status = serializeRow(r, doc, row_obj);
if (!status.ok()) {
return status;
}
doc.push(row_obj, arr);
}
return Status();
}
Status serializeQueryData(const QueryData& q,
const ColumnNames& cols,
JSON& doc,

View File

@ -27,7 +27,7 @@ class QueryTests : public testing::Test {};
TEST_F(QueryTests, test_private_members) {
auto query = getOsqueryScheduledQuery();
auto cf = Query("foobar", query);
EXPECT_EQ(cf.query_, query);
EXPECT_EQ(cf.query_, query.query);
}
TEST_F(QueryTests, test_add_and_get_current_results) {

View File

@ -58,18 +58,18 @@ ColumnNames getExampleColumnNames(size_t x) {
}
static void DATABASE_serialize(benchmark::State& state) {
auto qd = getExampleQueryData(state.range_x(), state.range_y());
auto qd = getExampleQueryData(state.range(0), state.range(1));
while (state.KeepRunning()) {
auto doc = JSON::newArray();
serializeQueryData(qd, doc, doc.doc());
serializeQueryData(qd, {}, doc, doc.doc());
}
}
BENCHMARK(DATABASE_serialize)->ArgPair(1, 1)->ArgPair(10, 10)->ArgPair(10, 100);
static void DATABASE_serialize_column_order(benchmark::State& state) {
auto qd = getExampleQueryData(state.range_x(), state.range_y());
auto cn = getExampleColumnNames(state.range_x());
auto qd = getExampleQueryData(state.range(0), state.range(1));
auto cn = getExampleColumnNames(state.range(0));
while (state.KeepRunning()) {
auto doc = JSON::newArray();
serializeQueryData(qd, cn, doc, doc.doc());
@ -83,7 +83,7 @@ BENCHMARK(DATABASE_serialize_column_order)
->ArgPair(100, 100);
static void DATABASE_serialize_json(benchmark::State& state) {
auto qd = getExampleQueryData(state.range_x(), state.range_y());
auto qd = getExampleQueryData(state.range(0), state.range(1));
while (state.KeepRunning()) {
std::string content;
serializeQueryDataJSON(qd, content);
@ -96,8 +96,8 @@ BENCHMARK(DATABASE_serialize_json)
->ArgPair(10, 100);
static void DATABASE_diff(benchmark::State& state) {
QueryData qd = getExampleQueryData(state.range_x(), state.range_y());
QueryDataSet qds = getExampleQueryDataSet(state.range_x(), state.range_y());
QueryData qd = getExampleQueryData(state.range(0), state.range(1));
QueryDataSet qds = getExampleQueryDataSet(state.range(0), state.range(1));
while (state.KeepRunning()) {
auto d = diff(qds, qd);
}
@ -106,7 +106,7 @@ static void DATABASE_diff(benchmark::State& state) {
BENCHMARK(DATABASE_diff)->ArgPair(1, 1)->ArgPair(10, 10)->ArgPair(10, 100);
static void DATABASE_query_results(benchmark::State& state) {
auto qd = getExampleQueryData(state.range_x(), state.range_y());
auto qd = getExampleQueryData(state.range(0), state.range(1));
auto query = getOsqueryScheduledQuery();
while (state.KeepRunning()) {
DiffResults diff_results;

View File

@ -42,7 +42,7 @@ TEST_F(ResultsTests, test_simple_diff) {
TEST_F(ResultsTests, test_serialize_row) {
auto results = getSerializedRow();
auto doc = JSON::newObject();
auto s = serializeRow(results.second, doc, doc.doc());
auto s = serializeRow(results.second, {}, doc, doc.doc());
EXPECT_TRUE(s.ok());
EXPECT_EQ(s.toString(), "OK");
EXPECT_EQ(doc.doc()["meaning_of_life"], "meaning_of_life_value");
@ -65,7 +65,7 @@ TEST_F(ResultsTests, test_deserialize_row_json) {
TEST_F(ResultsTests, test_serialize_query_data) {
auto results = getSerializedQueryData();
auto doc = JSON::newArray();
auto s = serializeQueryData(results.second, doc, doc.doc());
auto s = serializeQueryData(results.second, {}, doc, doc.doc());
EXPECT_TRUE(s.ok());
EXPECT_EQ(s.toString(), "OK");
EXPECT_EQ(results.first.doc(), doc.doc());
@ -104,7 +104,7 @@ TEST_F(ResultsTests, test_deserialize_query_data_json) {
TEST_F(ResultsTests, test_serialize_diff_results) {
auto results = getSerializedDiffResults();
auto doc = JSON::newObject();
auto s = serializeDiffResults(results.second, doc, doc.doc());
auto s = serializeDiffResults(results.second, {}, doc, doc.doc());
EXPECT_TRUE(s.ok());
EXPECT_EQ(s.toString(), "OK");
EXPECT_EQ(results.first.doc(), doc.doc());

View File

@ -90,6 +90,7 @@ inline void launchQuery(const std::string& name, const ScheduledQuery& query) {
QueryLogItem item;
item.name = name;
item.identifier = ident;
item.columns = sql.columns();
item.time = osquery::getUnixTime();
item.epoch = FLAGS_schedule_epoch;
item.calendar_time = osquery::getAsciiTime();

View File

@ -308,14 +308,14 @@ std::pair<JSON, DiffResults> getSerializedDiffResults() {
doc.add("removed", qd.first.doc());
doc.add("added", qd.first.doc());
return std::make_pair(std::move(doc), diff_results);
return std::make_pair(std::move(doc), std::move(diff_results));
}
std::pair<std::string, DiffResults> getSerializedDiffResultsJSON() {
auto results = getSerializedDiffResults();
std::string output;
results.first.toString(output);
return std::make_pair(output, results.second);
return std::make_pair(output, std::move(results.second));
}
std::pair<std::string, QueryData> getSerializedQueryDataJSON() {
@ -326,10 +326,11 @@ std::pair<std::string, QueryData> getSerializedQueryDataJSON() {
}
std::pair<JSON, QueryLogItem> getSerializedQueryLogItem() {
std::pair<JSON, QueryLogItem> p;
QueryLogItem i;
JSON doc = JSON::newObject();
auto dr = getSerializedDiffResults();
i.results = dr.second;
i.results = std::move(dr.second);
i.name = "foobar";
i.calendar_time = "Mon Aug 25 12:10:57 2014";
i.time = 1408993857;
@ -347,14 +348,14 @@ std::pair<JSON, QueryLogItem> getSerializedQueryLogItem() {
doc.add("epoch", 0_sz);
doc.add("counter", 0_sz);
return std::make_pair(std::move(doc), i);
return std::make_pair(std::move(doc), std::move(i));
}
std::pair<std::string, QueryLogItem> getSerializedQueryLogItemJSON() {
auto results = getSerializedQueryLogItem();
std::string output;
results.first.toString(output);
return std::make_pair(output, results.second);
return std::make_pair(output, std::move(results.second));
}
std::vector<SplitStringTestData> generateSplitStringTestData() {