Fix column order and repeated columns in distributed query (#2926)

This commit is contained in:
lambda-conjecture 2017-01-21 01:52:47 -05:00 committed by Teddy Reed
parent 9e8e401054
commit 721dd1ed62
10 changed files with 149 additions and 13 deletions

View File

@ -72,6 +72,13 @@ using RowData = std::string;
*/
using Row = std::map<std::string, RowData>;
/**
* @brief A vector of column names associated with a query
*
* ColumnNames is a vector of the column names, in order, returned by a query.
*/
using ColumnNames = std::vector<std::string>;
/**
* @brief Serialize a Row into a property tree
*
@ -131,6 +138,19 @@ using QueryData = std::vector<Row>;
Status serializeQueryData(const QueryData& q,
boost::property_tree::ptree& tree);
/**
* @brief Serialize a QueryData object into a property tree
*
* @param q the QueryData to serialize
* @param cols the ColumnNames vector indicating column order
* @param tree the output property tree
*
* @return Status indicating the success or failure of the operation
*/
Status serializeQueryData(const QueryData& q,
const ColumnNames& cols,
boost::property_tree::ptree& tree);
/**
* @brief Serialize a QueryData object into a JSON string
*

View File

@ -83,11 +83,13 @@ struct DistributedQueryResult {
DistributedQueryResult() {}
DistributedQueryResult(const DistributedQueryRequest& req,
const QueryData& res,
const ColumnNames& cols,
const Status& s)
: request(req), results(res), status(s) {}
: request(req), results(res), columns(cols), status(s) {}
DistributedQueryRequest request;
QueryData results;
ColumnNames columns;
Status status;
};

View File

@ -89,6 +89,13 @@ class SQL : private only_movable {
*/
const QueryData& rows() const;
/**
* @brief Column information for the query
*
* @return A ColumnNames object for the query
*/
const ColumnNames& columns();
/**
* @brief Accessor to switch off of when checking the success of a query.
*
@ -146,11 +153,17 @@ class SQL : private only_movable {
SQL() {}
protected:
/// The internal member which holds the query string
std::string q_;
/// The internal member which holds the results of the query.
QueryData results_;
/// The internal member which holds the status of the query.
Status status_;
/// The internal member which holds the column names and order for the query
ColumnNames columns_;
};
/**

View File

@ -33,6 +33,14 @@ QueryData getExampleQueryData(size_t x, size_t y) {
return qd;
}
/// Build the column names "key0", "key1", ..., "key<x-1>", in that order.
ColumnNames getExampleColumnNames(size_t x) {
  ColumnNames names;
  size_t index = 0;
  while (index < x) {
    names.push_back("key" + std::to_string(index));
    ++index;
  }
  return names;
}
static void DATABASE_serialize(benchmark::State& state) {
auto qd = getExampleQueryData(state.range_x(), state.range_y());
while (state.KeepRunning()) {
@ -43,6 +51,20 @@ static void DATABASE_serialize(benchmark::State& state) {
BENCHMARK(DATABASE_serialize)->ArgPair(1, 1)->ArgPair(10, 10)->ArgPair(10, 100);
/// Benchmark the column-ordered serializeQueryData overload.
///
/// The example data and column names are built once, outside the timed loop;
/// each iteration serializes into a freshly constructed property tree.
static void DATABASE_serialize_column_order(benchmark::State& state) {
  auto query_data = getExampleQueryData(state.range_x(), state.range_y());
  auto column_names = getExampleColumnNames(state.range_x());
  while (state.KeepRunning()) {
    boost::property_tree::ptree output;
    serializeQueryData(query_data, column_names, output);
  }
}

// NOTE(review): the argument pairs presumably mean (columns, rows) -- confirm
// against getExampleQueryData.
BENCHMARK(DATABASE_serialize_column_order)
    ->ArgPair(1, 1)
    ->ArgPair(10, 10)
    ->ArgPair(10, 100);
static void DATABASE_serialize_json(benchmark::State& state) {
auto qd = getExampleQueryData(state.range_x(), state.range_y());
while (state.KeepRunning()) {

View File

@ -65,6 +65,17 @@ Status serializeRow(const Row& r, pt::ptree& tree) {
return Status(0, "OK");
}
/**
 * Serialize a Row into a property tree, one child per entry of cols.
 *
 * Children are appended with ptree::add in the order given by cols, so a
 * column name that appears more than once in cols produces more than one
 * child with that key.
 *
 * @param r the Row to serialize
 * @param cols the column names, in output order
 * @param tree the output property tree
 *
 * @return Status 0 "OK" on success; Status 1 naming the missing column if r
 * has no value for an entry of cols, or carrying the exception message if
 * serialization throws. On failure, tree may already hold the columns
 * serialized before the error.
 */
Status serializeRow(const Row& r, const ColumnNames& cols, pt::ptree& tree) {
  try {
    for (const auto& column : cols) {
      // Look the column up explicitly instead of relying on map::at throwing,
      // so the failure status can say which column was missing.
      const auto value = r.find(column);
      if (value == r.end()) {
        return Status(1, "Row is missing column: " + column);
      }
      tree.add<std::string>(column, value->second);
    }
  } catch (const std::exception& e) {
    return Status(1, e.what());
  }
  return Status(0, "OK");
}
Status serializeRowJSON(const Row& r, std::string& json) {
pt::ptree tree;
auto status = serializeRow(r, tree);
@ -116,6 +127,20 @@ Status serializeQueryData(const QueryData& q, pt::ptree& tree) {
return Status(0, "OK");
}
/**
 * Serialize every row of q, in column order cols, into tree.
 *
 * Each row becomes one child of tree with an empty key. Serialization stops
 * at the first row that fails and that row's Status is returned; rows
 * serialized before the failure remain in tree.
 */
Status serializeQueryData(const QueryData& q,
                          const ColumnNames& cols,
                          pt::ptree& tree) {
  for (auto row = q.begin(); row != q.end(); ++row) {
    pt::ptree row_tree;
    auto row_status = serializeRow(*row, cols, row_tree);
    if (row_status.ok()) {
      tree.push_back(std::make_pair("", row_tree));
      continue;
    }
    return row_status;
  }
  return Status(0, "OK");
}
Status serializeQueryDataJSON(const QueryData& q, std::string& json) {
pt::ptree tree;
auto status = serializeQueryData(q, tree);

View File

@ -69,6 +69,16 @@ TEST_F(ResultsTests, test_serialize_query_data) {
EXPECT_EQ(results.first, tree);
}
// Serializing the fixture's QueryData with the unordered, repeated column
// list must succeed and reproduce the fixture's expected property tree.
TEST_F(ResultsTests, test_serialize_query_data_in_column_order) {
  const auto fixture = getSerializedQueryDataWithColumnOrder();
  const auto ordered_columns = getSerializedRowColumnNames(true);

  pt::ptree serialized;
  auto status = serializeQueryData(fixture.second, ordered_columns, serialized);

  EXPECT_TRUE(status.ok());
  EXPECT_EQ(status.toString(), "OK");
  EXPECT_EQ(fixture.first, serialized);
}
TEST_F(ResultsTests, test_serialize_query_data_json) {
auto results = getSerializedQueryDataJSON();
std::string json;

View File

@ -92,7 +92,7 @@ Status Distributed::serializeResults(std::string& json) {
pt::ptree statuses;
for (const auto& result : results_) {
pt::ptree qd;
auto s = serializeQueryData(result.results, qd);
auto s = serializeQueryData(result.results, result.columns, qd);
if (!s.ok()) {
return s;
}
@ -131,7 +131,8 @@ Status Distributed::runQueries() {
<< sql.getMessageString();
}
DistributedQueryResult result(request, sql.rows(), sql.getStatus());
DistributedQueryResult result(
request, sql.rows(), sql.columns(), sql.getStatus());
addResult(result);
}
return flushCompleted();
@ -270,7 +271,7 @@ Status serializeDistributedQueryResult(const DistributedQueryResult& r,
}
pt::ptree results;
s = serializeQueryData(r.results, results);
s = serializeQueryData(r.results, r.columns, results);
if (!s.ok()) {
return s;
}

View File

@ -23,13 +23,25 @@ FLAG(int32, value_max, 512, "Maximum returned row value size");
CREATE_LAZY_REGISTRY(SQLPlugin, "sql");
/**
 * Resolve the query's column names, then run the query.
 *
 * Column names are collected first, in the order getQueryColumns reports
 * them. The query is executed exactly once, and only if column resolution
 * succeeded; otherwise status_ keeps the getQueryColumns failure and
 * results_ stays empty.
 *
 * @param q the query string to run
 */
SQL::SQL(const std::string& q) : q_(q) {
  TableColumns table_columns;
  status_ = getQueryColumns(q_, table_columns);
  if (!status_.ok()) {
    return;
  }

  // The first tuple element is stored as the column name.
  for (const auto& column : table_columns) {
    columns_.push_back(std::get<0>(column));
  }
  status_ = query(q_, results_);
}
/// Read-only access to the rows stored in results_.
const QueryData& SQL::rows() const {
  return this->results_;
}
/// Read-only access to the column names stored in columns_.
const ColumnNames& SQL::columns() {
  return this->columns_;
}
/// Report whether the stored status_ is OK.
bool SQL::ok() {
  const bool succeeded = status_.ok();
  return succeeded;
}

View File

@ -239,18 +239,41 @@ ScheduledQuery getOsqueryScheduledQuery() {
return sq;
}
std::pair<pt::ptree, Row> getSerializedRow() {
// Returns the test column names in alphabetical order. When
// unordered_and_repeated is true, "repeated_column" is additionally placed
// at the front, so the list is out of order and names that column twice.
ColumnNames getSerializedRowColumnNames(bool unordered_and_repeated) {
  ColumnNames names = {
      "alphabetical", "foo", "meaning_of_life", "repeated_column"};
  if (unordered_and_repeated) {
    names.insert(names.begin(), "repeated_column");
  }
  return names;
}
std::pair<pt::ptree, Row> getSerializedRow(bool unordered_and_repeated) {
Row r;
r["foo"] = "bar";
r["meaning_of_life"] = "42";
ColumnNames cns = getSerializedRowColumnNames(unordered_and_repeated);
pt::ptree arr;
arr.put<std::string>("foo", "bar");
arr.put<std::string>("meaning_of_life", "42");
for (auto cn : cns) {
std::string c_value = cn + "_value";
r[cn] = c_value;
arr.add<std::string>(cn, c_value);
}
return std::make_pair(arr, r);
}
// Builds a two-row QueryData and the property tree it is expected to
// serialize to. Both rows are the same alphabetically ordered test row, and
// the tree holds that row's tree twice as children with empty keys.
std::pair<pt::ptree, QueryData> getSerializedQueryData() {
  auto r = getSerializedRow(false);
  QueryData q = {r.second, r.second};
  pt::ptree arr;
  arr.push_back(std::make_pair("", r.first));
  arr.push_back(std::make_pair("", r.first));
  return std::make_pair(arr, q);
}
std::pair<pt::ptree, QueryData> getSerializedQueryDataWithColumnOrder() {
auto r = getSerializedRow(true);
QueryData q = {r.second, r.second};
pt::ptree arr;
arr.push_back(std::make_pair("", r.first));

View File

@ -67,15 +67,23 @@ QueryData getTestDBExpectedResults();
// need to be performed on the dataset to make the results be pair.second
std::vector<std::pair<std::string, QueryData> > getTestDBResultStream();
// getSerializedRowColumnNames returns a vector of test column names that
// are in alphabetical order. If unordered_and_repeated is true, the
// vector includes a repeated column name and is in non-alphabetical order
ColumnNames getSerializedRowColumnNames(bool unordered_and_repeated);
// getSerializedRow() returns a std::pair where pair->second is a Row which
// should serialize to pair->first (a property tree). pair->first should
// deserialize to pair->second
std::pair<pt::ptree, Row> getSerializedRow();
std::pair<pt::ptree, Row> getSerializedRow(bool unordered_and_repeated = false);
// getSerializedQueryData() return an std::pair where pair->first is a string
// which should serialize to pair->second. pair->second should
// deserialize to pair->first
// deserialize to pair->first. getSerializedQueryDataWithColumnOrder
// returns a pair where pair->first is a tree that has a repeated column and
// the child nodes are not in alphabetical order
std::pair<pt::ptree, QueryData> getSerializedQueryData();
std::pair<pt::ptree, QueryData> getSerializedQueryDataWithColumnOrder();
std::pair<std::string, QueryData> getSerializedQueryDataJSON();
// getSerializedDiffResults() return an std::pair where pair->first is a string