Plumb used columns into QueryContext; use that to make processes table more efficient on macOS (#4269)

This commit is contained in:
Jonathan Keljo 2018-05-12 09:07:57 -07:00 committed by Nick Anderson
parent a5df5acc01
commit 1ea714a325
9 changed files with 342 additions and 116 deletions

View File

@ -127,6 +127,23 @@ class SQL : private only_movable {
ConstraintOperator op,
const std::string& expr);
/**
* @brief Get columns with constraint, 'SELECT [columns] ... where', results
* given a virtual table name, column names, and single constraint.
*
* @param columns the columns to return
* @param table The name of the virtual table.
* @param column Table column name to apply constraint.
* @param op The SQL comparative operator.
* @param expr The constraint expression.
* @return A QueryData object of the 'SELECT [columns] ...' query results.
*/
static QueryData selectFrom(const std::initializer_list<std::string> columns,
const std::string& table,
const std::string& column,
ConstraintOperator op,
const std::string& expr);
protected:
/**
* @brief Private default constructor.

View File

@ -13,6 +13,7 @@
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
@ -439,6 +440,9 @@ using ConstraintMap = std::map<std::string, struct ConstraintList>;
/// Populate a constraint list from a query's parsed predicate.
using ConstraintSet = std::vector<std::pair<std::string, struct Constraint>>;
/// Keep track of which columns are used
using UsedColumns = std::unordered_set<std::string>;
/**
* @brief osquery table content descriptor.
*
@ -476,6 +480,9 @@ struct VirtualTableContent {
/// Transient set of virtual table access constraints.
std::unordered_map<size_t, ConstraintSet> constraints;
/// Transient set of virtual table used columns
std::unordered_map<size_t, UsedColumns> colsUsed;
/*
* @brief A table implementation specific query result cache.
*
@ -605,6 +612,47 @@ struct QueryContext : private only_movable {
std::function<Status(const std::string& constraint,
std::set<std::string>& output)> predicate);
/// Check if the given column is used by the query
bool isColumnUsed(const std::string& colName) const;
/// Check if any of the given columns is used by the query
bool isAnyColumnUsed(std::initializer_list<std::string> colNames) const;
template <typename Type>
inline void setTextColumnIfUsed(Row& r,
const std::string& colName,
const Type& value) const {
if (isColumnUsed(colName)) {
r[colName] = TEXT(value);
}
}
template <typename Type>
inline void setIntegerColumnIfUsed(Row& r,
const std::string& colName,
const Type& value) const {
if (isColumnUsed(colName)) {
r[colName] = INTEGER(value);
}
}
template <typename Type>
inline void setBigIntColumnIfUsed(Row& r,
const std::string& colName,
const Type& value) const {
if (isColumnUsed(colName)) {
r[colName] = BIGINT(value);
}
}
inline void setColumnIfUsed(Row& r,
const std::string& colName,
const std::string& value) const {
if (isColumnUsed(colName)) {
r[colName] = value;
}
}
/// Check if a table-defined index exists within the query cache.
bool isCached(const std::string& index) const;
@ -631,6 +679,8 @@ struct QueryContext : private only_movable {
/// The map of column name to constraint list.
ConstraintMap constraints;
boost::optional<UsedColumns> colsUsed;
private:
/// If false then the context is maintaining an ephemeral cache.
bool enable_cache_{false};

View File

@ -69,6 +69,15 @@ void TablePlugin::setRequestFromContext(const QueryContext& context,
}
doc.add("constraints", constraints);
if (context.colsUsed) {
auto colsUsed = doc.getArray();
for (const auto& columnName : *context.colsUsed) {
doc.pushCopy(columnName, colsUsed);
}
doc.add("colsUsed", colsUsed);
}
doc.toString(request["context"]);
}
@ -76,6 +85,15 @@ void TablePlugin::setContextFromRequest(const PluginRequest& request,
QueryContext& context) {
auto doc = JSON::newObject();
doc.fromString(request.at("context"));
if (doc.doc().HasMember("colsUsed")) {
UsedColumns colsUsed;
for (const auto& columnName : doc.doc()["colsUsed"].GetArray()) {
colsUsed.insert(columnName.GetString());
}
context.colsUsed = colsUsed;
}
if (!doc.doc().HasMember("constraints") ||
!doc.doc()["constraints"].IsArray()) {
return;
@ -407,6 +425,20 @@ void ConstraintList::deserialize(const rapidjson::Value& obj) {
affinity = columnTypeName(affinity_name);
}
bool QueryContext::isColumnUsed(const std::string& colName) const {
return !colsUsed || colsUsed->find(colName) != colsUsed->end();
}
bool QueryContext::isAnyColumnUsed(
std::initializer_list<std::string> colNames) const {
for (auto& colName : colNames) {
if (isColumnUsed(colName)) {
return true;
}
}
return false;
}
void QueryContext::useCache(bool use_cache) {
use_cache_ = use_cache;
}

View File

@ -439,7 +439,12 @@ QueryData WatcherRunner::getProcessRow(pid_t pid) const {
#ifdef WIN32
p = (pid == ULONG_MAX) ? -1 : pid;
#endif
return SQL::selectAllFrom("processes", "pid", EQUALS, INTEGER(p));
return SQL::selectFrom(
{"parent", "user_time", "system_time", "resident_size"},
"processes",
"pid",
EQUALS,
INTEGER(p));
}
Status WatcherRunner::isChildSane(const PlatformProcess& child) const {
@ -507,8 +512,11 @@ void WatcherRunner::createWorker() {
}
// Get the path of the current process.
auto qd = SQL::selectAllFrom(
"processes", "pid", EQUALS, INTEGER(PlatformProcess::getCurrentPid()));
auto qd = SQL::selectFrom({"path"},
"processes",
"pid",
EQUALS,
INTEGER(PlatformProcess::getCurrentPid()));
if (qd.size() != 1 || qd[0].count("path") == 0 || qd[0]["path"].size() == 0) {
LOG(ERROR) << "osquery watcher cannot determine process path for worker";
Initializer::requestShutdown(EXIT_FAILURE);

View File

@ -47,13 +47,21 @@ DECLARE_bool(events_optimize);
SQLInternal monitor(const std::string& name, const ScheduledQuery& query) {
// Snapshot the performance and times for the worker before running.
auto pid = std::to_string(PlatformProcess::getCurrentPid());
auto r0 = SQL::selectAllFrom("processes", "pid", EQUALS, pid);
auto r0 = SQL::selectFrom({"resident_size", "user_time", "system_time"},
"processes",
"pid",
EQUALS,
pid);
auto t0 = getUnixTime();
Config::get().recordQueryStart(name);
SQLInternal sql(query.query, true);
// Snapshot the performance after, and compare.
auto t1 = getUnixTime();
auto r1 = SQL::selectAllFrom("processes", "pid", EQUALS, pid);
auto r1 = SQL::selectFrom({"resident_size", "user_time", "system_time"},
"processes",
"pid",
EQUALS,
pid);
if (r0.size() > 0 && r1.size() > 0) {
// Calculate a size as the expected byte output of results.
// This does not dedup result differentials and is not aware of snapshots.

View File

@ -121,6 +121,25 @@ QueryData SQL::selectAllFrom(const std::string& table,
return response;
}
QueryData SQL::selectFrom(const std::initializer_list<std::string> columns,
const std::string& table,
const std::string& column,
ConstraintOperator op,
const std::string& expr) {
PluginRequest request = {{"action", "generate"}};
{
// Create a fake content, there will be no caching.
QueryContext ctx;
ctx.constraints[column].add(Constraint(op, expr));
ctx.colsUsed = UsedColumns(columns);
TablePlugin::setRequestFromContext(ctx, request);
}
PluginResponse response;
Registry::call("table", table, request, response);
return response;
}
Status SQLPlugin::call(const PluginRequest& request, PluginResponse& response) {
response.clear();
if (request.count("action") == 0) {

View File

@ -294,6 +294,7 @@ void SQLiteDBInstance::clearAffectedTables() {
for (const auto& table : affected_tables_) {
table.second->constraints.clear();
table.second->cache.clear();
table.second->colsUsed.clear();
}
// Since the affected tables are cleared, there are no more affected tables.
// There is no concept of compounding tables between queries.

View File

@ -418,6 +418,24 @@ static int xBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo) {
cost += 200;
}
UsedColumns colsUsed;
if (pIdxInfo->colUsed > 0) {
for (size_t i = 0; i < columns.size(); i++) {
// Check whether the column is used. colUsed has one bit for each of the
// first 63 columns, and the 64th bit indicates that at least one other
// column is used.
uint64_t flag;
if (i < 63) {
flag = 1LL << i;
} else {
flag = 1LL << 63;
}
if ((pIdxInfo->colUsed & flag) != 0) {
colsUsed.insert(std::get<0>(columns[i]));
}
}
}
pIdxInfo->idxNum = static_cast<int>(kConstraintIndexID++);
#if defined(DEBUG)
plan("Recording constraint set for table: " + pVtab->content->name +
@ -427,6 +445,7 @@ static int xBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo) {
#endif
// Add the constraint set to the table's tracked constraints.
pVtab->content->constraints[pIdxInfo->idxNum] = std::move(constraints);
pVtab->content->colsUsed[pIdxInfo->idxNum] = std::move(colsUsed);
pIdxInfo->estimatedCost = cost;
return SQLITE_OK;
}
@ -530,6 +549,10 @@ static int xFilter(sqlite3_vtab_cursor* pVtabCursor,
}
}
if (content->colsUsed.size() > 0) {
context.colsUsed = content->colsUsed[idxNum];
}
if (!user_based_satisfied) {
LOG(WARNING) << "The " << pVtab->content->name
<< " table returns data based on the current user by default, "

View File

@ -109,7 +109,10 @@ struct proc_cred {
} real, effective, saved;
};
inline bool getProcCred(int pid, proc_cred& cred) {
inline bool genProcCred(QueryContext& context,
int pid,
proc_cred& cred,
Row& r) {
struct proc_bsdinfo bsdinfo;
struct proc_bsdshortinfo bsdinfo_short;
@ -125,7 +128,6 @@ inline bool getProcCred(int pid, proc_cred& cred) {
cred.effective.gid = bsdinfo.pbi_gid;
cred.saved.uid = bsdinfo.pbi_svuid;
cred.saved.gid = bsdinfo.pbi_svgid;
return true;
} else if (proc_pidinfo(pid,
PROC_PIDT_SHORTBSDINFO,
1,
@ -141,9 +143,26 @@ inline bool getProcCred(int pid, proc_cred& cred) {
cred.effective.gid = bsdinfo_short.pbsi_gid;
cred.saved.uid = bsdinfo_short.pbsi_svuid;
cred.saved.gid = bsdinfo_short.pbsi_svgid;
return true;
} else {
return false;
}
return false;
context.setBigIntColumnIfUsed(r, "parent", cred.parent);
context.setBigIntColumnIfUsed(r, "pgroup", cred.group);
context.setTextColumnIfUsed(r,
"state",
(1 <= cred.status && cred.status <= 5)
? kProcessStateMapping[cred.status]
: '?');
context.setIntegerColumnIfUsed(r, "nice", cred.nice);
context.setBigIntColumnIfUsed(r, "uid", cred.real.uid);
context.setBigIntColumnIfUsed(r, "gid", cred.real.gid);
context.setBigIntColumnIfUsed(r, "euid", cred.effective.uid);
context.setBigIntColumnIfUsed(r, "egid", cred.effective.gid);
context.setBigIntColumnIfUsed(r, "suid", cred.saved.uid);
context.setBigIntColumnIfUsed(r, "sgid", cred.saved.gid);
return true;
}
// Get the max args space
@ -161,24 +180,92 @@ static inline int genMaxArgs() {
return argmax;
}
void genProcRootAndCWD(int pid, Row& r) {
r["cwd"] = "";
r["root"] = "";
void genProcRootAndCWD(const QueryContext& context, int pid, Row& r) {
if (!context.isAnyColumnUsed({"cwd", "root"})) {
return;
}
struct proc_vnodepathinfo pathinfo;
if (proc_pidinfo(
pid, PROC_PIDVNODEPATHINFO, 0, &pathinfo, sizeof(pathinfo)) ==
sizeof(pathinfo)) {
if (pathinfo.pvi_cdir.vip_vi.vi_stat.vst_dev != 0) {
if (context.isColumnUsed("cwd") &&
pathinfo.pvi_cdir.vip_vi.vi_stat.vst_dev != 0) {
r["cwd"] = std::string(pathinfo.pvi_cdir.vip_path);
}
if (pathinfo.pvi_rdir.vip_vi.vi_stat.vst_dev != 0) {
if (context.isColumnUsed("root") &&
pathinfo.pvi_rdir.vip_vi.vi_stat.vst_dev != 0) {
r["root"] = std::string(pathinfo.pvi_rdir.vip_path);
}
}
}
void genProcNamePathAndOnDisk(const QueryContext& context,
int pid,
const struct proc_cred& cred,
Row& r) {
if (!context.isAnyColumnUsed({"name", "path", "on_disk"})) {
return;
}
std::string path;
if (pid == 0) {
path = "";
if (context.isColumnUsed("name")) {
// For some reason not even proc_name gives back a name for kernel_task
r["name"] = "kernel_task";
}
} else if (cred.status != 5) { // If the process is not a Zombie, try to
// find the path and name.
path = getProcPath(pid);
if (context.isColumnUsed("name")) {
// OS X proc_name only returns 16 bytes, use the basename of the path.
r["name"] = fs::path(path).filename().string();
}
} else {
path = "";
if (context.isColumnUsed("name")) {
std::vector<char> name(17);
proc_name(pid, name.data(), 16);
r["name"] = std::string(name.data());
}
}
context.setTextColumnIfUsed(r, "path", path);
if (!context.isColumnUsed("on_disk")) {
return;
}
// If the path of the executable that started the process is available and
// the path exists on disk, set on_disk to 1. If the path is not
// available, set on_disk to -1. If, and only if, the path of the
// executable is available and the file does NOT exist on disk, set on_disk
// to 0.
if (path.empty()) {
r["on_disk"] = INTEGER(-1);
} else if (pathExists(path)) {
r["on_disk"] = INTEGER(1);
} else {
r["on_disk"] = INTEGER(0);
}
}
void genProcNumThreads(QueryContext& context, int pid, Row& r) {
if (!context.isColumnUsed("threads")) {
return;
}
struct proc_taskinfo task_info;
int status =
proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &task_info, sizeof(task_info));
if (status == sizeof(task_info)) {
r["threads"] = INTEGER(task_info.pti_threadnum);
} else {
r["threads"] = "-1";
}
}
struct proc_args {
std::vector<std::string> args;
std::map<std::string, std::string> env;
@ -227,6 +314,18 @@ proc_args getProcRawArgs(int pid, size_t argmax) {
return args;
}
void genProcCmdline(const QueryContext& context, int pid, Row& r) {
if (!context.isColumnUsed("cmdline")) {
return;
}
int argmax = genMaxArgs();
// The command line invocation including arguments.
auto args = getProcRawArgs(pid, argmax);
std::string cmdline = boost::algorithm::join(args.args, " ");
r["cmdline"] = cmdline;
}
static inline long getUptimeInUSec() {
struct timeval boot_time;
size_t len = sizeof(boot_time);
@ -246,98 +345,49 @@ static inline long getUptimeInUSec() {
tv.tv_usec);
}
QueryData genProcesses(QueryContext& context) {
QueryData results;
// Initialize time conversions.
static mach_timebase_info_data_t time_base;
if (time_base.denom == 0) {
mach_timebase_info(&time_base);
void genProcResourceUsage(const QueryContext& context, int pid, Row& r) {
if (!context.isAnyColumnUsed({"wired_size",
"resident_size",
"total_size",
"user_time",
"system_time",
"disk_bytes_read",
"disk_bytes_written",
"start_time"})) {
return;
}
auto pidlist = getProcList(context);
int argmax = genMaxArgs();
struct rusage_info_v2 rusage_info_data;
int status =
proc_pid_rusage(pid, RUSAGE_INFO_V2, (rusage_info_t*)&rusage_info_data);
// proc_pid_rusage returns -1 if it was unable to gather information
if (status == 0) {
// size/memory information
context.setTextColumnIfUsed(
r, "wired_size", rusage_info_data.ri_wired_size);
context.setTextColumnIfUsed(
r, "resident_size", rusage_info_data.ri_resident_size);
context.setTextColumnIfUsed(
r, "total_size", rusage_info_data.ri_phys_footprint);
for (auto& pid : pidlist) {
Row r;
r["pid"] = INTEGER(pid);
// time information
context.setTextColumnIfUsed(
r, "user_time", rusage_info_data.ri_user_time / CPU_TIME_RATIO);
context.setTextColumnIfUsed(
r, "system_time", rusage_info_data.ri_system_time / CPU_TIME_RATIO);
{
// The command line invocation including arguments.
auto args = getProcRawArgs(pid, argmax);
std::string cmdline = boost::algorithm::join(args.args, " ");
r["cmdline"] = cmdline;
}
// disk i/o information
context.setTextColumnIfUsed(
r, "disk_bytes_read", rusage_info_data.ri_diskio_bytesread);
context.setTextColumnIfUsed(
r, "disk_bytes_written", rusage_info_data.ri_diskio_byteswritten);
// The process relative root and current working directory.
genProcRootAndCWD(pid, r);
proc_cred cred;
if (getProcCred(pid, cred)) {
r["parent"] = BIGINT(cred.parent);
r["pgroup"] = BIGINT(cred.group);
// check if process state is one of the expected ones
r["state"] = (1 <= cred.status && cred.status <= 5)
? TEXT(kProcessStateMapping[cred.status])
: TEXT('?');
r["nice"] = INTEGER(cred.nice);
r["uid"] = BIGINT(cred.real.uid);
r["gid"] = BIGINT(cred.real.gid);
r["euid"] = BIGINT(cred.effective.uid);
r["egid"] = BIGINT(cred.effective.gid);
r["suid"] = BIGINT(cred.saved.uid);
r["sgid"] = BIGINT(cred.saved.gid);
} else {
continue;
}
if (pid == 0) {
r["path"] = "";
// For some reason not even proc_name gives back a name for kernel_task
r["name"] = "kernel_task";
} else if (cred.status != 5) { // If the process is not a Zombie, try to
// find the path and name.
r["path"] = getProcPath(pid);
// OS X proc_name only returns 16 bytes, use the basename of the path.
r["name"] = fs::path(r["path"]).filename().string();
} else {
r["path"] = "";
std::vector<char> name(17);
proc_name(pid, name.data(), 16);
r["name"] = std::string(name.data());
}
// If the path of the executable that started the process is available and
// the path exists on disk, set on_disk to 1. If the path is not
// available, set on_disk to -1. If, and only if, the path of the
// executable is available and the file does NOT exist on disk, set on_disk
// to 0.
if (r["path"].empty()) {
r["on_disk"] = INTEGER(-1);
} else if (pathExists(r["path"])) {
r["on_disk"] = INTEGER(1);
} else {
r["on_disk"] = INTEGER(0);
}
// systems usage and time information
struct rusage_info_v2 rusage_info_data;
int status =
proc_pid_rusage(pid, RUSAGE_INFO_V2, (rusage_info_t*)&rusage_info_data);
// proc_pid_rusage returns -1 if it was unable to gather information
if (status == 0) {
// size/memory information
r["wired_size"] = TEXT(rusage_info_data.ri_wired_size);
r["resident_size"] = TEXT(rusage_info_data.ri_resident_size);
r["total_size"] = TEXT(rusage_info_data.ri_phys_footprint);
// time information
r["user_time"] = TEXT(rusage_info_data.ri_user_time / CPU_TIME_RATIO);
r["system_time"] = TEXT(rusage_info_data.ri_system_time / CPU_TIME_RATIO);
// disk i/o information
r["disk_bytes_read"] = TEXT(rusage_info_data.ri_diskio_bytesread);
r["disk_bytes_written"] = TEXT(rusage_info_data.ri_diskio_byteswritten);
if (context.isColumnUsed("start_time")) {
// Initialize time conversions.
static mach_timebase_info_data_t time_base;
if (time_base.denom == 0) {
mach_timebase_info(&time_base);
}
// Below is the logic to caculate the start_time since boot time
// with higher precision
@ -355,23 +405,41 @@ QueryData genProcesses(QueryContext& context) {
// Get the start_time of process since the computer started
r["start_time"] = TEXT((uptime + seconds_since_launch) / CPU_TIME_RATIO);
} else {
r["wired_size"] = "-1";
r["resident_size"] = "-1";
r["total_size"] = "-1";
r["user_time"] = "-1";
r["system_time"] = "-1";
r["start_time"] = "-1";
}
} else {
context.setTextColumnIfUsed(r, "wired_size", "-1");
context.setTextColumnIfUsed(r, "resident_size", "-1");
context.setTextColumnIfUsed(r, "total_size", "-1");
context.setTextColumnIfUsed(r, "user_time", "-1");
context.setTextColumnIfUsed(r, "system_time", "-1");
context.setTextColumnIfUsed(r, "start_time", "-1");
}
}
QueryData genProcesses(QueryContext& context) {
QueryData results;
auto pidlist = getProcList(context);
for (const auto& pid : pidlist) {
Row r;
context.setIntegerColumnIfUsed(r, "pid", pid);
genProcCmdline(context, pid, r);
// The process relative root and current working directory.
genProcRootAndCWD(context, pid, r);
proc_cred cred;
if (!genProcCred(context, pid, cred, r)) {
continue;
}
struct proc_taskinfo task_info;
status =
proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &task_info, sizeof(task_info));
if (status == sizeof(task_info)) {
r["threads"] = INTEGER(task_info.pti_threadnum);
} else {
r["threads"] = "-1";
}
genProcNamePathAndOnDisk(context, pid, cred, r);
// systems usage and time information
genProcResourceUsage(context, pid, r);
genProcNumThreads(context, pid, r);
results.push_back(r);
}