mirror of
https://github.com/valitydev/osquery-1.git
synced 2024-11-08 18:33:54 +00:00
572 lines
18 KiB
C++
572 lines
18 KiB
C++
/*
|
|
* Copyright (c) 2014-present, Facebook, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This source code is licensed under the BSD-style license found in the
|
|
* LICENSE file in the root directory of this source tree. An additional grant
|
|
* of patent rights can be found in the PATENTS file in the same directory.
|
|
*
|
|
*/
|
|
|
|
#include <cstring>
|
|
|
|
#include <math.h>
|
|
#include <signal.h>
|
|
|
|
#ifndef WIN32
|
|
#include <sys/wait.h>
|
|
#endif
|
|
|
|
#include <boost/filesystem.hpp>
|
|
|
|
#include <osquery/config.h>
|
|
#include <osquery/filesystem.h>
|
|
#include <osquery/logger.h>
|
|
#include <osquery/sql.h>
|
|
#include <osquery/system.h>
|
|
|
|
#include "osquery/core/process.h"
|
|
#include "osquery/core/watcher.h"
|
|
|
|
namespace fs = boost::filesystem;
|
|
|
|
namespace osquery {
|
|
|
|
struct LimitDefinition {
|
|
size_t normal;
|
|
size_t restrictive;
|
|
size_t disabled;
|
|
};
|
|
|
|
struct PerformanceChange {
|
|
size_t sustained_latency;
|
|
size_t footprint;
|
|
size_t iv;
|
|
pid_t parent;
|
|
};
|
|
|
|
using WatchdogLimitMap = std::map<WatchdogLimitType, LimitDefinition>;
|
|
|
|
const WatchdogLimitMap kWatchdogLimits = {
|
|
// Maximum MB worker can privately allocate.
|
|
{WatchdogLimitType::MEMORY_LIMIT, {200, 100, 10000}},
|
|
// User or system CPU worker can utilize for LATENCY_LIMIT seconds.
|
|
{WatchdogLimitType::UTILIZATION_LIMIT, {90, 80, 1000}},
|
|
// Number of seconds the worker should run, else consider the exit fatal.
|
|
{WatchdogLimitType::RESPAWN_LIMIT, {10, 4, 1000}},
|
|
// If the worker respawns too quickly, backoff on creating additional.
|
|
{WatchdogLimitType::RESPAWN_DELAY, {5, 5, 1}},
|
|
// Seconds of tolerable UTILIZATION_LIMIT sustained latency.
|
|
{WatchdogLimitType::LATENCY_LIMIT, {12, 6, 1000}},
|
|
// How often to poll for performance limit violations.
|
|
{WatchdogLimitType::INTERVAL, {3, 3, 3}},
|
|
};
|
|
|
|
CLI_FLAG(int32,
|
|
watchdog_level,
|
|
0,
|
|
"Performance limit level (0=normal, 1=restrictive, -1=off)");
|
|
|
|
CLI_FLAG(uint64,
|
|
watchdog_memory_limit,
|
|
0,
|
|
"Override watchdog profile memory limit");
|
|
|
|
CLI_FLAG(uint64,
|
|
watchdog_utilization_limit,
|
|
0,
|
|
"Override watchdog profile CPU utilization limit");
|
|
|
|
CLI_FLAG(uint64,
|
|
watchdog_delay,
|
|
60,
|
|
"Initial delay in seconds before watchdog starts");
|
|
|
|
CLI_FLAG(bool, disable_watchdog, false, "Disable userland watchdog process");
|
|
|
|
void Watcher::resetWorkerCounters(size_t respawn_time) {
|
|
// Reset the monitoring counters for the watcher.
|
|
state_.sustained_latency = 0;
|
|
state_.user_time = 0;
|
|
state_.system_time = 0;
|
|
state_.last_respawn_time = respawn_time;
|
|
}
|
|
|
|
void Watcher::resetExtensionCounters(const std::string& extension,
|
|
size_t respawn_time) {
|
|
WatcherExtensionsLocker locker;
|
|
auto& state = get().extension_states_[extension];
|
|
state.sustained_latency = 0;
|
|
state.user_time = 0;
|
|
state.system_time = 0;
|
|
state.last_respawn_time = respawn_time;
|
|
}
|
|
|
|
std::string Watcher::getExtensionPath(const PlatformProcess& child) {
|
|
for (const auto& extension : extensions()) {
|
|
if (*extension.second == child) {
|
|
return extension.first;
|
|
}
|
|
}
|
|
return "";
|
|
}
|
|
|
|
void Watcher::removeExtensionPath(const std::string& extension) {
|
|
WatcherExtensionsLocker locker;
|
|
extensions_.erase(extension);
|
|
extension_states_.erase(extension);
|
|
}
|
|
|
|
PerformanceState& Watcher::getState(const PlatformProcess& child) {
|
|
if (child == getWorker()) {
|
|
return state_;
|
|
} else {
|
|
return extension_states_[getExtensionPath(child)];
|
|
}
|
|
}
|
|
|
|
PerformanceState& Watcher::getState(const std::string& extension) {
|
|
return extension_states_[extension];
|
|
}
|
|
|
|
void Watcher::setExtension(const std::string& extension,
|
|
const std::shared_ptr<PlatformProcess>& child) {
|
|
WatcherExtensionsLocker locker;
|
|
extensions_[extension] = child;
|
|
}
|
|
|
|
void Watcher::reset(const PlatformProcess& child) {
|
|
if (child == getWorker()) {
|
|
worker_ = std::make_shared<PlatformProcess>();
|
|
resetWorkerCounters(0);
|
|
return;
|
|
}
|
|
|
|
// If it was not the worker pid then find the extension name to reset.
|
|
for (const auto& extension : extensions()) {
|
|
if (*extension.second == child) {
|
|
setExtension(extension.first, std::make_shared<PlatformProcess>());
|
|
resetExtensionCounters(extension.first, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
void Watcher::addExtensionPath(const std::string& path) {
|
|
setExtension(path, std::make_shared<PlatformProcess>());
|
|
resetExtensionCounters(path, 0);
|
|
}
|
|
|
|
bool Watcher::hasManagedExtensions() const {
|
|
if (!extensions_.empty()) {
|
|
return true;
|
|
}
|
|
|
|
// A watchdog process may hint to a worker the number of managed extensions.
|
|
// Setting this counter to 0 will prevent the worker from waiting for missing
|
|
// dependent config plugins. Otherwise, its existence, will cause a worker to
|
|
// wait for missing plugins to broadcast from managed extensions.
|
|
return getEnvVar("OSQUERY_EXTENSIONS").is_initialized();
|
|
}
|
|
|
|
bool WatcherRunner::ok() const {
|
|
// Inspect the exit code, on success or catastrophic, end the watcher.
|
|
auto status = Watcher::get().getWorkerStatus();
|
|
if (status == EXIT_SUCCESS || status == EXIT_CATASTROPHIC) {
|
|
return false;
|
|
}
|
|
// Watcher is OK to run if a worker or at least one extension exists.
|
|
return (Watcher::get().getWorker().isValid() ||
|
|
Watcher::get().hasManagedExtensions());
|
|
}
|
|
|
|
void WatcherRunner::start() {
|
|
// Set worker performance counters to an initial state.
|
|
Watcher::get().resetWorkerCounters(0);
|
|
// Hold the current process (watcher) for inspection too.
|
|
auto watcher = PlatformProcess::getCurrentProcess();
|
|
PerformanceState watcher_state;
|
|
|
|
// Enter the watch loop.
|
|
do {
|
|
if (use_worker_ && !watch(Watcher::get().getWorker())) {
|
|
if (Watcher::get().fatesBound()) {
|
|
// A signal has interrupted the watcher.
|
|
break;
|
|
}
|
|
// The watcher failed, create a worker.
|
|
createWorker();
|
|
}
|
|
|
|
// Loop over every managed extension and check sanity.
|
|
for (const auto& extension : Watcher::get().extensions()) {
|
|
auto s = isChildSane(*extension.second);
|
|
if (!s.ok()) {
|
|
// The extension manager also watches for extension-related failures.
|
|
// The watchdog is more general, but may find failed extensions first.
|
|
createExtension(extension.first);
|
|
extension_restarts_[extension.first] += 1;
|
|
} else {
|
|
extension_restarts_[extension.first] = 0;
|
|
}
|
|
}
|
|
// If any extension creations failed, stop managing them.
|
|
for (auto& extension : extension_restarts_) {
|
|
if (extension.second > 3) {
|
|
Watcher::get().removeExtensionPath(extension.first);
|
|
extension.second = 0;
|
|
}
|
|
}
|
|
|
|
if (use_worker_) {
|
|
auto status = isWatcherHealthy(*watcher, watcher_state);
|
|
if (!status.ok()) {
|
|
Initializer::requestShutdown(
|
|
EXIT_CATASTROPHIC,
|
|
"Watcher has become unhealthy: " + status.getMessage());
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (run_once_) {
|
|
// A test harness can end the thread immediately.
|
|
break;
|
|
}
|
|
pauseMilli(getWorkerLimit(WatchdogLimitType::INTERVAL) * 1000);
|
|
} while (!interrupted() && ok());
|
|
}
|
|
|
|
size_t WatcherRunner::delayedTime() const {
|
|
return Config::getStartTime() + FLAGS_watchdog_delay;
|
|
}
|
|
|
|
bool WatcherRunner::watch(const PlatformProcess& child) const {
|
|
int process_status = 0;
|
|
ProcessState result = child.checkStatus(process_status);
|
|
if (Watcher::get().fatesBound()) {
|
|
// A signal was handled while the watcher was watching.
|
|
return false;
|
|
}
|
|
|
|
if (!child.isValid() || result == PROCESS_ERROR) {
|
|
// Worker does not exist or never existed.
|
|
return false;
|
|
} else if (result == PROCESS_STILL_ALIVE) {
|
|
// If the inspect finds problems it will stop/restart the worker.
|
|
auto status = isChildSane(child);
|
|
// A delayed watchdog does not stop the worker process.
|
|
if (!status.ok() && getUnixTime() >= delayedTime()) {
|
|
LOG(WARNING) << "osqueryd worker (" << child.pid()
|
|
<< "): " << status.getMessage();
|
|
stopChild(child);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
if (result == PROCESS_EXITED) {
|
|
// If the worker process existed, store the exit code.
|
|
Watcher::get().worker_status_ = process_status;
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void WatcherRunner::stopChild(const PlatformProcess& child) const {
|
|
child.killGracefully();
|
|
|
|
// Clean up the defunct (zombie) process.
|
|
if (!child.cleanup()) {
|
|
// The child did not exit, force kill and attempt to cleanup again.
|
|
child.kill();
|
|
if (!child.cleanup()) {
|
|
Initializer::requestShutdown(EXIT_CATASTROPHIC,
|
|
"Watcher cannot stop worker process");
|
|
}
|
|
}
|
|
}
|
|
|
|
PerformanceChange getChange(const Row& r, PerformanceState& state) {
|
|
PerformanceChange change;
|
|
|
|
// IV is the check interval in seconds, and utilization is set per-second.
|
|
change.iv = std::max(getWorkerLimit(WatchdogLimitType::INTERVAL), (size_t)1);
|
|
UNSIGNED_BIGINT_LITERAL user_time = 0, system_time = 0;
|
|
try {
|
|
change.parent =
|
|
static_cast<pid_t>(AS_LITERAL(BIGINT_LITERAL, r.at("parent")));
|
|
user_time = AS_LITERAL(BIGINT_LITERAL, r.at("user_time")) / change.iv;
|
|
system_time = AS_LITERAL(BIGINT_LITERAL, r.at("system_time")) / change.iv;
|
|
change.footprint = AS_LITERAL(BIGINT_LITERAL, r.at("resident_size"));
|
|
} catch (const std::exception& /* e */) {
|
|
state.sustained_latency = 0;
|
|
}
|
|
|
|
// Check the difference of CPU time used since last check.
|
|
if (user_time - state.user_time >
|
|
getWorkerLimit(WatchdogLimitType::UTILIZATION_LIMIT) ||
|
|
system_time - state.system_time >
|
|
getWorkerLimit(WatchdogLimitType::UTILIZATION_LIMIT)) {
|
|
state.sustained_latency++;
|
|
} else {
|
|
state.sustained_latency = 0;
|
|
}
|
|
// Update the current CPU time.
|
|
state.user_time = user_time;
|
|
state.system_time = system_time;
|
|
|
|
// Check if the sustained difference exceeded the acceptable latency limit.
|
|
change.sustained_latency = state.sustained_latency;
|
|
|
|
// Set the memory footprint as the amount of resident bytes allocated
|
|
// since the process image was created (estimate).
|
|
// A more-meaningful check would limit this to writable regions.
|
|
if (state.initial_footprint == 0) {
|
|
state.initial_footprint = change.footprint;
|
|
}
|
|
|
|
// Set the measured/limit-applied footprint to the post-launch allocations.
|
|
if (change.footprint < state.initial_footprint) {
|
|
change.footprint = 0;
|
|
} else {
|
|
change.footprint = change.footprint - state.initial_footprint;
|
|
}
|
|
|
|
return change;
|
|
}
|
|
|
|
static bool exceededMemoryLimit(const PerformanceChange& change) {
|
|
if (change.footprint == 0) {
|
|
return false;
|
|
}
|
|
|
|
return (change.footprint >
|
|
getWorkerLimit(WatchdogLimitType::MEMORY_LIMIT) * 1024 * 1024);
|
|
}
|
|
|
|
static bool exceededCyclesLimit(const PerformanceChange& change) {
|
|
if (change.sustained_latency == 0) {
|
|
return false;
|
|
}
|
|
|
|
auto latency = change.sustained_latency * change.iv;
|
|
return (latency >= getWorkerLimit(WatchdogLimitType::LATENCY_LIMIT));
|
|
}
|
|
|
|
Status WatcherRunner::isWatcherHealthy(const PlatformProcess& watcher,
|
|
PerformanceState& watcher_state) const {
|
|
auto rows = getProcessRow(watcher.pid());
|
|
if (rows.size() == 0) {
|
|
// Could not find worker process?
|
|
return Status(1, "Cannot find watcher process");
|
|
}
|
|
|
|
auto change = getChange(rows[0], watcher_state);
|
|
if (exceededMemoryLimit(change)) {
|
|
return Status(1, "Memory limits exceeded");
|
|
}
|
|
|
|
return Status(0);
|
|
}
|
|
|
|
QueryData WatcherRunner::getProcessRow(pid_t pid) const {
|
|
// On Windows, pid_t = DWORD, which is unsigned. However invalidity
|
|
// of processes is denoted by a pid_t of -1. We check for this
|
|
// by comparing the max value of DWORD, or ULONG_MAX, and then casting
|
|
// our query back to an int value, as ULONG_MAX causes boost exceptions
|
|
// as it's out of the range of an int.
|
|
int p = pid;
|
|
#ifdef WIN32
|
|
p = (pid == ULONG_MAX) ? -1 : pid;
|
|
#endif
|
|
return SQL::selectAllFrom("processes", "pid", EQUALS, INTEGER(p));
|
|
}
|
|
|
|
Status WatcherRunner::isChildSane(const PlatformProcess& child) const {
|
|
auto rows = getProcessRow(child.pid());
|
|
if (rows.size() == 0) {
|
|
// Could not find worker process?
|
|
return Status(1, "Cannot find worker process");
|
|
}
|
|
|
|
PerformanceChange change;
|
|
{
|
|
WatcherExtensionsLocker locker;
|
|
auto& state = Watcher::get().getState(child);
|
|
change = getChange(rows[0], state);
|
|
}
|
|
|
|
// Only make a decision about the child sanity if it is still the watcher's
|
|
// child. It's possible for the child to die, and its pid reused.
|
|
if (change.parent != PlatformProcess::getCurrentProcess()->pid()) {
|
|
// The child's parent is not the watcher.
|
|
Watcher::get().reset(child);
|
|
// Do not stop or call the child insane, since it is not our child.
|
|
return Status(0);
|
|
}
|
|
|
|
if (exceededCyclesLimit(change)) {
|
|
return Status(1, "System performance limits exceeded");
|
|
}
|
|
// Check if the private memory exceeds a memory limit.
|
|
if (exceededMemoryLimit(change)) {
|
|
return Status(
|
|
1, "Memory limits exceeded: " + std::to_string(change.footprint));
|
|
}
|
|
|
|
// The worker is sane, no action needed.
|
|
// Attempt to flush status logs to the well-behaved worker.
|
|
if (use_worker_ && child.pid() == Watcher::get().getWorker().pid()) {
|
|
relayStatusLogs();
|
|
}
|
|
|
|
return Status(0);
|
|
}
|
|
|
|
void WatcherRunner::createWorker() {
|
|
auto& watcher = Watcher::get();
|
|
|
|
{
|
|
WatcherExtensionsLocker locker;
|
|
if (watcher.getState(watcher.getWorker()).last_respawn_time >
|
|
getUnixTime() - getWorkerLimit(WatchdogLimitType::RESPAWN_LIMIT)) {
|
|
LOG(WARNING) << "osqueryd worker respawning too quickly: "
|
|
<< watcher.workerRestartCount() << " times";
|
|
watcher.workerRestarted();
|
|
// The configured automatic delay.
|
|
size_t delay = getWorkerLimit(WatchdogLimitType::RESPAWN_DELAY) * 1000;
|
|
// Exponential back off for quickly-respawning clients.
|
|
delay += static_cast<size_t>(pow(2, watcher.workerRestartCount())) * 1000;
|
|
pauseMilli(delay);
|
|
}
|
|
}
|
|
|
|
// Get the path of the current process.
|
|
auto qd =
|
|
SQL::selectAllFrom("processes",
|
|
"pid",
|
|
EQUALS,
|
|
INTEGER(PlatformProcess::getCurrentProcess()->pid()));
|
|
if (qd.size() != 1 || qd[0].count("path") == 0 || qd[0]["path"].size() == 0) {
|
|
LOG(ERROR) << "osquery watcher cannot determine process path for worker";
|
|
Initializer::requestShutdown(EXIT_FAILURE);
|
|
return;
|
|
}
|
|
|
|
// Set an environment signaling to potential plugin-dependent workers to wait
|
|
// for extensions to broadcast.
|
|
if (watcher.hasManagedExtensions()) {
|
|
setEnvVar("OSQUERY_EXTENSIONS", "true");
|
|
}
|
|
|
|
// Get the complete path of the osquery process binary.
|
|
boost::system::error_code ec;
|
|
auto exec_path = fs::system_complete(fs::path(qd[0]["path"]), ec);
|
|
if (!safePermissions(
|
|
exec_path.parent_path().string(), exec_path.string(), true)) {
|
|
// osqueryd binary has become unsafe.
|
|
LOG(ERROR) << RLOG(1382)
|
|
<< "osqueryd has unsafe permissions: " << exec_path.string();
|
|
Initializer::requestShutdown(EXIT_FAILURE);
|
|
return;
|
|
}
|
|
|
|
auto worker = PlatformProcess::launchWorker(exec_path.string(), argc_, argv_);
|
|
if (worker == nullptr) {
|
|
// Unrecoverable error, cannot create a worker process.
|
|
LOG(ERROR) << "osqueryd could not create a worker process";
|
|
Initializer::shutdown(EXIT_FAILURE);
|
|
return;
|
|
}
|
|
|
|
watcher.setWorker(worker);
|
|
watcher.resetWorkerCounters(getUnixTime());
|
|
VLOG(1) << "osqueryd watcher (" << PlatformProcess::getCurrentProcess()->pid()
|
|
<< ") executing worker (" << worker->pid() << ")";
|
|
}
|
|
|
|
void WatcherRunner::createExtension(const std::string& extension) {
|
|
auto& watcher = Watcher::get();
|
|
|
|
{
|
|
WatcherExtensionsLocker locker;
|
|
if (watcher.getState(extension).last_respawn_time >
|
|
getUnixTime() - getWorkerLimit(WatchdogLimitType::RESPAWN_LIMIT)) {
|
|
LOG(WARNING) << "Extension respawning too quickly: " << extension;
|
|
// Unlike a worker, if an extension respawns to quickly we give up.
|
|
}
|
|
}
|
|
|
|
// Check the path to the previously-discovered extension binary.
|
|
boost::system::error_code ec;
|
|
auto exec_path = fs::system_complete(fs::path(extension), ec);
|
|
if (!safePermissions(
|
|
exec_path.parent_path().string(), exec_path.string(), true)) {
|
|
// Extension binary has become unsafe.
|
|
LOG(WARNING) << RLOG(1382)
|
|
<< "Extension binary has unsafe permissions: " << extension;
|
|
return;
|
|
}
|
|
|
|
auto ext_process =
|
|
PlatformProcess::launchExtension(exec_path.string(),
|
|
extension,
|
|
Flag::getValue("extensions_socket"),
|
|
Flag::getValue("extensions_timeout"),
|
|
Flag::getValue("extensions_interval"),
|
|
Flag::getValue("verbose") == "true");
|
|
if (ext_process == nullptr) {
|
|
// Unrecoverable error, cannot create an extension process.
|
|
LOG(ERROR) << "Cannot create extension process: " << extension;
|
|
Initializer::shutdown(EXIT_FAILURE);
|
|
}
|
|
|
|
watcher.setExtension(extension, ext_process);
|
|
watcher.resetExtensionCounters(extension, getUnixTime());
|
|
VLOG(1) << "Created and monitoring extension child (" << ext_process->pid()
|
|
<< "): " << extension;
|
|
}
|
|
|
|
void WatcherWatcherRunner::start() {
|
|
while (!interrupted()) {
|
|
if (isLauncherProcessDead(*watcher_)) {
|
|
// Watcher died, the worker must follow.
|
|
VLOG(1) << "osqueryd worker ("
|
|
<< PlatformProcess::getCurrentProcess()->pid()
|
|
<< ") detected killed watcher (" << watcher_->pid() << ")";
|
|
// The watcher watcher is a thread. Do not join services after removing.
|
|
Initializer::requestShutdown();
|
|
break;
|
|
}
|
|
pauseMilli(getWorkerLimit(WatchdogLimitType::INTERVAL) * 1000);
|
|
}
|
|
}
|
|
|
|
size_t getWorkerLimit(WatchdogLimitType name) {
|
|
if (kWatchdogLimits.count(name) == 0) {
|
|
return 0;
|
|
}
|
|
|
|
if (name == WatchdogLimitType::MEMORY_LIMIT &&
|
|
FLAGS_watchdog_memory_limit > 0) {
|
|
return FLAGS_watchdog_memory_limit;
|
|
}
|
|
|
|
if (name == WatchdogLimitType::UTILIZATION_LIMIT &&
|
|
FLAGS_watchdog_utilization_limit > 0) {
|
|
return FLAGS_watchdog_utilization_limit;
|
|
}
|
|
|
|
auto level = FLAGS_watchdog_level;
|
|
// If no level was provided then use the default (config/switch).
|
|
if (level == -1) {
|
|
return kWatchdogLimits.at(name).disabled;
|
|
}
|
|
|
|
if (level == 1) {
|
|
return kWatchdogLimits.at(name).restrictive;
|
|
}
|
|
return kWatchdogLimits.at(name).normal;
|
|
}
|
|
} // namespace osquery
|