2015-01-26 08:02:02 +00:00
|
|
|
/*
|
2016-02-11 19:48:58 +00:00
|
|
|
* Copyright (c) 2014-present, Facebook, Inc.
|
2015-01-26 08:02:02 +00:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This source code is licensed under the BSD-style license found in the
|
|
|
|
* LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
* of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2016-02-05 03:12:48 +00:00
|
|
|
#include <atomic>
|
2015-01-26 08:02:02 +00:00
|
|
|
#include <string>
|
|
|
|
|
2016-05-14 02:47:45 +00:00
|
|
|
#ifndef WIN32
|
2015-01-26 08:02:02 +00:00
|
|
|
#include <unistd.h>
|
2016-05-14 02:47:45 +00:00
|
|
|
#endif
|
2015-01-26 08:02:02 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
#include <boost/noncopyable.hpp>
|
|
|
|
|
2016-07-21 19:33:14 +00:00
|
|
|
#include <osquery/database.h>
|
2016-03-21 22:27:51 +00:00
|
|
|
#include <osquery/dispatcher.h>
|
2015-02-06 17:42:03 +00:00
|
|
|
#include <osquery/flags.h>
|
|
|
|
|
2016-05-14 02:47:45 +00:00
|
|
|
#include "osquery/core/process.h"
|
|
|
|
|
2015-01-26 08:02:02 +00:00
|
|
|
namespace osquery {
|
|
|
|
|
2016-05-14 02:47:45 +00:00
|
|
|
/// Map from a string key (Watcher uses the extension path) to the shared
/// PlatformProcess handle associated with that key.
using ExtensionMap = std::map<std::string, std::shared_ptr<PlatformProcess>>;
|
|
|
|
|
2015-02-06 17:42:03 +00:00
|
|
|
DECLARE_bool(disable_watchdog);
|
2015-06-05 18:20:24 +00:00
|
|
|
DECLARE_int32(watchdog_level);
|
2015-02-06 17:42:03 +00:00
|
|
|
|
2015-06-04 18:46:19 +00:00
|
|
|
class WatcherRunner;
|
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/**
|
|
|
|
* @brief Categories of process performance limitations.
|
|
|
|
*
|
|
|
|
* Performance limits are applied by a watcher thread on autoloaded extensions
|
2015-06-05 18:20:24 +00:00
|
|
|
* and an optional daemon worker process. The performance types are identified
|
2015-03-13 15:11:08 +00:00
|
|
|
* here, and organized into levels, such that a caller may enforce rigor or
|
|
|
|
* relax the performance expectations of an osquery daemon.
|
|
|
|
*/
|
2016-12-12 05:59:32 +00:00
|
|
|
enum class WatchdogLimitType {
  // Enumerators carry their implicit values (0 through 5, in declaration
  // order). getWorkerLimit() accepts one of these to select a limit.
  // NOTE(review): the per-category meanings below are read from the names
  // only; confirm against the limit table in the implementation file.

  /// Memory-related limit.
  MEMORY_LIMIT,

  /// Utilization-related limit.
  UTILIZATION_LIMIT,

  /// Respawn-related limit.
  RESPAWN_LIMIT,

  /// Respawn delay.
  RESPAWN_DELAY,

  /// Latency-related limit.
  LATENCY_LIMIT,

  /// Interval.
  INTERVAL,
};
|
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/**
|
|
|
|
* @brief A performance state structure for an autoloaded extension or worker.
|
|
|
|
*
|
|
|
|
* A watcher thread will continue to check the performance state, and keep a
|
|
|
|
* last-checked snapshot for each autoloaded extension and worker process.
|
|
|
|
*/
|
|
|
|
struct PerformanceState {
  /// A counter of how many intervals the process exceeded performance limits.
  size_t sustained_latency{0};

  /// The last checked user CPU time.
  size_t user_time{0};

  /// The last checked system CPU time.
  size_t system_time{0};

  /// A timestamp when the process/worker was last created.
  size_t last_respawn_time{0};

  /// The initial (or as close as possible) process image footprint.
  size_t initial_footprint{0};

  /// Every field starts at zero through its in-class initializer, so no
  /// hand-written constructor body is needed.
  PerformanceState() = default;
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Thread-safe watched child process state manager.
|
|
|
|
*
|
|
|
|
* The Watcher instance is separated from the WatcherRunner thread to allow
|
|
|
|
* signals and osquery-introspection to monitor the autoloaded extensions
|
|
|
|
* and optional worker stats. A child-process change signal may indicate an
|
|
|
|
* autoloaded extension ended. Tables may also report on the historic worker
|
|
|
|
* or extension utilizations.
|
|
|
|
*
|
2015-05-04 03:02:01 +00:00
|
|
|
* Though not critical, it is preferred to remove the extension's broadcasted
|
2015-03-13 15:11:08 +00:00
|
|
|
* routes quickly. Locking access to the extensions list between signals and
|
|
|
|
* the WatcherRunner thread allows osquery to tearDown registry changes before
|
|
|
|
* attempting to respawn an extension process.
|
|
|
|
*/
|
|
|
|
class Watcher : private boost::noncopyable {
|
2015-01-26 08:02:02 +00:00
|
|
|
public:
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Instance accessor
|
2017-05-30 00:16:19 +00:00
|
|
|
static Watcher& get() {
|
2015-03-13 15:11:08 +00:00
|
|
|
static Watcher instance;
|
|
|
|
return instance;
|
2015-01-26 08:02:02 +00:00
|
|
|
}
|
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Reset counters after a worker exits.
|
2017-05-30 00:16:19 +00:00
|
|
|
void resetWorkerCounters(size_t respawn_time);
|
2015-01-26 08:02:02 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Reset counters for an extension path.
|
2017-05-30 00:16:19 +00:00
|
|
|
void resetExtensionCounters(const std::string& extension,
|
|
|
|
size_t respawn_time);
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Lock access to extensions.
|
2017-05-30 00:16:19 +00:00
|
|
|
void lock() {
|
|
|
|
get().lock_.lock();
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Unlock access to extensions.
|
2017-05-30 00:16:19 +00:00
|
|
|
void unlock() {
|
|
|
|
get().lock_.unlock();
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Accessor for autoloadable extension paths.
|
2017-05-30 00:16:19 +00:00
|
|
|
const ExtensionMap& extensions() const {
|
|
|
|
return extensions_;
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Lookup extension path from pid.
|
2017-05-30 00:16:19 +00:00
|
|
|
std::string getExtensionPath(const PlatformProcess& child);
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Remove an autoloadable extension path.
|
2017-05-30 00:16:19 +00:00
|
|
|
void removeExtensionPath(const std::string& extension);
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Add extensions autoloadable paths.
|
2017-05-30 00:16:19 +00:00
|
|
|
void addExtensionPath(const std::string& path);
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Get state information for a worker or extension child.
|
2017-05-30 00:16:19 +00:00
|
|
|
PerformanceState& getState(const PlatformProcess& child);
|
|
|
|
PerformanceState& getState(const std::string& extension);
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Accessor for the worker process.
|
2017-05-30 00:16:19 +00:00
|
|
|
PlatformProcess& getWorker() {
|
|
|
|
return *worker_;
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Setter for worker process.
|
2017-05-30 00:16:19 +00:00
|
|
|
void setWorker(const std::shared_ptr<PlatformProcess>& child) {
|
|
|
|
worker_ = child;
|
2016-05-14 02:47:45 +00:00
|
|
|
}
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Setter for an extension process.
|
2017-05-30 00:16:19 +00:00
|
|
|
void setExtension(const std::string& extension,
|
|
|
|
const std::shared_ptr<PlatformProcess>& child);
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
/// Reset pid and performance counters for a worker or extension process.
|
2017-05-30 00:16:19 +00:00
|
|
|
void reset(const PlatformProcess& child);
|
2015-03-13 15:11:08 +00:00
|
|
|
|
2015-06-04 18:46:19 +00:00
|
|
|
/// Count the number of worker restarts.
|
2017-05-30 00:16:19 +00:00
|
|
|
size_t workerRestartCount() const {
|
|
|
|
return worker_restarts_;
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2015-06-04 18:46:19 +00:00
|
|
|
|
2015-09-02 23:53:29 +00:00
|
|
|
/// Become responsible for the worker's fate, but do not guarantee its safety.
|
2017-05-30 00:16:19 +00:00
|
|
|
void bindFates() {
|
|
|
|
restart_worker_ = false;
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2015-09-02 23:53:29 +00:00
|
|
|
|
|
|
|
/// Check if the worker and watcher's fates are bound.
|
2017-05-30 00:16:19 +00:00
|
|
|
bool fatesBound() const {
|
|
|
|
return !restart_worker_;
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2015-09-02 23:53:29 +00:00
|
|
|
|
2015-04-24 08:44:41 +00:00
|
|
|
/**
|
|
|
|
* @brief Return the state of autoloadable extensions.
|
|
|
|
*
|
|
|
|
* Some initialization decisions are made based on waiting for plugins to
|
|
|
|
* broadcast from potentially-loaded extensions. If no extensions are loaded
|
2015-05-04 03:02:01 +00:00
|
|
|
* and an active (selected at command line) plugin is missing, fail quickly.
|
2015-04-24 08:44:41 +00:00
|
|
|
*/
|
2017-05-30 00:16:19 +00:00
|
|
|
bool hasManagedExtensions() const;
|
2015-01-26 08:02:02 +00:00
|
|
|
|
2016-02-05 03:12:48 +00:00
|
|
|
/// Check the status of the last worker.
|
2017-05-30 00:16:19 +00:00
|
|
|
int getWorkerStatus() const {
|
|
|
|
return worker_status_;
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2016-02-05 03:12:48 +00:00
|
|
|
|
2015-01-26 08:02:02 +00:00
|
|
|
private:
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Do not request the lock until extensions are used.
|
2015-06-05 18:20:24 +00:00
|
|
|
Watcher()
|
2016-05-14 02:47:45 +00:00
|
|
|
: worker_(std::make_shared<PlatformProcess>()),
|
|
|
|
worker_restarts_(0),
|
|
|
|
lock_(mutex_, std::defer_lock) {}
|
2015-03-13 15:11:08 +00:00
|
|
|
Watcher(Watcher const&);
|
2016-02-05 03:12:48 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
void operator=(Watcher const&);
|
|
|
|
virtual ~Watcher() {}
|
2015-01-26 08:02:02 +00:00
|
|
|
|
2015-06-04 18:46:19 +00:00
|
|
|
private:
|
|
|
|
/// Inform the watcher that the worker restarted without cause.
|
2017-05-30 00:16:19 +00:00
|
|
|
void workerRestarted() {
|
|
|
|
worker_restarts_++;
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2015-06-04 18:46:19 +00:00
|
|
|
|
2015-01-26 08:02:02 +00:00
|
|
|
private:
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Performance state for the worker process.
|
|
|
|
PerformanceState state_;
|
2016-02-05 03:12:48 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Performance states for each autoloadable extension binary.
|
|
|
|
std::map<std::string, PerformanceState> extension_states_;
|
2015-01-26 08:02:02 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
/// Keep the single worker process/thread ID for inspection.
|
2016-05-14 02:47:45 +00:00
|
|
|
std::shared_ptr<PlatformProcess> worker_;
|
2016-02-05 03:12:48 +00:00
|
|
|
|
2015-06-05 18:20:24 +00:00
|
|
|
/// Number of worker restarts NOT induced by a watchdog process.
|
2015-09-02 23:53:29 +00:00
|
|
|
size_t worker_restarts_{0};
|
2016-02-05 03:12:48 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Keep a list of resolved extension paths and their managed pids.
|
2016-05-14 02:47:45 +00:00
|
|
|
ExtensionMap extensions_;
|
2016-02-05 03:12:48 +00:00
|
|
|
|
2015-04-24 08:44:41 +00:00
|
|
|
/// Paths to autoload extensions.
|
2015-03-13 15:11:08 +00:00
|
|
|
std::vector<std::string> extensions_paths_;
|
2016-02-05 03:12:48 +00:00
|
|
|
|
2015-09-02 23:53:29 +00:00
|
|
|
/// Bind the fate of the watcher to the worker.
|
|
|
|
bool restart_worker_{true};
|
2015-03-13 15:11:08 +00:00
|
|
|
|
2016-02-05 03:12:48 +00:00
|
|
|
/// Record the exit status of the most recent worker.
|
|
|
|
std::atomic<int> worker_status_{-1};
|
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
private:
|
|
|
|
/// Mutex and lock around extensions access.
|
2016-03-11 08:30:20 +00:00
|
|
|
Mutex mutex_;
|
2016-02-05 03:12:48 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Mutex and lock around extensions access.
|
2016-03-11 08:30:20 +00:00
|
|
|
std::unique_lock<Mutex> lock_;
|
2015-06-04 18:46:19 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
friend class WatcherRunner;
|
2015-03-13 15:11:08 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief A scoped locker for iterating over watcher extensions.
|
|
|
|
*
|
|
|
|
* A lock must be used if any part of osquery wants to enumerate the autoloaded
|
|
|
|
* extensions or autoloadable extension paths a Watcher may be monitoring.
|
|
|
|
* A signal or WatcherRunner thread may stop or start extensions.
|
|
|
|
*/
|
2017-05-30 00:16:19 +00:00
|
|
|
class WatcherExtensionsLocker {
|
2015-03-13 15:11:08 +00:00
|
|
|
public:
|
|
|
|
/// Construct and gain watcher lock.
|
2017-05-30 00:16:19 +00:00
|
|
|
WatcherExtensionsLocker() {
|
|
|
|
Watcher::get().lock();
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2016-07-21 19:33:14 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Destruct and release watcher lock.
|
2017-05-30 00:16:19 +00:00
|
|
|
~WatcherExtensionsLocker() {
|
|
|
|
Watcher::get().unlock();
|
2016-12-12 05:59:32 +00:00
|
|
|
}
|
2015-03-13 15:11:08 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief The watchdog thread responsible for spawning/monitoring children.
|
|
|
|
*
|
2015-04-24 08:44:41 +00:00
|
|
|
* The WatcherRunner thread will spawn any autoloaded extensions or optional
|
2015-03-13 15:11:08 +00:00
|
|
|
* osquery daemon worker processes. It will then poll for their performance
|
2015-04-24 08:44:41 +00:00
|
|
|
* state and kill/respawn osquery child processes if they violate limits.
|
2015-03-13 15:11:08 +00:00
|
|
|
*/
|
|
|
|
class WatcherRunner : public InternalRunnable {
|
|
|
|
public:
|
|
|
|
/**
|
|
|
|
* @brief Construct a watcher thread.
|
|
|
|
*
|
|
|
|
* @param argc The osquery process argc.
|
|
|
|
* @param argv The osquery process argv.
|
|
|
|
* @param use_worker True if the process should spawn and monitor a worker.
|
|
|
|
*/
|
|
|
|
explicit WatcherRunner(int argc, char** argv, bool use_worker)
|
|
|
|
: argc_(argc), argv_(argv), use_worker_(use_worker) {
|
|
|
|
(void)argc_;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2015-04-24 08:44:41 +00:00
|
|
|
/// Dispatcher (this service thread's) entry point.
|
2015-05-06 00:09:07 +00:00
|
|
|
void start();
|
2016-07-21 19:33:14 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Boilerplate function to sleep for some configured latency
|
2017-05-30 00:16:19 +00:00
|
|
|
bool ok() const;
|
2016-07-21 19:33:14 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Begin the worker-watcher process.
|
2016-07-21 19:33:14 +00:00
|
|
|
virtual bool watch(const PlatformProcess& child) const;
|
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Inspect into the memory, CPU, and other worker/extension process states.
|
2016-07-21 19:33:14 +00:00
|
|
|
virtual Status isChildSane(const PlatformProcess& child) const;
|
|
|
|
|
|
|
|
/// Inspect into the memory and CPU of the watcher process.
|
|
|
|
virtual Status isWatcherHealthy(const PlatformProcess& watcher,
|
|
|
|
PerformanceState& watcher_state) const;
|
|
|
|
|
|
|
|
/// Get row data from the processes table for a given pid.
|
|
|
|
virtual QueryData getProcessRow(pid_t pid) const;
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
private:
|
2015-04-24 08:44:41 +00:00
|
|
|
/// Fork and execute a worker process.
|
2016-07-21 19:33:14 +00:00
|
|
|
virtual void createWorker();
|
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Fork an extension process.
|
2017-01-26 20:33:23 +00:00
|
|
|
virtual void createExtension(const std::string& extension);
|
2016-07-21 19:33:14 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// If a worker/extension has otherwise gone insane, stop it.
|
2016-07-21 19:33:14 +00:00
|
|
|
virtual void stopChild(const PlatformProcess& child) const;
|
|
|
|
|
2017-06-08 17:03:30 +00:00
|
|
|
/// Return the time the watchdog is delayed until (from start of watcher).
|
|
|
|
size_t delayedTime() const;
|
|
|
|
|
2016-07-21 19:33:14 +00:00
|
|
|
private:
|
|
|
|
/// For testing only, ask the WatcherRunner to run a start loop once.
|
2016-12-12 05:59:32 +00:00
|
|
|
void runOnce() {
|
|
|
|
run_once_ = true;
|
|
|
|
}
|
2015-03-13 15:11:08 +00:00
|
|
|
|
|
|
|
private:
|
2015-01-26 08:02:02 +00:00
|
|
|
/// Keep the invocation daemon's argc to iterate through argv.
|
2015-09-02 23:53:29 +00:00
|
|
|
int argc_{0};
|
2016-07-21 19:33:14 +00:00
|
|
|
|
2015-05-04 03:02:01 +00:00
|
|
|
/// When a worker child is spawned the argv will be scrubbed.
|
2015-09-02 23:53:29 +00:00
|
|
|
char** argv_{nullptr};
|
2016-07-21 19:33:14 +00:00
|
|
|
|
2015-03-13 15:11:08 +00:00
|
|
|
/// Spawn/monitor a worker process.
|
2015-09-02 23:53:29 +00:00
|
|
|
bool use_worker_{false};
|
2016-07-21 19:33:14 +00:00
|
|
|
|
|
|
|
/// If set, the ::start method will run once and return.
|
|
|
|
bool run_once_{false};
|
|
|
|
|
2016-09-21 23:17:30 +00:00
|
|
|
/// Similarly to the uncontrolled worker restarted, count each extension.
|
|
|
|
std::map<std::string, size_t> extension_restarts_;
|
|
|
|
|
2016-07-21 19:33:14 +00:00
|
|
|
private:
|
|
|
|
FRIEND_TEST(WatcherTests, test_watcherrunner_watch);
|
|
|
|
FRIEND_TEST(WatcherTests, test_watcherrunner_stop);
|
|
|
|
FRIEND_TEST(WatcherTests, test_watcherrunner_loop);
|
|
|
|
FRIEND_TEST(WatcherTests, test_watcherrunner_loop_failure);
|
|
|
|
FRIEND_TEST(WatcherTests, test_watcherrunner_loop_disabled);
|
|
|
|
FRIEND_TEST(WatcherTests, test_watcherrunner_watcherhealth);
|
2017-06-08 17:03:30 +00:00
|
|
|
FRIEND_TEST(WatcherTests, test_watcherrunner_unhealthy_delay);
|
2015-01-26 08:02:02 +00:00
|
|
|
};
|
|
|
|
|
2015-02-06 17:42:03 +00:00
|
|
|
/// The WatcherWatcher is spawned within the worker and watches the watcher.
|
|
|
|
class WatcherWatcherRunner : public InternalRunnable {
|
|
|
|
public:
|
2016-05-14 02:47:45 +00:00
|
|
|
explicit WatcherWatcherRunner(const std::shared_ptr<PlatformProcess>& watcher)
|
|
|
|
: watcher_(watcher) {}
|
2015-06-05 18:20:24 +00:00
|
|
|
|
|
|
|
/// Runnable thread's entry point.
|
2015-05-06 00:09:07 +00:00
|
|
|
void start();
|
2015-02-06 17:42:03 +00:00
|
|
|
|
|
|
|
private:
|
2015-06-05 18:20:24 +00:00
|
|
|
/// Parent, or watchdog, process ID.
|
2016-05-14 02:47:45 +00:00
|
|
|
std::shared_ptr<PlatformProcess> watcher_;
|
2015-02-06 17:42:03 +00:00
|
|
|
};
|
|
|
|
|
2015-02-09 00:00:43 +00:00
|
|
|
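/// Get a performance limit for the given limit type. The level is not a
/// parameter; presumably it comes from the watchdog_level flag (confirm in the
/// implementation).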
|
2016-07-01 00:53:05 +00:00
|
|
|
size_t getWorkerLimit(WatchdogLimitType limit);
|
2015-01-26 08:02:02 +00:00
|
|
|
}
|