mirror of
https://github.com/valitydev/osquery-1.git
synced 2024-11-07 09:58:54 +00:00
Persistent hashing of `hash' virtual table (#3814)
This commit is contained in:
parent
9726d41c7c
commit
3e83bfbe4a
@ -8,16 +8,34 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
// clang-format off
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
// clang-format on
|
||||
|
||||
#ifndef WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <openssl/md5.h>
|
||||
#include <openssl/sha.h>
|
||||
|
||||
#include <boost/filesystem.hpp>
|
||||
|
||||
#include <osquery/core.h>
|
||||
#include <osquery/filesystem.h>
|
||||
#include <osquery/flags.h>
|
||||
#include <osquery/logger.h>
|
||||
#include <osquery/tables.h>
|
||||
|
||||
@ -25,6 +43,15 @@
|
||||
|
||||
namespace osquery {
|
||||
|
||||
FLAG(bool,
|
||||
enable_hash_cache,
|
||||
false,
|
||||
"Cache calculated file hashes, re-calculate only if file has changed");
|
||||
FLAG(uint32, hash_cache_max, 500, "Cache hashes for upto this number of files");
|
||||
|
||||
// Clear this amount of rows every time eviction is triggered
|
||||
#define HASH_CACHE_EVICT_SIZE 5
|
||||
|
||||
#define HASH_CHUNK_SIZE 4096
|
||||
|
||||
Hash::~Hash() {
|
||||
@ -140,7 +167,106 @@ std::string hashFromFile(HashType hash_type, const std::string& path) {
|
||||
}
|
||||
}
|
||||
|
||||
namespace tables {
|
||||
/**
|
||||
* @brief Implements persistent in-memory caching of files' hashes.
|
||||
*
|
||||
* This cache has LRU eviction policy. The hash is recalculated
|
||||
* every time the mtime or size of the file changes.
|
||||
*/
|
||||
struct FileHashCache {
|
||||
time_t file_mtime;
|
||||
off_t file_size;
|
||||
time_t cache_access_time;
|
||||
MultiHashes hashes;
|
||||
std::string path;
|
||||
|
||||
/// comparison function for organizing the LRU heap
|
||||
static bool greater(const FileHashCache* l, const FileHashCache* r) {
|
||||
return l->cache_access_time > r->cache_access_time;
|
||||
}
|
||||
/**
|
||||
* @brief Do-it-all access function.
|
||||
*
|
||||
* Maintains the cache of hash sums, stats file at path, if it has changed or
|
||||
* it is not present in cache calculates the hashes and caches the result
|
||||
*
|
||||
* @param path the path of file to hash
|
||||
* @param out stores the calculated hashes
|
||||
*
|
||||
* @return true if succeeded, false if something went wrong
|
||||
*/
|
||||
static bool load(const std::string& path, MultiHashes& out);
|
||||
};
|
||||
|
||||
#if defined(WIN32)
|
||||
|
||||
#define stat _stat
|
||||
#define strerror_r(e, buf, sz) strerror_s((buf), (sz), (e))
|
||||
|
||||
#endif
|
||||
|
||||
bool FileHashCache::load(const std::string& path, MultiHashes& out) {
|
||||
// synchronize the access to cache
|
||||
static Mutex mx;
|
||||
// path => cache entry
|
||||
static std::unordered_map<std::string, FileHashCache> cache;
|
||||
// minheap on cache_access_time
|
||||
static std::vector<FileHashCache*> lru;
|
||||
|
||||
WriteLock guard(mx);
|
||||
|
||||
struct stat st;
|
||||
if (stat(path.c_str(), &st) != 0) {
|
||||
char buf[0x200] = {0};
|
||||
strerror_r(errno, buf, sizeof(buf));
|
||||
LOG(WARNING) << "Cannot stat file: " << path << ": " << buf;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto entry = cache.find(path);
|
||||
if (entry == cache.end()) { // none, load
|
||||
if (cache.size() >= FLAGS_hash_cache_max) {
|
||||
// too large, evict
|
||||
for (size_t i = 0; i < HASH_CACHE_EVICT_SIZE; ++i) {
|
||||
if (lru.empty()) {
|
||||
continue;
|
||||
}
|
||||
std::string key = lru[0]->path;
|
||||
std::pop_heap(lru.begin(), lru.end(), FileHashCache::greater);
|
||||
lru.pop_back();
|
||||
if (cache.find(key) != cache.end()) {
|
||||
cache.erase(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
auto hashes = hashMultiFromFile(
|
||||
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, path);
|
||||
FileHashCache rec = {st.st_mtime, // .file_mtime
|
||||
st.st_size, // .file_size
|
||||
time(nullptr), // .cache_access_time
|
||||
std::move(hashes), // .hashes
|
||||
path}; // .path
|
||||
cache[path] = std::move(rec);
|
||||
lru.push_back(&cache[path]);
|
||||
std::push_heap(lru.begin(), lru.end(), FileHashCache::greater);
|
||||
out = cache[path].hashes;
|
||||
} else if (st.st_size != entry->second.file_size ||
|
||||
st.st_mtime != entry->second.file_mtime) { // changed, update
|
||||
auto hashes = hashMultiFromFile(
|
||||
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, path);
|
||||
entry->second.cache_access_time = time(0);
|
||||
entry->second.file_mtime = st.st_mtime;
|
||||
entry->second.file_size = st.st_size;
|
||||
entry->second.hashes = std::move(hashes);
|
||||
std::make_heap(lru.begin(), lru.end(), FileHashCache::greater);
|
||||
out = entry->second.hashes;
|
||||
} else { // ok, got it
|
||||
out = entry->second.hashes;
|
||||
entry->second.cache_access_time = time(0);
|
||||
std::make_heap(lru.begin(), lru.end(), FileHashCache::greater);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void genHashForFile(const std::string& path,
|
||||
const std::string& dir,
|
||||
@ -149,11 +275,17 @@ void genHashForFile(const std::string& path,
|
||||
// Must provide the path, filename, directory separate from boost path->string
|
||||
// helpers to match any explicit (query-parsed) predicate constraints.
|
||||
Row r;
|
||||
|
||||
if (context.isCached(path)) {
|
||||
r = context.getCache(path);
|
||||
} else {
|
||||
auto hashes = hashMultiFromFile(
|
||||
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, path);
|
||||
MultiHashes hashes;
|
||||
if (FLAGS_enable_hash_cache) {
|
||||
FileHashCache::load(path, hashes);
|
||||
} else {
|
||||
hashes = hashMultiFromFile(
|
||||
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, path);
|
||||
}
|
||||
|
||||
r["path"] = path;
|
||||
r["directory"] = dir;
|
||||
@ -165,6 +297,8 @@ void genHashForFile(const std::string& path,
|
||||
results.push_back(r);
|
||||
}
|
||||
|
||||
namespace tables {
|
||||
|
||||
QueryData genHash(QueryContext& context) {
|
||||
QueryData results;
|
||||
boost::system::error_code ec;
|
||||
|
@ -10,7 +10,12 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
#include <osquery/core.h>
|
||||
#include <osquery/filesystem.h>
|
||||
#include <osquery/flags.h>
|
||||
#include <osquery/logger.h>
|
||||
#include <osquery/sql.h>
|
||||
#include <osquery/tables.h>
|
||||
@ -19,6 +24,9 @@
|
||||
#include "osquery/tests/test_util.h"
|
||||
|
||||
namespace osquery {
|
||||
|
||||
DECLARE_bool(enable_hash_cache);
|
||||
|
||||
namespace tables {
|
||||
|
||||
/// Empty gtest fixture; it only groups the system tables TEST_F cases.
class SystemsTablesTests : public testing::Test {};
|
||||
@ -160,5 +168,83 @@ TEST_F(SystemsTablesTests, test_abstract_joins) {
|
||||
ASSERT_GT(results.rows().size(), 1U);
|
||||
}
|
||||
}
|
||||
|
||||
class HashTableTest : public testing::Test {
|
||||
public:
|
||||
const std::vector<std::string> content{"31337 hax0r", "random n00b"};
|
||||
const std::string contentMd5 = "2adfc0fd337a144cb2f8abd7cb0bf98e";
|
||||
const std::string contentSha1 = "21bd89f4580ef635e87f655fab5807a01e0ff2e9";
|
||||
const std::string contentSha256 =
|
||||
"6f1c16ac918f64721d14ff4bb3c51fe25ffde92f795ce6dbeb45722ce9d6e05c";
|
||||
const std::string badContentMd5 = "e1cd6c58b0d4d9d7bcbfc0ec2b55ce94";
|
||||
|
||||
void SetContent(int n) {
|
||||
if (pathExists(tmpPath)) {
|
||||
boost::filesystem::resize_file(tmpPath, 0);
|
||||
}
|
||||
writeTextFile(tmpPath, content[n]);
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
tmpPath = boost::filesystem::temp_directory_path();
|
||||
tmpPath /= boost::filesystem::unique_path(
|
||||
"osquery_hash_t_test-%%%%-%%%%-%%%%-%%%%");
|
||||
qry = std::string("select md5, sha1, sha256 from hash where path='") +
|
||||
tmpPath.string() + "'";
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
removePath(tmpPath);
|
||||
}
|
||||
|
||||
boost::filesystem::path tmpPath;
|
||||
std::string qry;
|
||||
};
|
||||
|
||||
// Querying the hash table for a file holding content[0] must return exactly
// one row carrying the expected MD5, SHA1 and SHA256 digests.
TEST_F(HashTableTest, hashes_are_correct) {
  SetContent(0);

  SQL query(qry);
  const auto& found = query.rows();
  ASSERT_EQ(found.size(), 1U);

  const auto& row = found[0];
  EXPECT_EQ(row.at("md5"), contentMd5);
  EXPECT_EQ(row.at("sha1"), contentSha1);
  EXPECT_EQ(row.at("sha256"), contentSha256);
}
|
||||
|
||||
// With the cache enabled, a file whose size and mtime are unchanged must be
// served from the cache: the second round rewrites the file with different
// content of the same length and pins the mtime, yet the MD5 of the first
// content is still expected.
TEST_F(HashTableTest, test_cache_works) {
  FLAGS_enable_hash_cache = true;

  time_t pinned_mtime = 0;
  for (int round = 0; round < 2; ++round) {
    SetContent(round);

    if (pinned_mtime != 0) {
      // make sure mtime doesn't change
      boost::filesystem::last_write_time(tmpPath, pinned_mtime);
    } else {
      pinned_mtime = boost::filesystem::last_write_time(tmpPath);
    }

    SQL query(qry);
    const auto& found = query.rows();
    ASSERT_EQ(found.size(), 1U);
    EXPECT_EQ(found[0].at("md5"), contentMd5);
  }
}
|
||||
|
||||
// With the cache enabled, a change of the file's mtime must invalidate the
// cached record, so the hashes of the new content are returned.
TEST_F(HashTableTest, test_cache_updates) {
  FLAGS_enable_hash_cache = true;

  // cache the current state
  SetContent(0);
  SQL first(qry);
  ASSERT_EQ(first.rows().size(), 1U);

  // Replace the content and move the mtime to now() - 1 hour, just in case
  SetContent(1);
  const time_t hour_ago = time(nullptr) - 60 * 60;
  boost::filesystem::last_write_time(tmpPath, hour_ago);

  SQL second(qry);
  const auto& found = second.rows();
  ASSERT_EQ(found.size(), 1U);

  const auto& md5 = found[0].at("md5");
  EXPECT_NE(md5, contentMd5);
  EXPECT_EQ(md5, badContentMd5);
}
|
||||
} // namespace tables
|
||||
} // namespace osquery
|
||||
|
Loading…
Reference in New Issue
Block a user