mirror of
https://github.com/valitydev/osquery-1.git
synced 2024-11-07 18:08:53 +00:00
Speed up file hashing
This commit is contained in:
parent
1a1b07b5c6
commit
59750ec87d
@ -8,6 +8,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace osquery {
|
||||
@ -23,6 +25,14 @@ enum HashType {
|
||||
HASH_TYPE_SHA256 = 8,
|
||||
};
|
||||
|
||||
/**
 * @brief A result structure for multiple hash requests.
 *
 * Carries the digests produced by a single pass over some content. Only the
 * members whose HashType bit is present in `mask` are expected to be filled;
 * the remaining strings stay empty.
 */
struct MultiHashes {
  /// Bitwise OR of the HashType values that were requested.
  /// Defaults to 0 so that a plainly declared `MultiHashes x;` never exposes
  /// an indeterminate value to readers of the mask.
  int mask{0};

  /// Hex digest for HASH_TYPE_MD5, empty when not computed.
  std::string md5;

  /// Hex digest for HASH_TYPE_SHA1, empty when not computed.
  std::string sha1;

  /// Hex digest for HASH_TYPE_SHA256, empty when not computed.
  std::string sha256;
};
|
||||
|
||||
/**
|
||||
* @brief Hash is a general utility class for hashing content
|
||||
*
|
||||
@ -33,7 +43,7 @@ enum HashType {
|
||||
* @endcode
|
||||
*
|
||||
*/
|
||||
class Hash {
|
||||
class Hash : private boost::noncopyable {
|
||||
public:
|
||||
/**
|
||||
* @brief Hash constructor
|
||||
@ -108,4 +118,7 @@ std::string hashFromBuffer(HashType hash_type, const void* buffer, size_t size);
|
||||
* @return A string (hex) representation of the hash digest.
|
||||
*/
|
||||
std::string hashFromFile(HashType hash_type, const std::string& path);
|
||||
|
||||
/**
 * @brief Get multiple hashes from a file simultaneously.
 *
 * The file is read once and every chunk is fed to each requested digest.
 *
 * @param mask Bitwise OR of the HashType values to compute, e.g.
 * HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256.
 * @param path Filesystem path of the file to hash.
 *
 * @return A MultiHashes with `mask` set and the requested digests filled in,
 * or a default-constructed MultiHashes when the file cannot be read or opened.
 */
|
||||
MultiHashes hashMultiFromFile(int mask, const std::string& path);
|
||||
}
|
||||
|
@ -94,35 +94,69 @@ std::string hashFromBuffer(HashType hash_type,
|
||||
return hash.digest();
|
||||
}
|
||||
|
||||
std::string hashFromFile(HashType hash_type, const std::string& path) {
|
||||
// Hash the file at `path` once, feeding every chunk that is read to each
// digest selected by `mask` (a bitwise OR of HashType values).
//
// Returns a MultiHashes carrying `mask` and the digests whose bit is set in
// it. Returns a default-constructed MultiHashes when the readFile dry-run
// fails, when dropToParent fails, or when fopen fails.
//
// NOTE(review): this span comes from a rendered diff. Lines of the previous
// single-hash hashFromFile body appear interleaved with the new body and are
// flagged individually below; confirm against the repository before relying
// on the exact statement order.
MultiHashes hashMultiFromFile(int mask, const std::string& path) {
|
||||
// Perform a dry-run of a file read without filling in any content.
|
||||
auto status = readFile(path);
|
||||
if (!status.ok()) {
|
||||
// NOTE(review): appears to be a removed pre-change line; a string return does
// not match the MultiHashes return type.
return "";
|
||||
return MultiHashes();
|
||||
}
|
||||
|
||||
// NOTE(review): the next comment, the `dropper` declaration, the
// dropToParent check and its `return "";` look like removed pre-change lines;
// the same steps reappear inside the scoped block further down.
// Drop privileges to the user controlling the file.
|
||||
auto dropper = DropPrivileges::get();
|
||||
if (!dropper->dropToParent(path)) {
|
||||
return "";
|
||||
// One Hash instance per supported algorithm, keyed by its HashType bit.
// All three are constructed regardless of which bits are set in `mask`.
std::map<HashType, std::shared_ptr<Hash> > hashes = {
|
||||
{HASH_TYPE_MD5, std::make_shared<Hash>(HASH_TYPE_MD5)},
|
||||
{HASH_TYPE_SHA1, std::make_shared<Hash>(HASH_TYPE_SHA1)},
|
||||
{HASH_TYPE_SHA256, std::make_shared<Hash>(HASH_TYPE_SHA256)},
|
||||
};
|
||||
|
||||
// Nested scope: `dropper`, `file` and `buffer` do not outlive the read loop.
// Presumably this bounds how long privileges stay dropped -- confirm against
// the DropPrivileges implementation.
{
|
||||
// Drop privileges to the user controlling the file.
|
||||
auto dropper = DropPrivileges::get();
|
||||
if (!dropper->dropToParent(path)) {
|
||||
return MultiHashes();
|
||||
}
|
||||
|
||||
// Use the canonicalized path returned from a successful readFile dry-run.
|
||||
FILE* file = fopen(status.what().c_str(), "rb");
|
||||
if (file == nullptr) {
|
||||
VLOG(1) << "Cannot hash/open file: " << path;
|
||||
return MultiHashes();
|
||||
}
|
||||
|
||||
// Then call updates with read chunks.
|
||||
size_t bytes_read = 0;
|
||||
unsigned char buffer[HASH_CHUNK_SIZE];
|
||||
// fread returns 0 at end-of-file or on a read error, which ends the loop.
while ((bytes_read = fread(buffer, 1, HASH_CHUNK_SIZE, file))) {
|
||||
// Feed the same chunk to every digest whose bit is set in `mask`.
for (auto& hash : hashes) {
|
||||
if (mask & hash.first) {
|
||||
hash.second->update(buffer, bytes_read);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
// NOTE(review): from here through the `return "";` below looks like removed
// pre-change lines; `hash_type` is not a parameter of this function.
Hash hash(hash_type);
|
||||
// Use the canonicalized path returned from a successful readFile dry-run.
|
||||
FILE* file = fopen(status.what().c_str(), "rb");
|
||||
if (file == nullptr) {
|
||||
VLOG(1) << "Cannot hash/open file: " << path;
|
||||
return "";
|
||||
// Assemble the result: record the requested mask and finalize only the
// digests whose bit is set.
MultiHashes mh;
|
||||
mh.mask = mask;
|
||||
if (mask & HASH_TYPE_MD5) {
|
||||
mh.md5 = hashes.at(HASH_TYPE_MD5)->digest();
|
||||
}
|
||||
|
||||
// NOTE(review): the next comment, the two declarations, the `while` header and
// `hash.update(...)` look like removed pre-change lines of the old read loop.
// Then call updates with read chunks.
|
||||
size_t bytes_read = 0;
|
||||
unsigned char buffer[HASH_CHUNK_SIZE];
|
||||
while ((bytes_read = fread(buffer, 1, HASH_CHUNK_SIZE, file))) {
|
||||
hash.update(buffer, bytes_read);
|
||||
if (mask & HASH_TYPE_SHA1) {
|
||||
mh.sha1 = hashes.at(HASH_TYPE_SHA1)->digest();
|
||||
}
|
||||
if (mask & HASH_TYPE_SHA256) {
|
||||
mh.sha256 = hashes.at(HASH_TYPE_SHA256)->digest();
|
||||
}
|
||||
return mh;
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
return hash.digest();
|
||||
// Single-algorithm convenience wrapper: runs the multi-hash routine with a
// mask holding only `hash_type` and hands back the matching digest string.
// Any value other than MD5 or SHA1 yields the sha256 member.
std::string hashFromFile(HashType hash_type, const std::string& path) {
  auto result = hashMultiFromFile(hash_type, path);
  switch (hash_type) {
  case HASH_TYPE_MD5:
    return result.md5;
  case HASH_TYPE_SHA1:
    return result.sha1;
  default:
    return result.sha256;
  }
}
|
||||
}
|
||||
|
@ -85,9 +85,11 @@ Status FileEventSubscriber::Callback(const FSEventsEventContextRef& ec,
|
||||
|
||||
// Only hash if the file content could have been modified.
|
||||
if (ec->action == "CREATED" || ec->action == "UPDATED") {
|
||||
r["md5"] = hashFromFile(HASH_TYPE_MD5, ec->path);
|
||||
r["sha1"] = hashFromFile(HASH_TYPE_SHA1, ec->path);
|
||||
r["sha256"] = hashFromFile(HASH_TYPE_SHA256, ec->path);
|
||||
auto hashes = hashMultiFromFile(
|
||||
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, ec->path);
|
||||
r["md5"] = std::move(hashes.md5);
|
||||
r["sha1"] = std::move(hashes.sha1);
|
||||
r["sha256"] = std::move(hashes.sha256);
|
||||
}
|
||||
|
||||
if (ec->action != "") {
|
||||
|
@ -85,9 +85,11 @@ Status FileEventSubscriber::Callback(const ECRef& ec, const SCRef& sc) {
|
||||
r["transaction_id"] = INTEGER(ec->event->cookie);
|
||||
|
||||
if (ec->action == "CREATED" || ec->action == "UPDATED") {
|
||||
r["md5"] = hashFromFile(HASH_TYPE_MD5, ec->path);
|
||||
r["sha1"] = hashFromFile(HASH_TYPE_SHA1, ec->path);
|
||||
r["sha256"] = hashFromFile(HASH_TYPE_SHA256, ec->path);
|
||||
auto hashes = hashMultiFromFile(
|
||||
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, ec->path);
|
||||
r["md5"] = std::move(hashes.md5);
|
||||
r["sha1"] = std::move(hashes.sha1);
|
||||
r["sha256"] = std::move(hashes.sha256);
|
||||
}
|
||||
|
||||
if (ec->action != "" && ec->action != "OPENED") {
|
||||
|
@ -256,13 +256,7 @@ void DeviceHelper::generateFiles(const std::string& partition,
|
||||
}
|
||||
}
|
||||
|
||||
// Plain container for the three digest strings (md5, sha1, sha256) returned
// by hashInode.
// NOTE(review): this definition appears as a removed block in the rendered
// diff; the hashInode lines that follow also show a MultiHashes return type
// with the same three string members -- confirm which one is current.
struct DeviceHashes {
|
||||
// MD5 digest string.
std::string md5;
|
||||
// SHA1 digest string.
std::string sha1;
|
||||
// SHA256 digest string.
std::string sha256;
|
||||
};
|
||||
|
||||
DeviceHashes hashInode(TskFsFile* file) {
|
||||
MultiHashes hashInode(TskFsFile* file) {
|
||||
Hash md5(HASH_TYPE_MD5);
|
||||
Hash sha1(HASH_TYPE_SHA1);
|
||||
Hash sha256(HASH_TYPE_SHA256);
|
||||
@ -270,7 +264,7 @@ DeviceHashes hashInode(TskFsFile* file) {
|
||||
// We are guaranteed by the expected callsite to have a valid meta.
|
||||
auto* meta = file->getMeta();
|
||||
if (meta == nullptr) {
|
||||
return DeviceHashes();
|
||||
return MultiHashes();
|
||||
}
|
||||
|
||||
// Set a maximum 'chunk' or block size to 1 page or the file size.
|
||||
@ -290,7 +284,7 @@ DeviceHashes hashInode(TskFsFile* file) {
|
||||
// Huge problem, either a read failed or didn't read the max size.
|
||||
free(buffer);
|
||||
delete meta;
|
||||
return DeviceHashes();
|
||||
return MultiHashes();
|
||||
}
|
||||
|
||||
md5.update(buffer, chunk_size);
|
||||
@ -302,7 +296,7 @@ DeviceHashes hashInode(TskFsFile* file) {
|
||||
delete meta;
|
||||
|
||||
// Convert the set of hashes into a device hashes transport.
|
||||
DeviceHashes dhs;
|
||||
MultiHashes dhs;
|
||||
dhs.md5 = md5.digest();
|
||||
dhs.sha1 = sha1.digest();
|
||||
dhs.sha256 = sha256.digest();
|
||||
|
@ -27,9 +27,11 @@ void genHashForFile(const std::string& path,
|
||||
Row r;
|
||||
r["path"] = path;
|
||||
r["directory"] = dir;
|
||||
r["md5"] = osquery::hashFromFile(HASH_TYPE_MD5, path);
|
||||
r["sha1"] = osquery::hashFromFile(HASH_TYPE_SHA1, path);
|
||||
r["sha256"] = osquery::hashFromFile(HASH_TYPE_SHA256, path);
|
||||
auto hashes = hashMultiFromFile(
|
||||
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, path);
|
||||
r["md5"] = std::move(hashes.md5);
|
||||
r["sha1"] = std::move(hashes.sha1);
|
||||
r["sha256"] = std::move(hashes.sha256);
|
||||
results.push_back(r);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user