Speed up file hashing

This commit is contained in:
Teddy Reed 2015-12-11 00:36:16 -08:00
parent 1a1b07b5c6
commit 59750ec87d
6 changed files with 87 additions and 40 deletions

View File

@ -8,6 +8,8 @@
*
*/
#include <boost/noncopyable.hpp>
#include <string>
namespace osquery {
@ -23,6 +25,14 @@ enum HashType {
HASH_TYPE_SHA256 = 8,
};
/// A result structure for multiple hash requests.
struct MultiHashes {
  /// Bitmask of HashType values that were requested/computed.
  /// Initialized to 0 so a default-constructed instance (e.g. an error
  /// return or a locally-declared `MultiHashes dhs;`) never carries an
  /// indeterminate mask.
  int mask{0};
  /// Hex digest strings; each is empty unless its HashType bit is in mask.
  std::string md5;
  std::string sha1;
  std::string sha256;
};
/**
* @brief Hash is a general utility class for hashing content
*
@ -33,7 +43,7 @@ enum HashType {
* @endcode
*
*/
class Hash {
class Hash : private boost::noncopyable {
public:
/**
* @brief Hash constructor
@ -108,4 +118,7 @@ std::string hashFromBuffer(HashType hash_type, const void* buffer, size_t size);
* @return A string (hex) representation of the hash digest.
*/
std::string hashFromFile(HashType hash_type, const std::string& path);
/// Get multiple hashes from a file simultaneously.
MultiHashes hashMultiFromFile(int mask, const std::string& path);
}

View File

@ -94,35 +94,69 @@ std::string hashFromBuffer(HashType hash_type,
return hash.digest();
}
std::string hashFromFile(HashType hash_type, const std::string& path) {
MultiHashes hashMultiFromFile(int mask, const std::string& path) {
// Perform a dry-run of a file read without filling in any content.
auto status = readFile(path);
if (!status.ok()) {
return "";
return MultiHashes();
}
std::map<HashType, std::shared_ptr<Hash> > hashes = {
{HASH_TYPE_MD5, std::make_shared<Hash>(HASH_TYPE_MD5)},
{HASH_TYPE_SHA1, std::make_shared<Hash>(HASH_TYPE_SHA1)},
{HASH_TYPE_SHA256, std::make_shared<Hash>(HASH_TYPE_SHA256)},
};
{
// Drop privileges to the user controlling the file.
auto dropper = DropPrivileges::get();
if (!dropper->dropToParent(path)) {
return "";
return MultiHashes();
}
Hash hash(hash_type);
// Use the canonicalized path returned from a successful readFile dry-run.
FILE* file = fopen(status.what().c_str(), "rb");
if (file == nullptr) {
VLOG(1) << "Cannot hash/open file: " << path;
return "";
return MultiHashes();
}
// Then call updates with read chunks.
size_t bytes_read = 0;
unsigned char buffer[HASH_CHUNK_SIZE];
while ((bytes_read = fread(buffer, 1, HASH_CHUNK_SIZE, file))) {
hash.update(buffer, bytes_read);
for (auto& hash : hashes) {
if (mask & hash.first) {
hash.second->update(buffer, bytes_read);
}
}
}
fclose(file);
return hash.digest();
}
MultiHashes mh;
mh.mask = mask;
if (mask & HASH_TYPE_MD5) {
mh.md5 = hashes.at(HASH_TYPE_MD5)->digest();
}
if (mask & HASH_TYPE_SHA1) {
mh.sha1 = hashes.at(HASH_TYPE_SHA1)->digest();
}
if (mask & HASH_TYPE_SHA256) {
mh.sha256 = hashes.at(HASH_TYPE_SHA256)->digest();
}
return mh;
}
std::string hashFromFile(HashType hash_type, const std::string& path) {
  // Delegate to the multi-hash implementation with a single-bit mask, then
  // select the digest that matches the requested type.
  auto hashes = hashMultiFromFile(hash_type, path);
  switch (hash_type) {
  case HASH_TYPE_MD5:
    return std::move(hashes.md5);
  case HASH_TYPE_SHA1:
    return std::move(hashes.sha1);
  default:
    // Any other requested type resolves to SHA256, as before.
    return std::move(hashes.sha256);
  }
}
}

View File

@ -85,9 +85,11 @@ Status FileEventSubscriber::Callback(const FSEventsEventContextRef& ec,
// Only hash if the file content could have been modified.
if (ec->action == "CREATED" || ec->action == "UPDATED") {
r["md5"] = hashFromFile(HASH_TYPE_MD5, ec->path);
r["sha1"] = hashFromFile(HASH_TYPE_SHA1, ec->path);
r["sha256"] = hashFromFile(HASH_TYPE_SHA256, ec->path);
auto hashes = hashMultiFromFile(
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, ec->path);
r["md5"] = std::move(hashes.md5);
r["sha1"] = std::move(hashes.sha1);
r["sha256"] = std::move(hashes.sha256);
}
if (ec->action != "") {

View File

@ -85,9 +85,11 @@ Status FileEventSubscriber::Callback(const ECRef& ec, const SCRef& sc) {
r["transaction_id"] = INTEGER(ec->event->cookie);
if (ec->action == "CREATED" || ec->action == "UPDATED") {
r["md5"] = hashFromFile(HASH_TYPE_MD5, ec->path);
r["sha1"] = hashFromFile(HASH_TYPE_SHA1, ec->path);
r["sha256"] = hashFromFile(HASH_TYPE_SHA256, ec->path);
auto hashes = hashMultiFromFile(
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, ec->path);
r["md5"] = std::move(hashes.md5);
r["sha1"] = std::move(hashes.sha1);
r["sha256"] = std::move(hashes.sha256);
}
if (ec->action != "" && ec->action != "OPENED") {

View File

@ -256,13 +256,7 @@ void DeviceHelper::generateFiles(const std::string& partition,
}
}
struct DeviceHashes {
std::string md5;
std::string sha1;
std::string sha256;
};
DeviceHashes hashInode(TskFsFile* file) {
MultiHashes hashInode(TskFsFile* file) {
Hash md5(HASH_TYPE_MD5);
Hash sha1(HASH_TYPE_SHA1);
Hash sha256(HASH_TYPE_SHA256);
@ -270,7 +264,7 @@ DeviceHashes hashInode(TskFsFile* file) {
// We are guaranteed by the expected callsite to have a valid meta.
auto* meta = file->getMeta();
if (meta == nullptr) {
return DeviceHashes();
return MultiHashes();
}
// Set a maximum 'chunk' or block size to 1 page or the file size.
@ -290,7 +284,7 @@ DeviceHashes hashInode(TskFsFile* file) {
// Huge problem, either a read failed or didn't read the max size.
free(buffer);
delete meta;
return DeviceHashes();
return MultiHashes();
}
md5.update(buffer, chunk_size);
@ -302,7 +296,7 @@ DeviceHashes hashInode(TskFsFile* file) {
delete meta;
// Convert the set of hashes into a device hashes transport.
DeviceHashes dhs;
MultiHashes dhs;
dhs.md5 = md5.digest();
dhs.sha1 = sha1.digest();
dhs.sha256 = sha256.digest();

View File

@ -27,9 +27,11 @@ void genHashForFile(const std::string& path,
Row r;
r["path"] = path;
r["directory"] = dir;
r["md5"] = osquery::hashFromFile(HASH_TYPE_MD5, path);
r["sha1"] = osquery::hashFromFile(HASH_TYPE_SHA1, path);
r["sha256"] = osquery::hashFromFile(HASH_TYPE_SHA256, path);
auto hashes = hashMultiFromFile(
HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, path);
r["md5"] = std::move(hashes.md5);
r["sha1"] = std::move(hashes.sha1);
r["sha256"] = std::move(hashes.sha256);
results.push_back(r);
}