osquery-1/tools/harnesses/osqueryfuzz_sqlquery_corpus.sh
2019-10-27 10:20:44 -04:00

105 lines
4.0 KiB
Bash
Executable File

#!/usr/bin/env bash
# Copyright (c) 2014-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed in accordance with the terms specified in
# the LICENSE file found in the root directory of this source tree.
# Explicitly leave errexit off: a failing command does not abort the script,
# and the remaining steps still run.
set +o errexit
# This script searches through most of osquery's source tree for SQL-like statements to build
# a seed corpus.
# Something important to note about this script: it does NOT have to be perfect.
# If we output incomplete or malformed SQL statements because we extracted them
# incorrectly: that's okay. These results are used as a seed corpus, so they literally
# can be as malformed as possible and it doesn't matter.
# So we make a good effort to extract complete statements, but if it misses some, that's okay.
# Absolute path of the directory that contains this script.
# The output of cd is discarded: when cd resolves its target through CDPATH it
# prints the resulting directory, which would otherwise be captured in front of
# the pwd output. The -- guards protect against paths that begin with a dash.
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" > /dev/null && pwd )"
# Print the one-line synopsis to stdout: the path this script was invoked as,
# followed by the name of its single argument.
usage() {
  printf '%s destination-file\n' "${BASH_SOURCE[0]}"
}
#######################################
# Build a seed corpus from the SQL-like string literals found in the tree.
#
# Lists the files under the tests, packs, osquery and plugins directories
# (located relative to SCRIPT_DIR) that contain the text "select, runs an awk
# program over each of them to pull out the statements, turns every \" into ",
# writes each resulting line to its own file and adds those files to the zip
# archive named by the first argument.
#
# The extraction does NOT have to be perfect: the output is only a seed
# corpus, so incomplete or malformed statements are acceptable.
#
# Globals:   SCRIPT_DIR (read)
# Arguments: $1 - path of the zip archive to write
# Outputs:   usage text on stderr when the argument is missing; whatever
#            grep/awk/zip print otherwise
# Returns:   exits 1 when the argument is missing or the scratch area cannot
#            be prepared; otherwise returns the exit status of zip
#######################################
function main() {
  # Arithmetic comparison; [[ $# < 1 ]] would compare the values as strings.
  if (( $# < 1 )); then
    usage >&2
    exit 1
  fi
  local -r destination="$1"

  # Work in a private scratch directory. A fixed ../tmp could collide with a
  # directory that already exists, and removing it afterwards would then delete
  # files that do not belong to this script.
  local workdir
  workdir="$(mktemp -d "${TMPDIR:-/tmp}/osqueryfuzz_corpus.XXXXXX")" || {
    echo "failed to create a temporary working directory" >&2
    exit 1
  }
  # The path is expanded here on purpose: the EXIT trap can fire after this
  # function has returned, when the local variable no longer exists.
  # shellcheck disable=SC2064
  trap "rm -rf -- $(printf '%q' "${workdir}")" EXIT

  local -r raw_statements="${workdir}/results.txt"
  local -r statements="${workdir}/statements.txt"
  local -r corpus_dir="${workdir}/corpus"
  mkdir -- "${corpus_dir}" || exit 1

  # The results start with one blank line, so the corpus always contains at
  # least one (empty) entry even when nothing is extracted.
  echo "" > "${raw_statements}"

  # The awk program that extracts statements from one source file.
  # NOTE: the program is a single-quoted shell string, so it must not contain
  # any single quote characters (including inside its comments).
  local -r extract_sql='
    function ltrim(s) { sub(/^[ \t\r\n]+/, "", s); return s }
    function rtrim(s) { sub(/[ \t\r\n]+$/, "", s); return s }
    function trim(s) { return rtrim(ltrim(s)); }
    function remove_leading_json_key(s) { s=ltrim(s); sub(/^\"query\"[ \t]*:[ \t]*\"/, "", s); return s }
    function remove_slash_continuance(s) { sub(/^\"/, "", s); sub(/\\$/, "", s); return s }
    function remove_quote_continuance(s) { sub(/^\{?\"/, "", s); sub(/\"$/, "", s); return s }
    function remove_statement_end(s) { sub(/[\"\)\];,]+$/, "", s); return s }

    # Reminder: strings are 1-indexed and match returns 0 on failure
    # This block will be for matching what we hope will be a complete SQL string
    # If it turns out it is not, we will test for two types of continuances below
    /\"select(.+)/ {
      # Look for a complete SQL statement, semicolon terminated, on one line ending in a quote
      i=match($0, /\"select(.+);\"$/);
      if(i) {
        print substr(remove_statement_end($0), i+1);
        next; # Done, go to next line
      }
      # Look for hopefully a complete SQL statement on one line ending in a quote and then comma
      i=match($0, /\"select(.+)\",$/);
      if(i) {
        print substr(remove_statement_end($0), i+1);
        next; # Done, go to next line
      }
    }

    # No statement ending in a semicolon or a quote-comma was found, so look for continuances
    # This is a SQL statement that starts on one line and finishes with a line continuance: \
    /\"select(.+)\\$/ {
      do {
        printf "%s", remove_slash_continuance(trim(remove_leading_json_key($0)));
        if(getline <= 0) # Tests for end of file
          break;
      } while( $0 ~ /(.+)\\$/ ); # As long as we keep finding lines ending in a \ keep printing it out
      print remove_statement_end($0);
      next;
    }

    # This is a SQL statement that starts on one line and finishes with a C-style line continuance (a double-quote, no comma or +)
    /^[ \t]*\{?\"select(.+)\"$/ {
      do {
        printf "%s", remove_quote_continuance(trim(remove_leading_json_key($0)));
        if(getline <= 0) # Tests for end of file
          break;
      } while( $0 ~ /(.+)\"$/ ); # As long as we keep finding lines ending in a " keep printing it out
      print remove_statement_end(remove_quote_continuance(trim($0)));
      next;
    }
  '

  # Find all the files containing "select and run the awk program on each one.
  # Only look in a few directories to avoid stuff like libraries, build, etc.
  # A read loop is used instead of xargs -I so that file names are passed to
  # awk untouched and no placeholder substitution is applied to the program.
  # The status of grep is deliberately not checked: a missing directory must
  # not prevent the other directories from being processed.
  local src
  grep -R -l "\"select" \
      "${SCRIPT_DIR}/../../tests" \
      "${SCRIPT_DIR}/../../packs" \
      "${SCRIPT_DIR}/../../osquery" \
      "${SCRIPT_DIR}/../../plugins" \
    | while IFS= read -r src; do
        awk "${extract_sql}" "${src}"
      done >> "${raw_statements}"

  # Find any \" and replace them with just ". Written to a second file rather
  # than edited in place because the argument syntax of sed -i differs between
  # GNU and BSD sed.
  sed 's/\\\"/\"/g' "${raw_statements}" > "${statements}" || exit 1

  # We now have a file containing one SQL statement per line, and the corpus
  # needs to be a zip file containing one statement per file.
  # -a 4 widens the file-name suffix: the POSIX default of two letters allows
  # only 676 output files.
  split -l 1 -a 4 "${statements}" "${corpus_dir}/x" || exit 1

  # The destination is resolved against the caller's working directory, which
  # this function never changes. The status of zip becomes the return value.
  zip -j "${destination}" "${corpus_dir}"/*
}
# Forward the command-line arguments unchanged. Quoting "$@" keeps every
# argument as a single word, so a destination path containing spaces or glob
# characters reaches main intact.
main "$@"