mirror of
https://github.com/empayre/fleet.git
synced 2024-11-06 08:55:24 +00:00
Scripts char limit: make DB changes to support a central table of unique script contents (#17256)
#16840 # Checklist for submitter - [x] Changes file added for user-visible changes in `changes/` or `orbit/changes/`. See [Changes files](https://fleetdm.com/docs/contributing/committing-changes#changes-files) for more information. - [x] Added/updated tests - [x] If database migrations are included, checked table schema to confirm autoupdate - For database migrations: - [x] Checked schema for all modified table for columns that will auto-update timestamps during migration. - [x] Confirmed that updating the timestamps is acceptable, and will not cause unwanted side effects.
This commit is contained in:
parent
48f1ea994b
commit
b9a9debfa4
1
changes/16840-db-changes-for-script-limit-increase
Normal file
1
changes/16840-db-changes-for-script-limit-increase
Normal file
@ -0,0 +1 @@
|
||||
* Updated the database schema to support the increase in size of scripts.
|
@ -0,0 +1,190 @@
|
||||
package tables
|
||||
|
||||
import (
|
||||
"crypto/md5" //nolint:gosec
|
||||
"database/sql"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/jmoiron/sqlx/reflectx"
|
||||
)
|
||||
|
||||
func init() {
|
||||
MigrationClient.AddMigration(Up_20240228111134, Down_20240228111134)
|
||||
}
|
||||
|
||||
func Up_20240228111134(tx *sql.Tx) error {
|
||||
txx := sqlx.Tx{Tx: tx, Mapper: reflectx.NewMapperFunc("db", sqlx.NameMapper)}
|
||||
|
||||
// at the time of this migration, we have two tables that deal with scripts:
|
||||
// - host_script_results: stores both the contents of the script and
|
||||
// eventually its result. Both anonymous and saved scripts get their
|
||||
// contents stored here (for saved scripts, this is so that if the saved
|
||||
// script is deleted we can still display the contents in the activities).
|
||||
// - scripts: stores saved scripts with a name and content, which can then
|
||||
// be executed on hosts (which would copy the script content to
|
||||
// host_script_results as well as adding a FK to the saved script).
|
||||
//
|
||||
// This migration moves the script contents to a separate table, deduplicated
|
||||
// so that the same contents (whether it is via a saved script or an
|
||||
// anonymous one) are only stored once and both host_script_results and
|
||||
// scripts reference that entry.
|
||||
|
||||
// Using md5 checksum stored in binary (so,
|
||||
// "UNHEX(md5-string-representation)" when storing) for efficient storage.
|
||||
// The choice of md5 despite it being broken is because:
|
||||
// - we don't use it for anything critical, just deduplication of scripts
|
||||
// - it's available in mysql; sha2 is also a possibility, but there's this
|
||||
// note in mysql's documentation
|
||||
// (https://dev.mysql.com/doc/refman/5.7/en/encryption-functions.html#function_sha2):
|
||||
// > This function works only if MySQL has been configured with SSL support.
|
||||
// and we need to support a wide variety of MySQL installations in
|
||||
// the wild. (sha1 is also available without this constraint but also broken)
|
||||
// - it's same as what we use elsewhere in the DB, e.g. mdm apple profiles
|
||||
createScriptContentsStmt := `
|
||||
CREATE TABLE script_contents (
|
||||
id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||
md5_checksum BINARY(16) NOT NULL,
|
||||
contents TEXT COLLATE utf8mb4_unicode_ci NOT NULL,
|
||||
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
|
||||
UNIQUE KEY idx_script_contents_md5_checksum (md5_checksum)
|
||||
)`
|
||||
if _, err := txx.Exec(createScriptContentsStmt); err != nil {
|
||||
return fmt.Errorf("create table script_contents: %w", err)
|
||||
}
|
||||
|
||||
insertScriptContentsStmt := `
|
||||
INSERT INTO
|
||||
script_contents (md5_checksum, contents)
|
||||
VALUES
|
||||
(UNHEX(?), ?)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
created_at = created_at
|
||||
`
|
||||
|
||||
type scriptContent struct {
|
||||
ID uint `db:"id"`
|
||||
ScriptContents string `db:"script_contents"`
|
||||
}
|
||||
|
||||
// load all contents found in host_script_results to next insert in
|
||||
// script_contents
|
||||
readHostScriptsStmt := `
|
||||
SELECT
|
||||
id,
|
||||
script_contents
|
||||
FROM
|
||||
host_script_results
|
||||
`
|
||||
var hostScripts []scriptContent
|
||||
if err := txx.Select(&hostScripts, readHostScriptsStmt); err != nil {
|
||||
return fmt.Errorf("load host_script_results contents: %w", err)
|
||||
}
|
||||
|
||||
// load all contents found in scripts to next insert in script_contents
|
||||
readScriptsStmt := `
|
||||
SELECT
|
||||
id,
|
||||
script_contents
|
||||
FROM
|
||||
scripts
|
||||
`
|
||||
var savedScripts []scriptContent
|
||||
if err := txx.Select(&savedScripts, readScriptsStmt); err != nil {
|
||||
return fmt.Errorf("load saved scripts contents: %w", err)
|
||||
}
|
||||
|
||||
// for every script content, md5-hash it and keep track of the hash
|
||||
// associated with the ids to update the host_script_results and scripts
|
||||
// tables with the new script_content_id later on.
|
||||
hostScriptsLookup := make(map[string][]uint, len(hostScripts))
|
||||
savedScriptsLookup := make(map[string][]uint, len(savedScripts))
|
||||
for _, s := range hostScripts {
|
||||
hexChecksum := md5ChecksumScriptContent(s.ScriptContents)
|
||||
hostScriptsLookup[hexChecksum] = append(hostScriptsLookup[hexChecksum], s.ID)
|
||||
if _, err := txx.Exec(insertScriptContentsStmt, hexChecksum, s.ScriptContents); err != nil {
|
||||
return fmt.Errorf("create script_contents from host_script_results: %w", err)
|
||||
}
|
||||
}
|
||||
for _, s := range savedScripts {
|
||||
hexChecksum := md5ChecksumScriptContent(s.ScriptContents)
|
||||
savedScriptsLookup[hexChecksum] = append(savedScriptsLookup[hexChecksum], s.ID)
|
||||
if _, err := txx.Exec(insertScriptContentsStmt, hexChecksum, s.ScriptContents); err != nil {
|
||||
return fmt.Errorf("create script_contents from saved scripts: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
alterAddHostScriptsStmt := `
|
||||
ALTER TABLE host_script_results
|
||||
ADD COLUMN script_content_id INT(10) UNSIGNED NULL,
|
||||
ADD FOREIGN KEY (script_content_id) REFERENCES script_contents (id) ON DELETE CASCADE`
|
||||
if _, err := tx.Exec(alterAddHostScriptsStmt); err != nil {
|
||||
return fmt.Errorf("alter add column to table host_script_results: %w", err)
|
||||
}
|
||||
|
||||
alterAddScriptsStmt := `
|
||||
ALTER TABLE scripts
|
||||
ADD COLUMN script_content_id INT(10) UNSIGNED NULL,
|
||||
ADD FOREIGN KEY (script_content_id) REFERENCES script_contents (id) ON DELETE CASCADE`
|
||||
if _, err := tx.Exec(alterAddScriptsStmt); err != nil {
|
||||
return fmt.Errorf("alter add column to table scripts: %w", err)
|
||||
}
|
||||
|
||||
updateHostScriptsStmt := `
|
||||
UPDATE
|
||||
host_script_results
|
||||
SET
|
||||
script_content_id = (SELECT id FROM script_contents WHERE md5_checksum = UNHEX(?)),
|
||||
updated_at = updated_at
|
||||
WHERE
|
||||
id = ?`
|
||||
|
||||
// for saved scripts, the `updated_at` timestamp is used as the "uploaded_at"
|
||||
// information in the UI, so we ensure that it doesn't change with this
|
||||
// migration.
|
||||
updateSavedScriptsStmt := `
|
||||
UPDATE
|
||||
scripts
|
||||
SET
|
||||
script_content_id = (SELECT id FROM script_contents WHERE md5_checksum = UNHEX(?)),
|
||||
updated_at = updated_at
|
||||
WHERE
|
||||
id = ?`
|
||||
|
||||
// insert the associated script_content_id into host_script_results and
|
||||
// scripts
|
||||
for hexChecksum, ids := range hostScriptsLookup {
|
||||
for _, id := range ids {
|
||||
if _, err := txx.Exec(updateHostScriptsStmt, hexChecksum, id); err != nil {
|
||||
return fmt.Errorf("update host_script_results with script_content_id: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for hexChecksum, ids := range savedScriptsLookup {
|
||||
for _, id := range ids {
|
||||
if _, err := txx.Exec(updateSavedScriptsStmt, hexChecksum, id); err != nil {
|
||||
return fmt.Errorf("update saved scripts with script_content_id: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(mna): we cannot drop the "script_contents" column immediately from
|
||||
// the host_script_results and scripts tables because that would break the
|
||||
// current code, we need to wait for the feature to be fully implemented.
|
||||
// There's no harm in leaving it in there unused for now, as stored scripts
|
||||
// were previously smallish.
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func md5ChecksumScriptContent(s string) string {
|
||||
rawChecksum := md5.Sum([]byte(s)) //nolint:gosec
|
||||
return strings.ToUpper(hex.EncodeToString(rawChecksum[:]))
|
||||
}
|
||||
|
||||
func Down_20240228111134(tx *sql.Tx) error {
|
||||
return nil
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
package tables
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestUp_20240228111134(t *testing.T) {
|
||||
db := applyUpToPrev(t)
|
||||
|
||||
scriptA, scriptB, scriptC, scriptD := "scriptA", "scriptB", "scriptC", "scriptD"
|
||||
md5A, md5B, md5C, md5D := md5ChecksumScriptContent(scriptA),
|
||||
md5ChecksumScriptContent(scriptB),
|
||||
md5ChecksumScriptContent(scriptC),
|
||||
md5ChecksumScriptContent(scriptD)
|
||||
|
||||
// create saved scripts for A and B
|
||||
savedScriptAID := execNoErrLastID(t, db, `INSERT INTO scripts (name, script_contents) VALUES (?, ?)`, "A", scriptA)
|
||||
savedScriptBID := execNoErrLastID(t, db, `INSERT INTO scripts (name, script_contents) VALUES (?, ?)`, "B", scriptB)
|
||||
|
||||
// create some host executions for A and C (C being anonymous), none for B
|
||||
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_id, script_contents) VALUES (1, uuid(), 'ok', ?, ?)`, savedScriptAID, scriptA)
|
||||
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_id, script_contents) VALUES (1, uuid(), 'ok', ?, ?)`, savedScriptAID, scriptA)
|
||||
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_id, script_contents) VALUES (2, uuid(), 'ok', ?, ?)`, savedScriptAID, scriptA)
|
||||
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_contents) VALUES (3, uuid(), 'ok', ?)`, scriptC)
|
||||
// also create one for scriptD associated with savedScriptAID (possible if
|
||||
// that saved script was edited after execution)
|
||||
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_id, script_contents) VALUES (4, uuid(), 'ok', ?, ?)`, savedScriptAID, scriptD)
|
||||
|
||||
applyNext(t, db)
|
||||
|
||||
// there should be 4 scripts in script_contents
|
||||
type scriptContent struct {
|
||||
ID uint `db:"id"`
|
||||
MD5Checksum string `db:"md5_checksum"`
|
||||
Contents string `db:"contents"`
|
||||
}
|
||||
var scriptContents []*scriptContent
|
||||
err := db.Select(&scriptContents, `SELECT id, HEX(md5_checksum) as md5_checksum, contents FROM script_contents`)
|
||||
require.NoError(t, err)
|
||||
|
||||
// build a lookup map of contents hash to script_content_id
|
||||
contentHashToID := make(map[string]uint, len(scriptContents))
|
||||
for _, sc := range scriptContents {
|
||||
contentHashToID[sc.MD5Checksum] = sc.ID
|
||||
sc.ID = 0
|
||||
}
|
||||
|
||||
// check that the received script contents have the expected hash
|
||||
expect := []*scriptContent{
|
||||
{0, md5A, scriptA},
|
||||
{0, md5B, scriptB},
|
||||
{0, md5C, scriptC},
|
||||
{0, md5D, scriptD},
|
||||
}
|
||||
require.ElementsMatch(t, expect, scriptContents)
|
||||
|
||||
// verify that the script_content_id of the other tables has been properly set
|
||||
var scriptContentID uint
|
||||
err = db.Get(&scriptContentID, `SELECT script_content_id FROM scripts WHERE id = ?`, savedScriptAID)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, contentHashToID[md5A], scriptContentID)
|
||||
|
||||
err = db.Get(&scriptContentID, `SELECT script_content_id FROM scripts WHERE id = ?`, savedScriptBID)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, contentHashToID[md5B], scriptContentID)
|
||||
|
||||
// hosts 1 and 2 have scriptA, host 3 has scriptC, host 4 has scriptD
|
||||
var hostResultIDs []struct {
|
||||
ScriptContentID *uint `db:"script_content_id"`
|
||||
}
|
||||
err = db.Select(&hostResultIDs, `SELECT script_content_id FROM host_script_results WHERE host_id IN (1, 2)`)
|
||||
require.NoError(t, err)
|
||||
// 3 rows, all the id of scriptA
|
||||
require.Len(t, hostResultIDs, 3)
|
||||
require.NotNil(t, hostResultIDs[0].ScriptContentID)
|
||||
require.NotNil(t, hostResultIDs[1].ScriptContentID)
|
||||
require.NotNil(t, hostResultIDs[2].ScriptContentID)
|
||||
require.Equal(t, contentHashToID[md5A], *hostResultIDs[0].ScriptContentID)
|
||||
require.Equal(t, contentHashToID[md5A], *hostResultIDs[1].ScriptContentID)
|
||||
require.Equal(t, contentHashToID[md5A], *hostResultIDs[2].ScriptContentID)
|
||||
|
||||
hostResultIDs = nil
|
||||
err = db.Select(&hostResultIDs, `SELECT script_content_id FROM host_script_results WHERE host_id = 3`)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, hostResultIDs, 1)
|
||||
require.NotNil(t, hostResultIDs[0].ScriptContentID)
|
||||
require.Equal(t, contentHashToID[md5C], *hostResultIDs[0].ScriptContentID)
|
||||
|
||||
hostResultIDs = nil
|
||||
err = db.Select(&hostResultIDs, `SELECT script_content_id FROM host_script_results WHERE host_id = 4`)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, hostResultIDs, 1)
|
||||
require.NotNil(t, hostResultIDs[0].ScriptContentID)
|
||||
require.Equal(t, contentHashToID[md5D], *hostResultIDs[0].ScriptContentID)
|
||||
}
|
File diff suppressed because one or more lines are too long
@ -426,8 +426,6 @@ type NewGlobalPolicyFunc func(ctx context.Context, authorID *uint, args fleet.Po
|
||||
|
||||
type PolicyFunc func(ctx context.Context, id uint) (*fleet.Policy, error)
|
||||
|
||||
type PolicyByNameFunc func(ctx context.Context, name string) (*fleet.Policy, error)
|
||||
|
||||
type SavePolicyFunc func(ctx context.Context, p *fleet.Policy, shouldRemoveAllPolicyMemberships bool) error
|
||||
|
||||
type ListGlobalPoliciesFunc func(ctx context.Context, opts fleet.ListOptions) ([]*fleet.Policy, error)
|
||||
@ -1453,9 +1451,6 @@ type DataStore struct {
|
||||
PolicyFunc PolicyFunc
|
||||
PolicyFuncInvoked bool
|
||||
|
||||
PolicyByNameFunc PolicyByNameFunc
|
||||
PolicyByNameFuncInvoked bool
|
||||
|
||||
SavePolicyFunc SavePolicyFunc
|
||||
SavePolicyFuncInvoked bool
|
||||
|
||||
@ -3505,13 +3500,6 @@ func (s *DataStore) Policy(ctx context.Context, id uint) (*fleet.Policy, error)
|
||||
return s.PolicyFunc(ctx, id)
|
||||
}
|
||||
|
||||
func (s *DataStore) PolicyByName(ctx context.Context, name string) (*fleet.Policy, error) {
|
||||
s.mu.Lock()
|
||||
s.PolicyByNameFuncInvoked = true
|
||||
s.mu.Unlock()
|
||||
return s.PolicyByNameFunc(ctx, name)
|
||||
}
|
||||
|
||||
func (s *DataStore) SavePolicy(ctx context.Context, p *fleet.Policy, shouldRemoveAllPolicyMemberships bool) error {
|
||||
s.mu.Lock()
|
||||
s.SavePolicyFuncInvoked = true
|
||||
|
Loading…
Reference in New Issue
Block a user