mirror of
https://github.com/empayre/fleet.git
synced 2024-11-06 08:55:24 +00:00
Scripts char limit: make DB changes to support a central table of unique script contents (#17256)
#16840 # Checklist for submitter - [x] Changes file added for user-visible changes in `changes/` or `orbit/changes/`. See [Changes files](https://fleetdm.com/docs/contributing/committing-changes#changes-files) for more information. - [x] Added/updated tests - [x] If database migrations are included, checked table schema to confirm autoupdate - For database migrations: - [x] Checked schema for all modified table for columns that will auto-update timestamps during migration. - [x] Confirmed that updating the timestamps is acceptable, and will not cause unwanted side effects.
This commit is contained in:
parent
48f1ea994b
commit
b9a9debfa4
1
changes/16840-db-changes-for-script-limit-increase
Normal file
1
changes/16840-db-changes-for-script-limit-increase
Normal file
@ -0,0 +1 @@
|
|||||||
|
* Updated the database schema to support the increase in size of scripts.
|
@ -0,0 +1,190 @@
|
|||||||
|
package tables
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/md5" //nolint:gosec
|
||||||
|
"database/sql"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/jmoiron/sqlx"
|
||||||
|
"github.com/jmoiron/sqlx/reflectx"
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
MigrationClient.AddMigration(Up_20240228111134, Down_20240228111134)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Up_20240228111134(tx *sql.Tx) error {
|
||||||
|
txx := sqlx.Tx{Tx: tx, Mapper: reflectx.NewMapperFunc("db", sqlx.NameMapper)}
|
||||||
|
|
||||||
|
// at the time of this migration, we have two tables that deal with scripts:
|
||||||
|
// - host_script_results: stores both the contents of the script and
|
||||||
|
// eventually its result. Both anonymous and saved scripts get their
|
||||||
|
// contents stored here (for saved scripts, this is so that if the saved
|
||||||
|
// script is deleted we can still display the contents in the activities).
|
||||||
|
// - scripts: stores saved scripts with a name and content, which can then
|
||||||
|
// be executed on hosts (which would copy the script content to
|
||||||
|
// host_script_results as well as adding a FK to the saved script).
|
||||||
|
//
|
||||||
|
// This migration moves the script contents to a separate table, deduplicated
|
||||||
|
// so that the same contents (whether it is via a saved script or an
|
||||||
|
// anonymous one) are only stored once and both host_script_results and
|
||||||
|
// scripts reference that entry.
|
||||||
|
|
||||||
|
// Using md5 checksum stored in binary (so,
|
||||||
|
// "UNHEX(md5-string-representation)" when storing) for efficient storage.
|
||||||
|
// The choice of md5 despite it being broken is because:
|
||||||
|
// - we don't use it for anything critical, just deduplication of scripts
|
||||||
|
// - it's available in mysql; sha2 is also a possibility, but there's this
|
||||||
|
// note in mysql's documentation
|
||||||
|
// (https://dev.mysql.com/doc/refman/5.7/en/encryption-functions.html#function_sha2):
|
||||||
|
// > This function works only if MySQL has been configured with SSL support.
|
||||||
|
// and we need to support a wide variety of MySQL installations in
|
||||||
|
// the wild. (sha1 is also available without this constraint but also broken)
|
||||||
|
// - it's same as what we use elsewhere in the DB, e.g. mdm apple profiles
|
||||||
|
createScriptContentsStmt := `
|
||||||
|
CREATE TABLE script_contents (
|
||||||
|
id INT(10) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
|
md5_checksum BINARY(16) NOT NULL,
|
||||||
|
contents TEXT COLLATE utf8mb4_unicode_ci NOT NULL,
|
||||||
|
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
|
||||||
|
UNIQUE KEY idx_script_contents_md5_checksum (md5_checksum)
|
||||||
|
)`
|
||||||
|
if _, err := txx.Exec(createScriptContentsStmt); err != nil {
|
||||||
|
return fmt.Errorf("create table script_contents: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
insertScriptContentsStmt := `
|
||||||
|
INSERT INTO
|
||||||
|
script_contents (md5_checksum, contents)
|
||||||
|
VALUES
|
||||||
|
(UNHEX(?), ?)
|
||||||
|
ON DUPLICATE KEY UPDATE
|
||||||
|
created_at = created_at
|
||||||
|
`
|
||||||
|
|
||||||
|
type scriptContent struct {
|
||||||
|
ID uint `db:"id"`
|
||||||
|
ScriptContents string `db:"script_contents"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// load all contents found in host_script_results to next insert in
|
||||||
|
// script_contents
|
||||||
|
readHostScriptsStmt := `
|
||||||
|
SELECT
|
||||||
|
id,
|
||||||
|
script_contents
|
||||||
|
FROM
|
||||||
|
host_script_results
|
||||||
|
`
|
||||||
|
var hostScripts []scriptContent
|
||||||
|
if err := txx.Select(&hostScripts, readHostScriptsStmt); err != nil {
|
||||||
|
return fmt.Errorf("load host_script_results contents: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// load all contents found in scripts to next insert in script_contents
|
||||||
|
readScriptsStmt := `
|
||||||
|
SELECT
|
||||||
|
id,
|
||||||
|
script_contents
|
||||||
|
FROM
|
||||||
|
scripts
|
||||||
|
`
|
||||||
|
var savedScripts []scriptContent
|
||||||
|
if err := txx.Select(&savedScripts, readScriptsStmt); err != nil {
|
||||||
|
return fmt.Errorf("load saved scripts contents: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// for every script content, md5-hash it and keep track of the hash
|
||||||
|
// associated with the ids to update the host_script_results and scripts
|
||||||
|
// tables with the new script_content_id later on.
|
||||||
|
hostScriptsLookup := make(map[string][]uint, len(hostScripts))
|
||||||
|
savedScriptsLookup := make(map[string][]uint, len(savedScripts))
|
||||||
|
for _, s := range hostScripts {
|
||||||
|
hexChecksum := md5ChecksumScriptContent(s.ScriptContents)
|
||||||
|
hostScriptsLookup[hexChecksum] = append(hostScriptsLookup[hexChecksum], s.ID)
|
||||||
|
if _, err := txx.Exec(insertScriptContentsStmt, hexChecksum, s.ScriptContents); err != nil {
|
||||||
|
return fmt.Errorf("create script_contents from host_script_results: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, s := range savedScripts {
|
||||||
|
hexChecksum := md5ChecksumScriptContent(s.ScriptContents)
|
||||||
|
savedScriptsLookup[hexChecksum] = append(savedScriptsLookup[hexChecksum], s.ID)
|
||||||
|
if _, err := txx.Exec(insertScriptContentsStmt, hexChecksum, s.ScriptContents); err != nil {
|
||||||
|
return fmt.Errorf("create script_contents from saved scripts: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
alterAddHostScriptsStmt := `
|
||||||
|
ALTER TABLE host_script_results
|
||||||
|
ADD COLUMN script_content_id INT(10) UNSIGNED NULL,
|
||||||
|
ADD FOREIGN KEY (script_content_id) REFERENCES script_contents (id) ON DELETE CASCADE`
|
||||||
|
if _, err := tx.Exec(alterAddHostScriptsStmt); err != nil {
|
||||||
|
return fmt.Errorf("alter add column to table host_script_results: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
alterAddScriptsStmt := `
|
||||||
|
ALTER TABLE scripts
|
||||||
|
ADD COLUMN script_content_id INT(10) UNSIGNED NULL,
|
||||||
|
ADD FOREIGN KEY (script_content_id) REFERENCES script_contents (id) ON DELETE CASCADE`
|
||||||
|
if _, err := tx.Exec(alterAddScriptsStmt); err != nil {
|
||||||
|
return fmt.Errorf("alter add column to table scripts: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
updateHostScriptsStmt := `
|
||||||
|
UPDATE
|
||||||
|
host_script_results
|
||||||
|
SET
|
||||||
|
script_content_id = (SELECT id FROM script_contents WHERE md5_checksum = UNHEX(?)),
|
||||||
|
updated_at = updated_at
|
||||||
|
WHERE
|
||||||
|
id = ?`
|
||||||
|
|
||||||
|
// for saved scripts, the `updated_at` timestamp is used as the "uploaded_at"
|
||||||
|
// information in the UI, so we ensure that it doesn't change with this
|
||||||
|
// migration.
|
||||||
|
updateSavedScriptsStmt := `
|
||||||
|
UPDATE
|
||||||
|
scripts
|
||||||
|
SET
|
||||||
|
script_content_id = (SELECT id FROM script_contents WHERE md5_checksum = UNHEX(?)),
|
||||||
|
updated_at = updated_at
|
||||||
|
WHERE
|
||||||
|
id = ?`
|
||||||
|
|
||||||
|
// insert the associated script_content_id into host_script_results and
|
||||||
|
// scripts
|
||||||
|
for hexChecksum, ids := range hostScriptsLookup {
|
||||||
|
for _, id := range ids {
|
||||||
|
if _, err := txx.Exec(updateHostScriptsStmt, hexChecksum, id); err != nil {
|
||||||
|
return fmt.Errorf("update host_script_results with script_content_id: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for hexChecksum, ids := range savedScriptsLookup {
|
||||||
|
for _, id := range ids {
|
||||||
|
if _, err := txx.Exec(updateSavedScriptsStmt, hexChecksum, id); err != nil {
|
||||||
|
return fmt.Errorf("update saved scripts with script_content_id: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(mna): we cannot drop the "script_contents" column immediately from
|
||||||
|
// the host_script_results and scripts tables because that would break the
|
||||||
|
// current code, we need to wait for the feature to be fully implemented.
|
||||||
|
// There's no harm in leaving it in there unused for now, as stored scripts
|
||||||
|
// were previously smallish.
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func md5ChecksumScriptContent(s string) string {
|
||||||
|
rawChecksum := md5.Sum([]byte(s)) //nolint:gosec
|
||||||
|
return strings.ToUpper(hex.EncodeToString(rawChecksum[:]))
|
||||||
|
}
|
||||||
|
|
||||||
|
func Down_20240228111134(tx *sql.Tx) error {
|
||||||
|
return nil
|
||||||
|
}
|
@ -0,0 +1,97 @@
|
|||||||
|
package tables
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestUp_20240228111134(t *testing.T) {
|
||||||
|
db := applyUpToPrev(t)
|
||||||
|
|
||||||
|
scriptA, scriptB, scriptC, scriptD := "scriptA", "scriptB", "scriptC", "scriptD"
|
||||||
|
md5A, md5B, md5C, md5D := md5ChecksumScriptContent(scriptA),
|
||||||
|
md5ChecksumScriptContent(scriptB),
|
||||||
|
md5ChecksumScriptContent(scriptC),
|
||||||
|
md5ChecksumScriptContent(scriptD)
|
||||||
|
|
||||||
|
// create saved scripts for A and B
|
||||||
|
savedScriptAID := execNoErrLastID(t, db, `INSERT INTO scripts (name, script_contents) VALUES (?, ?)`, "A", scriptA)
|
||||||
|
savedScriptBID := execNoErrLastID(t, db, `INSERT INTO scripts (name, script_contents) VALUES (?, ?)`, "B", scriptB)
|
||||||
|
|
||||||
|
// create some host executions for A and C (C being anonymous), none for B
|
||||||
|
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_id, script_contents) VALUES (1, uuid(), 'ok', ?, ?)`, savedScriptAID, scriptA)
|
||||||
|
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_id, script_contents) VALUES (1, uuid(), 'ok', ?, ?)`, savedScriptAID, scriptA)
|
||||||
|
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_id, script_contents) VALUES (2, uuid(), 'ok', ?, ?)`, savedScriptAID, scriptA)
|
||||||
|
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_contents) VALUES (3, uuid(), 'ok', ?)`, scriptC)
|
||||||
|
// also create one for scriptD associated with savedScriptAID (possible if
|
||||||
|
// that saved script was edited after execution)
|
||||||
|
execNoErr(t, db, `INSERT INTO host_script_results (host_id, execution_id, output, script_id, script_contents) VALUES (4, uuid(), 'ok', ?, ?)`, savedScriptAID, scriptD)
|
||||||
|
|
||||||
|
applyNext(t, db)
|
||||||
|
|
||||||
|
// there should be 4 scripts in script_contents
|
||||||
|
type scriptContent struct {
|
||||||
|
ID uint `db:"id"`
|
||||||
|
MD5Checksum string `db:"md5_checksum"`
|
||||||
|
Contents string `db:"contents"`
|
||||||
|
}
|
||||||
|
var scriptContents []*scriptContent
|
||||||
|
err := db.Select(&scriptContents, `SELECT id, HEX(md5_checksum) as md5_checksum, contents FROM script_contents`)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// build a lookup map of contents hash to script_content_id
|
||||||
|
contentHashToID := make(map[string]uint, len(scriptContents))
|
||||||
|
for _, sc := range scriptContents {
|
||||||
|
contentHashToID[sc.MD5Checksum] = sc.ID
|
||||||
|
sc.ID = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// check that the received script contents have the expected hash
|
||||||
|
expect := []*scriptContent{
|
||||||
|
{0, md5A, scriptA},
|
||||||
|
{0, md5B, scriptB},
|
||||||
|
{0, md5C, scriptC},
|
||||||
|
{0, md5D, scriptD},
|
||||||
|
}
|
||||||
|
require.ElementsMatch(t, expect, scriptContents)
|
||||||
|
|
||||||
|
// verify that the script_content_id of the other tables has been properly set
|
||||||
|
var scriptContentID uint
|
||||||
|
err = db.Get(&scriptContentID, `SELECT script_content_id FROM scripts WHERE id = ?`, savedScriptAID)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, contentHashToID[md5A], scriptContentID)
|
||||||
|
|
||||||
|
err = db.Get(&scriptContentID, `SELECT script_content_id FROM scripts WHERE id = ?`, savedScriptBID)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, contentHashToID[md5B], scriptContentID)
|
||||||
|
|
||||||
|
// hosts 1 and 2 have scriptA, host 3 has scriptC, host 4 has scriptD
|
||||||
|
var hostResultIDs []struct {
|
||||||
|
ScriptContentID *uint `db:"script_content_id"`
|
||||||
|
}
|
||||||
|
err = db.Select(&hostResultIDs, `SELECT script_content_id FROM host_script_results WHERE host_id IN (1, 2)`)
|
||||||
|
require.NoError(t, err)
|
||||||
|
// 3 rows, all the id of scriptA
|
||||||
|
require.Len(t, hostResultIDs, 3)
|
||||||
|
require.NotNil(t, hostResultIDs[0].ScriptContentID)
|
||||||
|
require.NotNil(t, hostResultIDs[1].ScriptContentID)
|
||||||
|
require.NotNil(t, hostResultIDs[2].ScriptContentID)
|
||||||
|
require.Equal(t, contentHashToID[md5A], *hostResultIDs[0].ScriptContentID)
|
||||||
|
require.Equal(t, contentHashToID[md5A], *hostResultIDs[1].ScriptContentID)
|
||||||
|
require.Equal(t, contentHashToID[md5A], *hostResultIDs[2].ScriptContentID)
|
||||||
|
|
||||||
|
hostResultIDs = nil
|
||||||
|
err = db.Select(&hostResultIDs, `SELECT script_content_id FROM host_script_results WHERE host_id = 3`)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, hostResultIDs, 1)
|
||||||
|
require.NotNil(t, hostResultIDs[0].ScriptContentID)
|
||||||
|
require.Equal(t, contentHashToID[md5C], *hostResultIDs[0].ScriptContentID)
|
||||||
|
|
||||||
|
hostResultIDs = nil
|
||||||
|
err = db.Select(&hostResultIDs, `SELECT script_content_id FROM host_script_results WHERE host_id = 4`)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, hostResultIDs, 1)
|
||||||
|
require.NotNil(t, hostResultIDs[0].ScriptContentID)
|
||||||
|
require.Equal(t, contentHashToID[md5D], *hostResultIDs[0].ScriptContentID)
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
@ -426,8 +426,6 @@ type NewGlobalPolicyFunc func(ctx context.Context, authorID *uint, args fleet.Po
|
|||||||
|
|
||||||
type PolicyFunc func(ctx context.Context, id uint) (*fleet.Policy, error)
|
type PolicyFunc func(ctx context.Context, id uint) (*fleet.Policy, error)
|
||||||
|
|
||||||
type PolicyByNameFunc func(ctx context.Context, name string) (*fleet.Policy, error)
|
|
||||||
|
|
||||||
type SavePolicyFunc func(ctx context.Context, p *fleet.Policy, shouldRemoveAllPolicyMemberships bool) error
|
type SavePolicyFunc func(ctx context.Context, p *fleet.Policy, shouldRemoveAllPolicyMemberships bool) error
|
||||||
|
|
||||||
type ListGlobalPoliciesFunc func(ctx context.Context, opts fleet.ListOptions) ([]*fleet.Policy, error)
|
type ListGlobalPoliciesFunc func(ctx context.Context, opts fleet.ListOptions) ([]*fleet.Policy, error)
|
||||||
@ -1453,9 +1451,6 @@ type DataStore struct {
|
|||||||
PolicyFunc PolicyFunc
|
PolicyFunc PolicyFunc
|
||||||
PolicyFuncInvoked bool
|
PolicyFuncInvoked bool
|
||||||
|
|
||||||
PolicyByNameFunc PolicyByNameFunc
|
|
||||||
PolicyByNameFuncInvoked bool
|
|
||||||
|
|
||||||
SavePolicyFunc SavePolicyFunc
|
SavePolicyFunc SavePolicyFunc
|
||||||
SavePolicyFuncInvoked bool
|
SavePolicyFuncInvoked bool
|
||||||
|
|
||||||
@ -3505,13 +3500,6 @@ func (s *DataStore) Policy(ctx context.Context, id uint) (*fleet.Policy, error)
|
|||||||
return s.PolicyFunc(ctx, id)
|
return s.PolicyFunc(ctx, id)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *DataStore) PolicyByName(ctx context.Context, name string) (*fleet.Policy, error) {
|
|
||||||
s.mu.Lock()
|
|
||||||
s.PolicyByNameFuncInvoked = true
|
|
||||||
s.mu.Unlock()
|
|
||||||
return s.PolicyByNameFunc(ctx, name)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *DataStore) SavePolicy(ctx context.Context, p *fleet.Policy, shouldRemoveAllPolicyMemberships bool) error {
|
func (s *DataStore) SavePolicy(ctx context.Context, p *fleet.Policy, shouldRemoveAllPolicyMemberships bool) error {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
s.SavePolicyFuncInvoked = true
|
s.SavePolicyFuncInvoked = true
|
||||||
|
Loading…
Reference in New Issue
Block a user