Add the fleetctl debug {errors,db-locks} sub-commands (#3168)

This commit is contained in:
Martin Angers 2021-12-06 09:26:01 -05:00 committed by GitHub
parent eba5d1b1b6
commit b84ef79bf4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 281 additions and 24 deletions

View File

@ -0,0 +1 @@
* Add `fleetctl debug errors` to get the list of stored errors, and `fleetctl debug db-locks` to get the current database transaction lock waits (both reports are also added to the `fleetctl debug archive` output).

View File

@ -37,6 +37,8 @@ func debugCommand() *cli.Command {
debugHeapCommand(),
debugGoroutineCommand(),
debugTraceCommand(),
debugErrorsCommand(),
debugDBLocksCommand(),
debugArchiveCommand(),
debugConnectionCommand(),
debugMigrations(),
@ -269,6 +271,8 @@ func debugArchiveCommand() *cli.Command {
"allocs",
"block",
"cmdline",
"db-locks",
"errors",
"goroutine",
"heap",
"mutex",
@ -294,7 +298,23 @@ func debugArchiveCommand() *cli.Command {
defer tarwriter.Close()
for _, profile := range profiles {
res, err := fleet.DebugPprof(profile)
var res []byte
switch profile {
case "errors":
var buf bytes.Buffer
err = fleet.DebugErrors(&buf)
if err == nil {
res = buf.Bytes()
}
case "db-locks":
res, err = fleet.DebugDBLocks()
default:
res, err = fleet.DebugPprof(profile)
}
if err != nil {
// Don't fail the entire process on errors. We'll take what
// we can get if the servers are in a bad state and not
@ -492,6 +512,85 @@ Such migrations can be applied via "fleet prepare db" before running "fleet serv
}
}
// debugErrorsCommand builds the "errors" debug sub-command. It streams the
// errors recorded by the fleet server into a local file, which is either the
// one given via the outfile flag or a name generated by outfileName.
func debugErrorsCommand() *cli.Command {
	const name = "errors"

	run := func(c *cli.Context) error {
		client, err := clientFromCLI(c)
		if err != nil {
			return err
		}

		// Fall back to a generated file name when none was provided.
		path := getOutfile(c)
		if path == "" {
			path = outfileName(name)
		}

		out, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, defaultFileMode)
		if err != nil {
			return err
		}
		// Safety net for the early-return paths; the explicit Close below is
		// the one whose error is reported.
		defer out.Close()

		// The response is written directly to the file as it is received.
		if err := client.DebugErrors(out); err != nil {
			return err
		}
		if err := out.Close(); err != nil {
			return fmt.Errorf("write errors to file: %w", err)
		}

		fmt.Fprintf(os.Stderr, "Output written to %s\n", path)
		return nil
	}

	return &cli.Command{
		Name:      name,
		Usage:     "Save the recorded fleet server errors to a file.",
		UsageText: "Recording of errors and their retention period is controlled via the --logging_error_retention_period fleet command flag.",
		Flags: []cli.Flag{
			outfileFlag(),
			configFlag(),
			contextFlag(),
			debugFlag(),
		},
		Action: run,
	}
}
// debugDBLocksCommand builds the "db-locks" debug sub-command. It retrieves
// the database transaction locking report from the fleet server and saves it
// to a local file, which is either the one given via the outfile flag or a
// name generated by outfileName.
func debugDBLocksCommand() *cli.Command {
	const name = "db-locks"

	run := func(c *cli.Context) error {
		client, err := clientFromCLI(c)
		if err != nil {
			return err
		}

		// Retrieve the report first so that no file is touched when the
		// request fails.
		report, err := client.DebugDBLocks()
		if err != nil {
			return err
		}

		// Fall back to a generated file name when none was provided.
		dest := getOutfile(c)
		if dest == "" {
			dest = outfileName(name)
		}

		if err := writeFile(dest, report, defaultFileMode); err != nil {
			return fmt.Errorf("write %s to file: %w", name, err)
		}
		return nil
	}

	return &cli.Command{
		Name:      name,
		Usage:     "Save the current database transaction locking information to a file.",
		UsageText: "Saves transaction locking information with queries that are waiting on or blocking other transactions.",
		Flags: []cli.Flag{
			outfileFlag(),
			configFlag(),
			contextFlag(),
			debugFlag(),
		},
		Action: run,
	}
}
func resolveHostname(ctx context.Context, timeout time.Duration, host string) error {
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

View File

@ -6,6 +6,7 @@ import (
"time"
"github.com/fleetdm/fleet/v4/server/contexts/ctxerr"
"github.com/fleetdm/fleet/v4/server/fleet"
)
func (d *Datastore) Lock(ctx context.Context, name string, owner string, expiration time.Duration) (bool, error) {
@ -56,3 +57,27 @@ func (d *Datastore) Unlock(ctx context.Context, name string, owner string) error
_, err := d.writer.ExecContext(ctx, `DELETE FROM locks WHERE name = ? and owner = ?`, name, owner)
return err
}
// DBLocks returns one entry per row of information_schema.innodb_lock_waits,
// pairing the transaction that is waiting for a lock with the transaction
// that is blocking it (transaction id, MySQL thread id and current query for
// each side).
func (d *Datastore) DBLocks(ctx context.Context) ([]*fleet.DBLock, error) {
	const lockWaitsQuery = `
SELECT
r.trx_id waiting_trx_id,
r.trx_mysql_thread_id waiting_thread,
r.trx_query waiting_query,
b.trx_id blocking_trx_id,
b.trx_mysql_thread_id blocking_thread,
b.trx_query blocking_query
FROM information_schema.innodb_lock_waits w
INNER JOIN information_schema.innodb_trx b
ON b.trx_id = w.blocking_trx_id
INNER JOIN information_schema.innodb_trx r
ON r.trx_id = w.requesting_trx_id`

	var waits []*fleet.DBLock

	// This is a read-only query, but it goes through the writer on purpose:
	// the lock information of interest is the primary database's (the read
	// replica should have little to no trx locks).
	err := d.writer.SelectContext(ctx, &waits, lockWaitsQuery)
	if err != nil {
		return nil, ctxerr.Wrap(ctx, err, "select locking information")
	}
	return waits, nil
}

View File

@ -2,6 +2,7 @@ package mysql
import (
"context"
"database/sql"
"testing"
"time"
@ -14,6 +15,22 @@ func TestLocks(t *testing.T) {
ds := CreateMySQLDS(t)
defer ds.Close()
cases := []struct {
name string
fn func(t *testing.T, ds *Datastore)
}{
{"LockUnlock", func(t *testing.T, ds *Datastore) { testLocksLockUnlock(t, ds) }},
{"DBLocks", func(t *testing.T, ds *Datastore) { testLocksDBLocks(t, ds) }},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
defer TruncateTables(t, ds)
c.fn(t, ds)
})
}
}
func testLocksLockUnlock(t *testing.T, ds *Datastore) {
owner1, err := server.GenerateRandomText(64)
require.NoError(t, err)
owner2, err := server.GenerateRandomText(64)
@ -58,3 +75,53 @@ func TestLocks(t *testing.T) {
require.NoError(t, err)
assert.True(t, locked)
}
// testLocksDBLocks verifies that DBLocks reports a transaction that is
// waiting on a lock held by another transaction. It creates a scratch table,
// has two serializable transactions each read a different row, and then makes
// the first transaction issue an UPDATE that needs the row read by the second.
func testLocksDBLocks(t *testing.T, ds *Datastore) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	t.Cleanup(cancel)

	_, err := ds.writer.ExecContext(ctx, `CREATE TABLE deadlocks(a int primary key)`)
	require.NoError(t, err)
	// Cleanups run last-in first-out, so this DROP runs before cancel.
	t.Cleanup(func() {
		_, err := ds.writer.ExecContext(ctx, `DROP TABLE deadlocks`)
		require.NoError(t, err)
	})

	_, err = ds.writer.ExecContext(ctx, `INSERT INTO deadlocks(a) VALUES (0), (1)`)
	require.NoError(t, err)

	// cause a deadlock (see https://stackoverflow.com/a/31552794/1094941)
	tx1, err := ds.writer.BeginTxx(ctx, &sql.TxOptions{Isolation: sql.LevelSerializable})
	require.NoError(t, err)
	defer tx1.Rollback()
	tx2, err := ds.writer.BeginTxx(ctx, &sql.TxOptions{Isolation: sql.LevelSerializable})
	require.NoError(t, err)
	defer tx2.Rollback()

	wait := make(chan struct{})
	go func() {
		var dst []int

		// The errors are local to this goroutine: sharing the outer err with
		// the test goroutine would be a data race. assert is used instead of
		// require because FailNow must only be called from the goroutine
		// running the test, and exiting before close(wait) would leave the
		// test blocked forever on the channel receive.
		err := tx1.SelectContext(ctx, &dst, `SELECT * FROM deadlocks WHERE a = 0`)
		assert.NoError(t, err)
		err = tx2.SelectContext(ctx, &dst, `SELECT * FROM deadlocks WHERE a = 1`)
		assert.NoError(t, err)

		close(wait)

		_, err = tx1.ExecContext(ctx, `UPDATE deadlocks SET a = 0 WHERE a != 0`)
		assert.Error(t, err)
		_, err = tx2.ExecContext(ctx, `UPDATE deadlocks SET a = 1 WHERE a != 1`)
		assert.Error(t, err)
	}()
	<-wait

	// NOTE(review): wait is closed just before the first UPDATE is sent, so
	// this call presumably relies on that UPDATE already being blocked by the
	// time the lock information is read - confirm this is not a source of
	// flakiness.
	locks, err := ds.DBLocks(ctx)
	require.NoError(t, err)
	require.Len(t, locks, 1)
	require.NotNil(t, locks[0].WaitingQuery)
	require.Equal(t, `UPDATE deadlocks SET a = 0 WHERE a != 0`, *locks[0].WaitingQuery)
	require.NotEmpty(t, locks[0].BlockingTrxID)
	require.NotEmpty(t, locks[0].WaitingTrxID)
	require.NotZero(t, locks[0].BlockingThread)
	require.NotZero(t, locks[0].WaitingThread)
}

View File

@ -399,6 +399,8 @@ type Datastore interface {
// Unlock tries to unlock the lock by that `name` for the specified
// `owner`. Unlocking when not holding the lock shouldn't error
Unlock(ctx context.Context, name string, owner string) error
// DBLocks returns the current database transaction lock waits information.
DBLocks(ctx context.Context) ([]*DBLock, error)
///////////////////////////////////////////////////////////////////////////////
// Aggregated Stats

12
server/fleet/db.go Normal file
View File

@ -0,0 +1,12 @@
package fleet
// DBLock represents a database transaction lock information as returned
// by datastore.DBLocks. Each value pairs a transaction that is waiting for a
// lock with the transaction that is blocking it. The db tags match the column
// aliases selected by the datastore query, and the json tags define the
// serialized field names.
type DBLock struct {
// WaitingTrxID is the id of the transaction requesting the lock.
WaitingTrxID string `db:"waiting_trx_id" json:"waiting_trx_id"`
// WaitingThread is the MySQL thread id of the waiting transaction.
WaitingThread uint64 `db:"waiting_thread" json:"waiting_thread"`
// WaitingQuery is the query of the waiting transaction; it is omitted from
// the JSON output when nil (presumably when the database reports no query
// for the transaction - TODO confirm).
WaitingQuery *string `db:"waiting_query" json:"waiting_query,omitempty"`
// BlockingTrxID is the id of the transaction blocking the waiting one.
BlockingTrxID string `db:"blocking_trx_id" json:"blocking_trx_id"`
// BlockingThread is the MySQL thread id of the blocking transaction.
BlockingThread uint64 `db:"blocking_thread" json:"blocking_thread"`
// BlockingQuery is the query of the blocking transaction; it is omitted
// from the JSON output when nil (presumably when the database reports no
// query for the transaction - TODO confirm).
BlockingQuery *string `db:"blocking_query" json:"blocking_query,omitempty"`
}

View File

@ -1,16 +0,0 @@
package fleet
type Transactions interface {
Begin() (Transaction, error)
}
type Transaction interface {
Commit() error
Rollback() error
}
func HasTransaction(tx Transaction) OptionalArg {
return func() interface{} {
return tx
}
}

View File

@ -22,10 +22,3 @@ func (m *Store) MigrationStatus(ctx context.Context) (*fleet.MigrationStatus, er
return &fleet.MigrationStatus{}, nil
}
func (m *Store) Name() string { return "mock" }
type mockTransaction struct{}
func (m *mockTransaction) Commit() error { return nil }
func (m *mockTransaction) Rollback() error { return nil }
func (m *Store) Begin() (fleet.Transaction, error) { return &mockTransaction{}, nil }

View File

@ -319,6 +319,8 @@ type LockFunc func(ctx context.Context, name string, owner string, expiration ti
type UnlockFunc func(ctx context.Context, name string, owner string) error
type DBLocksFunc func(ctx context.Context) ([]*fleet.DBLock, error)
type UpdateScheduledQueryAggregatedStatsFunc func(ctx context.Context) error
type UpdateQueryAggregatedStatsFunc func(ctx context.Context) error
@ -786,6 +788,9 @@ type DataStore struct {
UnlockFunc UnlockFunc
UnlockFuncInvoked bool
DBLocksFunc DBLocksFunc
DBLocksFuncInvoked bool
UpdateScheduledQueryAggregatedStatsFunc UpdateScheduledQueryAggregatedStatsFunc
UpdateScheduledQueryAggregatedStatsFuncInvoked bool
@ -1563,6 +1568,11 @@ func (s *DataStore) Unlock(ctx context.Context, name string, owner string) error
return s.UnlockFunc(ctx, name, owner)
}
// DBLocks is the mock implementation of the datastore's DBLocks method. It
// records the call in DBLocksFuncInvoked and returns whatever DBLocksFunc
// returns; DBLocksFunc must be set before this method is called.
func (s *DataStore) DBLocks(ctx context.Context) ([]*fleet.DBLock, error) {
s.DBLocksFuncInvoked = true
return s.DBLocksFunc(ctx)
}
func (s *DataStore) UpdateScheduledQueryAggregatedStats(ctx context.Context) error {
s.UpdateScheduledQueryAggregatedStatsFuncInvoked = true
return s.UpdateScheduledQueryAggregatedStatsFunc(ctx)

View File

@ -2,6 +2,7 @@ package service
import (
"fmt"
"io"
"io/ioutil"
"net/http"
@ -40,3 +41,48 @@ func (c *Client) DebugMigrations() (*fleet.MigrationStatus, error) {
}
return &migrationStatus, nil
}
// DebugErrors issues an authenticated GET request to the /debug/errors
// endpoint and, when the server answers with 200 OK, streams the response
// body to w. The body can be large, which is why it is copied to the writer
// rather than returned as a byte slice. Any other status code, as well as a
// request or copy failure, is returned as an error.
func (c *Client) DebugErrors(w io.Writer) error {
	const path = "/debug/errors"

	resp, err := c.AuthenticatedDo("GET", path, "", nil)
	if err != nil {
		return fmt.Errorf("GET %s: %w", path, err)
	}
	defer resp.Body.Close()

	if status := resp.StatusCode; status != http.StatusOK {
		return fmt.Errorf("get errors received status %d", status)
	}

	_, err = io.Copy(w, resp.Body)
	if err != nil {
		return fmt.Errorf("read errors response body: %w", err)
	}
	return nil
}
// DebugDBLocks issues an authenticated GET request to the /debug/dblocks
// endpoint and returns the raw response body when the server answers with
// 200 OK. A 500 status yields an error that also hints at the known
// requirements of the endpoint; any other non-200 status yields a plain
// status error.
func (c *Client) DebugDBLocks() ([]byte, error) {
	const path = "/debug/dblocks"

	resp, err := c.AuthenticatedDo("GET", path, "", nil)
	if err != nil {
		return nil, fmt.Errorf("GET %s: %w", path, err)
	}
	defer resp.Body.Close()

	switch status := resp.StatusCode; status {
	case http.StatusOK:
		// Success: fall out of the switch and read the body below.
	case http.StatusInternalServerError:
		return nil, fmt.Errorf("get dblocks received status %d; note that this is currently only supported for mysql 5.7 and the database user must have PROCESS privilege, see the fleet logs for error details", status)
	default:
		return nil, fmt.Errorf("get dblocks received status %d", status)
	}

	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read dblocks response body: %w", err)
	}
	return data, nil
}

View File

@ -12,6 +12,7 @@ import (
"github.com/fleetdm/fleet/v4/server/fleet"
kitlog "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/gorilla/mux"
)
@ -57,11 +58,28 @@ func MakeDebugHandler(svc fleet.Service, config config.FleetConfig, logger kitlo
r.HandleFunc("/debug/migrations", func(rw http.ResponseWriter, r *http.Request) {
status, err := ds.MigrationStatus(r.Context())
if err != nil {
level.Error(logger).Log("err", err)
rw.WriteHeader(http.StatusInternalServerError)
return
}
b, err := json.Marshal(&status)
if err != nil {
level.Error(logger).Log("err", err)
rw.WriteHeader(http.StatusInternalServerError)
return
}
rw.Write(b)
})
r.HandleFunc("/debug/dblocks", func(rw http.ResponseWriter, r *http.Request) {
locks, err := ds.DBLocks(r.Context())
if err != nil {
level.Error(logger).Log("err", err)
rw.WriteHeader(http.StatusInternalServerError)
return
}
b, err := json.Marshal(locks)
if err != nil {
level.Error(logger).Log("err", err)
rw.WriteHeader(http.StatusInternalServerError)
return
}