mirror of
https://github.com/empayre/fleet.git
synced 2024-11-06 08:55:24 +00:00
Add the fleetctl debug {errors,db-locks} sub-commands (#3168)
This commit is contained in:
parent
eba5d1b1b6
commit
b84ef79bf4
1
changes/issue-2027-add-fleetctl-debug-subcommands
Normal file
1
changes/issue-2027-add-fleetctl-debug-subcommands
Normal file
@ -0,0 +1 @@
|
||||
* Add `fleetctl debug errors` to get the list of stored errors, and `fleetctl debug db-locks` to get the database transactions that are currently waiting on or blocking other transactions (both reports are also added to the `fleetctl debug archive` output).
|
@ -37,6 +37,8 @@ func debugCommand() *cli.Command {
|
||||
debugHeapCommand(),
|
||||
debugGoroutineCommand(),
|
||||
debugTraceCommand(),
|
||||
debugErrorsCommand(),
|
||||
debugDBLocksCommand(),
|
||||
debugArchiveCommand(),
|
||||
debugConnectionCommand(),
|
||||
debugMigrations(),
|
||||
@ -269,6 +271,8 @@ func debugArchiveCommand() *cli.Command {
|
||||
"allocs",
|
||||
"block",
|
||||
"cmdline",
|
||||
"db-locks",
|
||||
"errors",
|
||||
"goroutine",
|
||||
"heap",
|
||||
"mutex",
|
||||
@ -294,7 +298,23 @@ func debugArchiveCommand() *cli.Command {
|
||||
defer tarwriter.Close()
|
||||
|
||||
for _, profile := range profiles {
|
||||
res, err := fleet.DebugPprof(profile)
|
||||
var res []byte
|
||||
|
||||
switch profile {
|
||||
case "errors":
|
||||
var buf bytes.Buffer
|
||||
err = fleet.DebugErrors(&buf)
|
||||
if err == nil {
|
||||
res = buf.Bytes()
|
||||
}
|
||||
|
||||
case "db-locks":
|
||||
res, err = fleet.DebugDBLocks()
|
||||
|
||||
default:
|
||||
res, err = fleet.DebugPprof(profile)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
// Don't fail the entire process on errors. We'll take what
|
||||
// we can get if the servers are in a bad state and not
|
||||
@ -492,6 +512,85 @@ Such migrations can be applied via "fleet prepare db" before running "fleet serv
|
||||
}
|
||||
}
|
||||
|
||||
func debugErrorsCommand() *cli.Command {
|
||||
name := "errors"
|
||||
return &cli.Command{
|
||||
Name: name,
|
||||
Usage: "Save the recorded fleet server errors to a file.",
|
||||
UsageText: "Recording of errors and their retention period is controlled via the --logging_error_retention_period fleet command flag.",
|
||||
Flags: []cli.Flag{
|
||||
outfileFlag(),
|
||||
configFlag(),
|
||||
contextFlag(),
|
||||
debugFlag(),
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
fleet, err := clientFromCLI(c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
outfile := getOutfile(c)
|
||||
if outfile == "" {
|
||||
outfile = outfileName(name)
|
||||
}
|
||||
|
||||
f, err := os.OpenFile(outfile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, defaultFileMode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := fleet.DebugErrors(f); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
return fmt.Errorf("write errors to file: %w", err)
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "Output written to %s\n", outfile)
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func debugDBLocksCommand() *cli.Command {
|
||||
name := "db-locks"
|
||||
return &cli.Command{
|
||||
Name: name,
|
||||
Usage: "Save the current database transaction locking information to a file.",
|
||||
UsageText: "Saves transaction locking information with queries that are waiting on or blocking other transactions.",
|
||||
Flags: []cli.Flag{
|
||||
outfileFlag(),
|
||||
configFlag(),
|
||||
contextFlag(),
|
||||
debugFlag(),
|
||||
},
|
||||
Action: func(c *cli.Context) error {
|
||||
fleet, err := clientFromCLI(c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
locks, err := fleet.DebugDBLocks()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
outfile := getOutfile(c)
|
||||
if outfile == "" {
|
||||
outfile = outfileName(name)
|
||||
}
|
||||
|
||||
if err := writeFile(outfile, locks, defaultFileMode); err != nil {
|
||||
return fmt.Errorf("write %s to file: %w", name, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func resolveHostname(ctx context.Context, timeout time.Duration, host string) error {
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
@ -6,6 +6,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/fleetdm/fleet/v4/server/contexts/ctxerr"
|
||||
"github.com/fleetdm/fleet/v4/server/fleet"
|
||||
)
|
||||
|
||||
func (d *Datastore) Lock(ctx context.Context, name string, owner string, expiration time.Duration) (bool, error) {
|
||||
@ -56,3 +57,27 @@ func (d *Datastore) Unlock(ctx context.Context, name string, owner string) error
|
||||
_, err := d.writer.ExecContext(ctx, `DELETE FROM locks WHERE name = ? and owner = ?`, name, owner)
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *Datastore) DBLocks(ctx context.Context) ([]*fleet.DBLock, error) {
|
||||
stmt := `
|
||||
SELECT
|
||||
r.trx_id waiting_trx_id,
|
||||
r.trx_mysql_thread_id waiting_thread,
|
||||
r.trx_query waiting_query,
|
||||
b.trx_id blocking_trx_id,
|
||||
b.trx_mysql_thread_id blocking_thread,
|
||||
b.trx_query blocking_query
|
||||
FROM information_schema.innodb_lock_waits w
|
||||
INNER JOIN information_schema.innodb_trx b
|
||||
ON b.trx_id = w.blocking_trx_id
|
||||
INNER JOIN information_schema.innodb_trx r
|
||||
ON r.trx_id = w.requesting_trx_id`
|
||||
|
||||
var locks []*fleet.DBLock
|
||||
// Even though this is a Read, use the writer as we want the db locks from
|
||||
// the primary database (the read replica should have little to no trx locks).
|
||||
if err := d.writer.SelectContext(ctx, &locks, stmt); err != nil {
|
||||
return nil, ctxerr.Wrap(ctx, err, "select locking information")
|
||||
}
|
||||
return locks, nil
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ package mysql
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -14,6 +15,22 @@ func TestLocks(t *testing.T) {
|
||||
ds := CreateMySQLDS(t)
|
||||
defer ds.Close()
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
fn func(t *testing.T, ds *Datastore)
|
||||
}{
|
||||
{"LockUnlock", func(t *testing.T, ds *Datastore) { testLocksLockUnlock(t, ds) }},
|
||||
{"DBLocks", func(t *testing.T, ds *Datastore) { testLocksDBLocks(t, ds) }},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
defer TruncateTables(t, ds)
|
||||
c.fn(t, ds)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func testLocksLockUnlock(t *testing.T, ds *Datastore) {
|
||||
owner1, err := server.GenerateRandomText(64)
|
||||
require.NoError(t, err)
|
||||
owner2, err := server.GenerateRandomText(64)
|
||||
@ -58,3 +75,53 @@ func TestLocks(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
assert.True(t, locked)
|
||||
}
|
||||
|
||||
func testLocksDBLocks(t *testing.T, ds *Datastore) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
t.Cleanup(cancel)
|
||||
|
||||
_, err := ds.writer.ExecContext(ctx, `CREATE TABLE deadlocks(a int primary key)`)
|
||||
require.NoError(t, err)
|
||||
|
||||
t.Cleanup(func() {
|
||||
_, err := ds.writer.ExecContext(ctx, `DROP TABLE deadlocks`)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
_, err = ds.writer.ExecContext(ctx, `INSERT INTO deadlocks(a) VALUES (0), (1)`)
|
||||
require.NoError(t, err)
|
||||
|
||||
// cause a deadlock (see https://stackoverflow.com/a/31552794/1094941)
|
||||
tx1, err := ds.writer.BeginTxx(ctx, &sql.TxOptions{Isolation: sql.LevelSerializable})
|
||||
require.NoError(t, err)
|
||||
defer tx1.Rollback()
|
||||
tx2, err := ds.writer.BeginTxx(ctx, &sql.TxOptions{Isolation: sql.LevelSerializable})
|
||||
require.NoError(t, err)
|
||||
defer tx2.Rollback()
|
||||
|
||||
wait := make(chan struct{})
|
||||
go func() {
|
||||
var dst []int
|
||||
err = tx1.SelectContext(ctx, &dst, `SELECT * FROM deadlocks WHERE a = 0`)
|
||||
require.NoError(t, err)
|
||||
err = tx2.SelectContext(ctx, &dst, `SELECT * FROM deadlocks WHERE a = 1`)
|
||||
require.NoError(t, err)
|
||||
|
||||
close(wait)
|
||||
_, err = tx1.ExecContext(ctx, `UPDATE deadlocks SET a = 0 WHERE a != 0`)
|
||||
require.Error(t, err)
|
||||
_, err = tx2.ExecContext(ctx, `UPDATE deadlocks SET a = 1 WHERE a != 1`)
|
||||
require.Error(t, err)
|
||||
}()
|
||||
|
||||
<-wait
|
||||
locks, err := ds.DBLocks(ctx)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, locks, 1)
|
||||
require.NotNil(t, locks[0].WaitingQuery)
|
||||
require.Equal(t, *locks[0].WaitingQuery, `UPDATE deadlocks SET a = 0 WHERE a != 0`)
|
||||
require.NotEmpty(t, locks[0].BlockingTrxID)
|
||||
require.NotEmpty(t, locks[0].WaitingTrxID)
|
||||
require.NotZero(t, locks[0].BlockingThread)
|
||||
require.NotZero(t, locks[0].WaitingThread)
|
||||
}
|
||||
|
@ -399,6 +399,8 @@ type Datastore interface {
|
||||
// Unlock tries to unlock the lock by that `name` for the specified
|
||||
// `owner`. Unlocking when not holding the lock shouldn't error
|
||||
Unlock(ctx context.Context, name string, owner string) error
|
||||
// DBLocks returns the current database transaction lock waits information.
|
||||
DBLocks(ctx context.Context) ([]*DBLock, error)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Aggregated Stats
|
||||
|
12
server/fleet/db.go
Normal file
12
server/fleet/db.go
Normal file
@ -0,0 +1,12 @@
|
||||
package fleet
|
||||
|
||||
// DBLock describes a single database transaction lock wait, as returned by
// datastore.DBLocks: one transaction (the "waiting" side) and the transaction
// it is waiting on (the "blocking" side).
type DBLock struct {
	// Identifier, thread and query of the waiting transaction. The query is
	// a pointer and is left out of the JSON output when nil.
	WaitingTrxID  string  `db:"waiting_trx_id" json:"waiting_trx_id"`
	WaitingThread uint64  `db:"waiting_thread" json:"waiting_thread"`
	WaitingQuery  *string `db:"waiting_query" json:"waiting_query,omitempty"`

	// Identifier, thread and query of the blocking transaction. The query is
	// a pointer and is left out of the JSON output when nil.
	BlockingTrxID  string  `db:"blocking_trx_id" json:"blocking_trx_id"`
	BlockingThread uint64  `db:"blocking_thread" json:"blocking_thread"`
	BlockingQuery  *string `db:"blocking_query" json:"blocking_query,omitempty"`
}
|
@ -1,16 +0,0 @@
|
||||
package fleet
|
||||
|
||||
type Transactions interface {
|
||||
Begin() (Transaction, error)
|
||||
}
|
||||
|
||||
type Transaction interface {
|
||||
Commit() error
|
||||
Rollback() error
|
||||
}
|
||||
|
||||
func HasTransaction(tx Transaction) OptionalArg {
|
||||
return func() interface{} {
|
||||
return tx
|
||||
}
|
||||
}
|
@ -22,10 +22,3 @@ func (m *Store) MigrationStatus(ctx context.Context) (*fleet.MigrationStatus, er
|
||||
return &fleet.MigrationStatus{}, nil
|
||||
}
|
||||
func (m *Store) Name() string { return "mock" }
|
||||
|
||||
type mockTransaction struct{}
|
||||
|
||||
func (m *mockTransaction) Commit() error { return nil }
|
||||
func (m *mockTransaction) Rollback() error { return nil }
|
||||
|
||||
func (m *Store) Begin() (fleet.Transaction, error) { return &mockTransaction{}, nil }
|
||||
|
@ -319,6 +319,8 @@ type LockFunc func(ctx context.Context, name string, owner string, expiration ti
|
||||
|
||||
// UnlockFunc is the signature of the function the mock DataStore delegates to
// when its Unlock method is called.
type UnlockFunc func(ctx context.Context, name string, owner string) error
|
||||
|
||||
// DBLocksFunc is the signature of the function the mock DataStore delegates
// to when its DBLocks method is called.
type DBLocksFunc func(ctx context.Context) ([]*fleet.DBLock, error)
|
||||
|
||||
// UpdateScheduledQueryAggregatedStatsFunc is the signature of the function the
// mock DataStore delegates to when its UpdateScheduledQueryAggregatedStats
// method is called.
type UpdateScheduledQueryAggregatedStatsFunc func(ctx context.Context) error
|
||||
|
||||
type UpdateQueryAggregatedStatsFunc func(ctx context.Context) error
|
||||
@ -786,6 +788,9 @@ type DataStore struct {
|
||||
UnlockFunc UnlockFunc
|
||||
UnlockFuncInvoked bool
|
||||
|
||||
DBLocksFunc DBLocksFunc
|
||||
DBLocksFuncInvoked bool
|
||||
|
||||
UpdateScheduledQueryAggregatedStatsFunc UpdateScheduledQueryAggregatedStatsFunc
|
||||
UpdateScheduledQueryAggregatedStatsFuncInvoked bool
|
||||
|
||||
@ -1563,6 +1568,11 @@ func (s *DataStore) Unlock(ctx context.Context, name string, owner string) error
|
||||
return s.UnlockFunc(ctx, name, owner)
|
||||
}
|
||||
|
||||
func (s *DataStore) DBLocks(ctx context.Context) ([]*fleet.DBLock, error) {
|
||||
s.DBLocksFuncInvoked = true
|
||||
return s.DBLocksFunc(ctx)
|
||||
}
|
||||
|
||||
func (s *DataStore) UpdateScheduledQueryAggregatedStats(ctx context.Context) error {
|
||||
s.UpdateScheduledQueryAggregatedStatsFuncInvoked = true
|
||||
return s.UpdateScheduledQueryAggregatedStatsFunc(ctx)
|
||||
|
@ -2,6 +2,7 @@ package service
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
|
||||
@ -40,3 +41,48 @@ func (c *Client) DebugMigrations() (*fleet.MigrationStatus, error) {
|
||||
}
|
||||
return &migrationStatus, nil
|
||||
}
|
||||
|
||||
// DebugErrors calls the /debug/errors endpoint and on success writes its
|
||||
// (potentially large) response body to w.
|
||||
func (c *Client) DebugErrors(w io.Writer) error {
|
||||
endpoint := "/debug/errors"
|
||||
response, err := c.AuthenticatedDo("GET", endpoint, "", nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("GET %s: %w", endpoint, err)
|
||||
}
|
||||
defer response.Body.Close()
|
||||
|
||||
if response.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("get errors received status %d", response.StatusCode)
|
||||
}
|
||||
|
||||
if _, err := io.Copy(w, response.Body); err != nil {
|
||||
return fmt.Errorf("read errors response body: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DebugDBLocks calls the /debug/dblocks endpoint and on success returns its
|
||||
// response body data.
|
||||
func (c *Client) DebugDBLocks() ([]byte, error) {
|
||||
endpoint := "/debug/dblocks"
|
||||
response, err := c.AuthenticatedDo("GET", endpoint, "", nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GET %s: %w", endpoint, err)
|
||||
}
|
||||
defer response.Body.Close()
|
||||
|
||||
if response.StatusCode != http.StatusOK {
|
||||
if response.StatusCode == http.StatusInternalServerError {
|
||||
return nil, fmt.Errorf("get dblocks received status %d; note that this is currently only supported for mysql 5.7 and the database user must have PROCESS privilege, see the fleet logs for error details", response.StatusCode)
|
||||
}
|
||||
return nil, fmt.Errorf("get dblocks received status %d", response.StatusCode)
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read dblocks response body: %w", err)
|
||||
}
|
||||
|
||||
return body, nil
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ import (
|
||||
"github.com/fleetdm/fleet/v4/server/fleet"
|
||||
|
||||
kitlog "github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/log/level"
|
||||
"github.com/gorilla/mux"
|
||||
)
|
||||
|
||||
@ -57,11 +58,28 @@ func MakeDebugHandler(svc fleet.Service, config config.FleetConfig, logger kitlo
|
||||
r.HandleFunc("/debug/migrations", func(rw http.ResponseWriter, r *http.Request) {
|
||||
status, err := ds.MigrationStatus(r.Context())
|
||||
if err != nil {
|
||||
level.Error(logger).Log("err", err)
|
||||
rw.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
b, err := json.Marshal(&status)
|
||||
if err != nil {
|
||||
level.Error(logger).Log("err", err)
|
||||
rw.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
rw.Write(b)
|
||||
})
|
||||
r.HandleFunc("/debug/dblocks", func(rw http.ResponseWriter, r *http.Request) {
|
||||
locks, err := ds.DBLocks(r.Context())
|
||||
if err != nil {
|
||||
level.Error(logger).Log("err", err)
|
||||
rw.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
b, err := json.Marshal(locks)
|
||||
if err != nil {
|
||||
level.Error(logger).Log("err", err)
|
||||
rw.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user