mirror of
https://github.com/empayre/fleet.git
synced 2024-11-06 00:45:19 +00:00
Reducing the number of deadlock errors. (#17290)
#16562

Based on local testing, these changes reduce the number of 'Deadlock found' errors when multiple hosts share the same UUID. The performance issues with vulnerability processing will be fixed as part of issue https://github.com/fleetdm/fleet/issues/16858.

# Checklist for submitter

<!-- Note that API documentation changes are now addressed by the product design team. -->

- [x] Changes file added for user-visible changes in `changes/` or `orbit/changes/`. See [Changes files](https://fleetdm.com/docs/contributing/committing-changes#changes-files) for more information.
- [ ] Added/updated tests
- [x] Manual QA for all new/changed functionality
This commit is contained in:
parent
e9a464e0cf
commit
1052b6b350
1
changes/16562-sql-deadlock
Normal file
1
changes/16562-sql-deadlock
Normal file
@ -0,0 +1 @@
|
||||
Reduced the number of 'Deadlock found' errors seen by the server when multiple hosts share the same UUID.
|
@ -4405,57 +4405,63 @@ func (ds *Datastore) UpdateHost(ctx context.Context, host *fleet.Host) error {
|
||||
refetch_critical_queries_until = ?
|
||||
WHERE id = ?
|
||||
`
|
||||
_, err := ds.writer(ctx).ExecContext(ctx, sqlStatement,
|
||||
host.DetailUpdatedAt,
|
||||
host.LabelUpdatedAt,
|
||||
host.PolicyUpdatedAt,
|
||||
host.NodeKey,
|
||||
host.Hostname,
|
||||
host.UUID,
|
||||
host.Platform,
|
||||
host.OsqueryVersion,
|
||||
host.OSVersion,
|
||||
host.Uptime,
|
||||
host.Memory,
|
||||
host.CPUType,
|
||||
host.CPUSubtype,
|
||||
host.CPUBrand,
|
||||
host.CPUPhysicalCores,
|
||||
host.HardwareVendor,
|
||||
host.HardwareModel,
|
||||
host.HardwareVersion,
|
||||
host.HardwareSerial,
|
||||
host.ComputerName,
|
||||
host.Build,
|
||||
host.PlatformLike,
|
||||
host.CodeName,
|
||||
host.CPULogicalCores,
|
||||
host.DistributedInterval,
|
||||
host.ConfigTLSRefresh,
|
||||
host.LoggerTLSPeriod,
|
||||
host.TeamID,
|
||||
host.PrimaryIP,
|
||||
host.PrimaryMac,
|
||||
host.PublicIP,
|
||||
host.RefetchRequested,
|
||||
host.OrbitNodeKey,
|
||||
host.RefetchCriticalQueriesUntil,
|
||||
host.ID,
|
||||
)
|
||||
if err != nil {
|
||||
return ctxerr.Wrapf(ctx, err, "save host with id %d", host.ID)
|
||||
}
|
||||
_, err = ds.writer(ctx).ExecContext(ctx, `
|
||||
return ds.withRetryTxx(
|
||||
ctx, func(tx sqlx.ExtContext) error {
|
||||
_, err := tx.ExecContext(
|
||||
ctx, sqlStatement,
|
||||
host.DetailUpdatedAt,
|
||||
host.LabelUpdatedAt,
|
||||
host.PolicyUpdatedAt,
|
||||
host.NodeKey,
|
||||
host.Hostname,
|
||||
host.UUID,
|
||||
host.Platform,
|
||||
host.OsqueryVersion,
|
||||
host.OSVersion,
|
||||
host.Uptime,
|
||||
host.Memory,
|
||||
host.CPUType,
|
||||
host.CPUSubtype,
|
||||
host.CPUBrand,
|
||||
host.CPUPhysicalCores,
|
||||
host.HardwareVendor,
|
||||
host.HardwareModel,
|
||||
host.HardwareVersion,
|
||||
host.HardwareSerial,
|
||||
host.ComputerName,
|
||||
host.Build,
|
||||
host.PlatformLike,
|
||||
host.CodeName,
|
||||
host.CPULogicalCores,
|
||||
host.DistributedInterval,
|
||||
host.ConfigTLSRefresh,
|
||||
host.LoggerTLSPeriod,
|
||||
host.TeamID,
|
||||
host.PrimaryIP,
|
||||
host.PrimaryMac,
|
||||
host.PublicIP,
|
||||
host.RefetchRequested,
|
||||
host.OrbitNodeKey,
|
||||
host.RefetchCriticalQueriesUntil,
|
||||
host.ID,
|
||||
)
|
||||
if err != nil {
|
||||
return ctxerr.Wrapf(ctx, err, "save host with id %d", host.ID)
|
||||
}
|
||||
_, err = tx.ExecContext(
|
||||
ctx, `
|
||||
UPDATE host_display_names
|
||||
SET display_name=?
|
||||
WHERE host_id=?`,
|
||||
host.DisplayName(),
|
||||
host.ID,
|
||||
host.DisplayName(),
|
||||
host.ID,
|
||||
)
|
||||
if err != nil {
|
||||
return ctxerr.Wrapf(ctx, err, "update host_display_names for host id %d", host.ID)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
return ctxerr.Wrapf(ctx, err, "update host_display_names for host id %d", host.ID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ds *Datastore) OSVersion(ctx context.Context, osVersionID uint, teamID *uint) (*fleet.OSVersion, *time.Time, error) {
|
||||
|
@ -225,8 +225,13 @@ func withRetryTxx(ctx context.Context, db *sqlx.DB, fn txFn, logger log.Logger)
|
||||
return nil
|
||||
}
|
||||
|
||||
bo := backoff.NewExponentialBackOff()
|
||||
bo.MaxElapsedTime = 5 * time.Second
|
||||
expBo := backoff.NewExponentialBackOff()
|
||||
// The MySQL innodb_lock_wait_timeout default is 50 seconds, so a transaction can be waiting for a lock for several seconds.
|
||||
// Setting a higher MaxElapsedTime to increase probability that transaction will be retried.
|
||||
// This will reduce the number of retryable 'Deadlock found' errors. However, with a loaded DB, we will still see
|
||||
// 'context canceled' errors when the server drops long-lasting connections.
|
||||
expBo.MaxElapsedTime = 1 * time.Minute
|
||||
bo := backoff.WithMaxRetries(expBo, 5)
|
||||
return backoff.Retry(operation, bo)
|
||||
}
|
||||
|
||||
@ -333,8 +338,13 @@ func (ds *Datastore) writeChanLoop() {
|
||||
case *fleet.Host:
|
||||
item.errCh <- ds.UpdateHost(item.ctx, actualItem)
|
||||
case hostXUpdatedAt:
|
||||
query := fmt.Sprintf(`UPDATE hosts SET %s = ? WHERE id=?`, actualItem.what)
|
||||
_, err := ds.writer(item.ctx).ExecContext(item.ctx, query, actualItem.updatedAt, actualItem.hostID)
|
||||
err := ds.withRetryTxx(
|
||||
item.ctx, func(tx sqlx.ExtContext) error {
|
||||
query := fmt.Sprintf(`UPDATE hosts SET %s = ? WHERE id=?`, actualItem.what)
|
||||
_, err := tx.ExecContext(item.ctx, query, actualItem.updatedAt, actualItem.hostID)
|
||||
return err
|
||||
},
|
||||
)
|
||||
item.errCh <- ctxerr.Wrap(item.ctx, err, "updating hosts label updated at")
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user