Mirror of https://github.com/empayre/fleet.git, synced 2024-11-06 00:45:19 +00:00
Update backend MDM migration flow and add logging to aid in debugging migration errors (#16627)
parent 38d6e94f9f
commit 78aa1b0080

New file: changes/14444-mdm-migration-debug

@@ -0,0 +1 @@
+- Updated backend MDM migration flow and added logging to aid in debugging migration errors.
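For readers unfamiliar with the logging style used in the hunks below: the new log lines use go-kit's leveled logger with variadic key/value pairs, sometimes building the slice up first so optional fields are only appended when present. A minimal standalone sketch of that pattern (the logger setup and field values here are illustrative, not Fleet code):

```go
package main

import (
	"os"
	"time"

	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
)

func main() {
	logger := log.NewLogfmtLogger(os.Stderr)

	// Build key/value pairs incrementally, appending optional fields only when
	// they are present, then emit a single debug entry.
	until := time.Now().Add(3 * time.Minute)
	debugLogs := []interface{}{"msg", "update refetch_critical_queries_until", "host_id", 42}
	debugLogs = append(debugLogs, "until", until.Format(time.RFC3339))
	level.Debug(logger).Log(debugLogs...)
}
```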
@@ -8,6 +8,7 @@ import (
 	"github.com/fleetdm/fleet/v4/server/contexts/ctxerr"
 	hostctx "github.com/fleetdm/fleet/v4/server/contexts/host"
 	"github.com/fleetdm/fleet/v4/server/fleet"
+	"github.com/go-kit/kit/log/level"
 )

 func (svc *Service) ListDevicePolicies(ctx context.Context, host *fleet.Host) ([]*fleet.HostPolicy, error) {
@@ -25,6 +26,9 @@ const refetchMDMUnenrollCriticalQueryDuration = 3 * time.Minute
 // the server/webhooks one because it is a Fleet Premium only feature and for
 // licensing reasons this needs to live under this package.
 func (svc *Service) TriggerMigrateMDMDevice(ctx context.Context, host *fleet.Host) error {
+	level.Debug(svc.logger).Log("msg", "trigger migration webhook", "host_id", host.ID,
+		"refetch_critical_queries_until", host.RefetchCriticalQueriesUntil)
+
 	ac, err := svc.ds.AppConfig(ctx)
 	if err != nil {
 		return err
@@ -33,6 +37,15 @@ func (svc *Service) TriggerMigrateMDMDevice(ctx context.Context, host *fleet.Hos
 		return fleet.ErrMDMNotConfigured
 	}

+	if host.RefetchCriticalQueriesUntil != nil && host.RefetchCriticalQueriesUntil.After(svc.clock.Now()) {
+		// the webhook has already been triggered successfully recently (within the
+		// refetch critical queries delay), so return as if it did send it successfully
+		// but do not re-send.
+		level.Debug(svc.logger).Log("msg", "waiting for critical queries refetch, skip sending webhook",
+			"host_id", host.ID)
+		return nil
+	}
+
 	var bre fleet.BadRequestError
 	switch {
 	case !ac.MDM.MacOSMigration.Enable:
@@ -41,11 +54,6 @@ func (svc *Service) TriggerMigrateMDMDevice(ctx context.Context, host *fleet.Hos
 		bre.InternalErr = ctxerr.New(ctx, "macOS migration webhook URL not configured")
 	case !host.IsEligibleForDEPMigration():
 		bre.InternalErr = ctxerr.New(ctx, "host not eligible for macOS migration")
-	case host.RefetchCriticalQueriesUntil != nil && host.RefetchCriticalQueriesUntil.After(svc.clock.Now()):
-		// the webhook has already been triggered successfully recently (within the
-		// refetch critical queries delay), so do as if it did send it successfully
-		// but do not re-send.
-		return nil
 	}
 	if bre.InternalErr != nil {
 		return &bre
@@ -66,7 +74,7 @@ func (svc *Service) TriggerMigrateMDMDevice(ctx context.Context, host *fleet.Hos
 	// existing third-party MDM.
 	refetchUntil := svc.clock.Now().Add(refetchMDMUnenrollCriticalQueryDuration)
 	host.RefetchCriticalQueriesUntil = &refetchUntil
-	if err := svc.ds.UpdateHost(ctx, host); err != nil {
+	if err := svc.ds.UpdateHostRefetchCriticalQueriesUntil(ctx, host.ID, &refetchUntil); err != nil {
 		return ctxerr.Wrap(ctx, err, "save host with refetch critical queries timestamp")
 	}
@@ -408,12 +408,15 @@ type mdmMigrationHandler struct {
 }

 func (m *mdmMigrationHandler) NotifyRemote() error {
-	log.Debug().Msg("sending request to trigger mdm migration webhook")
+	log.Info().Msg("sending request to trigger mdm migration webhook")
+
 	// TODO: Revisit if/when we should hide the migration menu item depending on the
 	// result of the client request.
 	if err := m.client.MigrateMDM(m.tokenReader.GetCached()); err != nil {
+		log.Error().Err(err).Msg("triggering migration webhook")
 		return fmt.Errorf("on migration start: %w", err)
 	}
-	log.Debug().Msg("successfully sent request to trigger mdm migration webhook")
+	log.Info().Msg("successfully sent request to trigger mdm migration webhook")
 	return nil
 }
@@ -4271,9 +4271,30 @@ func (ds *Datastore) UpdateHostRefetchRequested(ctx context.Context, id uint, va
 	return nil
 }

+// UpdateHostRefetchCriticalQueriesUntil updates a host's refetch critical queries until field.
+func (ds *Datastore) UpdateHostRefetchCriticalQueriesUntil(ctx context.Context, id uint, until *time.Time) error {
+	debugLogs := []interface{}{"msg", "update refetch_critical_queries_until", "host_id", id}
+	if until != nil {
+		debugLogs = append(debugLogs, "until", until.Format(time.RFC3339))
+	}
+	level.Debug(ds.logger).Log(debugLogs...)
+
+	sqlStatement := `UPDATE hosts SET refetch_critical_queries_until = ? WHERE id = ?`
+	_, err := ds.writer(ctx).ExecContext(ctx, sqlStatement, until, id)
+	if err != nil {
+		return ctxerr.Wrapf(ctx, err, "update host %d refetch_critical_queries_until", id)
+	}
+	return nil
+}
+
 // UpdateHost updates all columns of the `hosts` table.
 // It only updates `hosts` table, other additional host information is ignored.
 func (ds *Datastore) UpdateHost(ctx context.Context, host *fleet.Host) error {
+	if host.OrbitNodeKey == nil || *host.OrbitNodeKey == "" {
+		level.Debug(ds.logger).Log("msg", "missing orbit_node_key to update host",
+			"host_id", host.ID, "node_key", host.NodeKey, "orbit_node_key", host.OrbitNodeKey)
+	}
+
 	sqlStatement := `
 	UPDATE hosts SET
 		detail_updated_at = ?,
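For context on how the new datastore method is driven: the service change above opens the refetch-critical-queries window by passing a timestamp, and the integration test further below clears it by passing nil. A hedged sketch of those two call shapes, assuming a `fleet.Datastore` value is in scope (the helper name is hypothetical, not part of this commit):

```go
package example

import (
	"context"
	"time"

	"github.com/fleetdm/fleet/v4/server/fleet"
)

// openAndClearRefetchWindow is a hypothetical helper; it only illustrates the
// two ways the new method is called in this commit.
func openAndClearRefetchWindow(ctx context.Context, ds fleet.Datastore, hostID uint) error {
	// A non-nil timestamp opens the window (3 minutes here, matching
	// refetchMDMUnenrollCriticalQueryDuration in the service code above).
	refetchUntil := time.Now().Add(3 * time.Minute)
	if err := ds.UpdateHostRefetchCriticalQueriesUntil(ctx, hostID, &refetchUntil); err != nil {
		return err
	}

	// Passing nil clears the window again.
	return ds.UpdateHostRefetchCriticalQueriesUntil(ctx, hostID, nil)
}
```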
@@ -718,6 +718,9 @@ type Datastore interface {
 	// UpdateHostRefetchRequested updates a host's refetch requested field.
 	UpdateHostRefetchRequested(ctx context.Context, hostID uint, value bool) error

+	// UpdateHostRefetchCriticalQueriesUntil updates a host's refetch critical queries until field.
+	UpdateHostRefetchCriticalQueriesUntil(ctx context.Context, hostID uint, until *time.Time) error
+
 	// FlippingPoliciesForHost fetches the policies with incoming results and returns:
 	// - a list of "new" failing policies; "new" here means those that fail on their first
 	// run, and those that were passing on the previous run and are failing on the incoming execution.
@@ -525,6 +525,12 @@ func (d *DEPService) processDeviceResponse(ctx context.Context, depClient *godep
 		}
 		logs = append(logs, logCountsForResults(apiResp.Devices)...)
 		level.Info(logger).Log(logs...)
+
+		debugLogs := []interface{}{"msg", "assign profile responses by device"}
+		for k, v := range apiResp.Devices {
+			debugLogs = append(debugLogs, k, v)
+		}
+		level.Debug(logger).Log(debugLogs...)
 	}

 	return nil
@@ -508,6 +508,8 @@ type ListScheduledQueriesInPackFunc func(ctx context.Context, packID uint) (flee

 type UpdateHostRefetchRequestedFunc func(ctx context.Context, hostID uint, value bool) error

+type UpdateHostRefetchCriticalQueriesUntilFunc func(ctx context.Context, hostID uint, until *time.Time) error
+
 type FlippingPoliciesForHostFunc func(ctx context.Context, hostID uint, incomingResults map[uint]*bool) (newFailing []uint, newPassing []uint, err error)

 type RecordPolicyQueryExecutionsFunc func(ctx context.Context, host *fleet.Host, results map[uint]*bool, updated time.Time, deferredSaveHost bool) error
@@ -1540,6 +1542,9 @@ type DataStore struct {
 	UpdateHostRefetchRequestedFunc        UpdateHostRefetchRequestedFunc
 	UpdateHostRefetchRequestedFuncInvoked bool

+	UpdateHostRefetchCriticalQueriesUntilFunc        UpdateHostRefetchCriticalQueriesUntilFunc
+	UpdateHostRefetchCriticalQueriesUntilFuncInvoked bool
+
 	FlippingPoliciesForHostFunc        FlippingPoliciesForHostFunc
 	FlippingPoliciesForHostFuncInvoked bool
@@ -3702,6 +3707,13 @@ func (s *DataStore) UpdateHostRefetchRequested(ctx context.Context, hostID uint,
 	return s.UpdateHostRefetchRequestedFunc(ctx, hostID, value)
 }

+func (s *DataStore) UpdateHostRefetchCriticalQueriesUntil(ctx context.Context, hostID uint, until *time.Time) error {
+	s.mu.Lock()
+	s.UpdateHostRefetchCriticalQueriesUntilFuncInvoked = true
+	s.mu.Unlock()
+	return s.UpdateHostRefetchCriticalQueriesUntilFunc(ctx, hostID, until)
+}
+
 func (s *DataStore) FlippingPoliciesForHost(ctx context.Context, hostID uint, incomingResults map[uint]*bool) (newFailing []uint, newPassing []uint, err error) {
 	s.mu.Lock()
 	s.FlippingPoliciesForHostFuncInvoked = true
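Outside the integration suite, the generated mock can be exercised in the usual stub-and-assert style. A hedged sketch (the mock package import path and the test body are assumptions; only the `UpdateHostRefetchCriticalQueriesUntilFunc`/`FuncInvoked` fields come from this commit):

```go
package example_test

import (
	"context"
	"testing"
	"time"

	"github.com/fleetdm/fleet/v4/server/mock"
	"github.com/stretchr/testify/require"
)

func TestRefetchWindowIsCleared(t *testing.T) {
	ds := new(mock.DataStore)

	// Stub the new mock func and capture the value passed by the code under test.
	var gotUntil *time.Time
	ds.UpdateHostRefetchCriticalQueriesUntilFunc = func(ctx context.Context, hostID uint, until *time.Time) error {
		gotUntil = until
		return nil
	}

	// Stand-in for the code under test; a real test would call the service instead.
	require.NoError(t, ds.UpdateHostRefetchCriticalQueriesUntil(context.Background(), 1, nil))

	require.True(t, ds.UpdateHostRefetchCriticalQueriesUntilFuncInvoked)
	require.Nil(t, gotUntil)
}
```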
@@ -5761,6 +5761,8 @@ func (s *integrationMDMTestSuite) TestMigrateMDMDeviceWebhook() {
 	require.NotNil(t, h.RefetchCriticalQueriesUntil)
 	require.True(t, h.RefetchCriticalQueriesUntil.After(time.Now()))

+	require.NoError(t, s.ds.UpdateHostRefetchCriticalQueriesUntil(context.Background(), h.ID, nil))
+
 	// bad token
 	s.Do("POST", fmt.Sprintf("/api/v1/fleet/device/%s/migrate_mdm", "bad-token"), nil, http.StatusUnauthorized)
 	require.False(t, webhookCalled)
@@ -1069,6 +1069,9 @@ func (svc *Service) SubmitDistributedQueryResults(
 		host.RefetchRequested = false
 	}
 	refetchCriticalCleared := refetchCriticalSet && host.RefetchCriticalQueriesUntil == nil
+	if refetchCriticalSet {
+		level.Debug(svc.logger).Log("msg", "refetch critical status on submit distributed query results", "host_id", host.ID, "refetch_requested", refetchRequested, "refetch_critical_queries_until", host.RefetchCriticalQueriesUntil, "refetch_critical_cleared", refetchCriticalCleared)
+	}

 	if refetchRequested || detailUpdated || refetchCriticalCleared {
 		appConfig, err := svc.ds.AppConfig(ctx)
@@ -1453,6 +1453,12 @@ func directIngestMDMMac(ctx context.Context, logger log.Logger, host *fleet.Host
 		logger.Log("component", "service", "method", "ingestMDM", "warn",
 			fmt.Sprintf("mdm expected single result got %d", len(rows)))
 	}
+
+	if host.RefetchCriticalQueriesUntil != nil {
+		level.Debug(logger).Log("msg", "ingesting macos mdm data during refetch critical queries window", "host_id", host.ID,
+			"data", fmt.Sprintf("%+v", rows))
+	}
+
 	enrolledVal := rows[0]["enrolled"]
 	if enrolledVal == "" {
 		return ctxerr.Wrap(ctx, fmt.Errorf("missing mdm.enrolled value: %d", host.ID))
@@ -6,9 +6,13 @@ import (
 	"log"
 	"net/http"
+	"os"
+	"os/signal"
+	"syscall"
 	"time"
 )

+const DELAY = 10 * time.Second // adjust this to simulate slow webhook response

 func main() {
 	http.HandleFunc("/", func(writer http.ResponseWriter, request *http.Request) {
 		var detail string
@@ -19,8 +23,10 @@ func main() {
 			detail = fmt.Sprintf("| BODY: %s", string(body))
 		}
 		log.Printf("%s %s %s\n", request.Method, request.URL.Path, detail)
-		if err := request.Write(writer); err != nil {
-			log.Fatalf(err.Error())
+
+		time.Sleep(DELAY)
+		if _, err := writer.Write(nil); err != nil {
+			log.Printf("error writing response %s", err.Error())
 		}
 	})
@@ -32,9 +38,14 @@ func main() {
 	fmt.Printf("Server running at http://localhost%s\n", port)
 	server := &http.Server{
 		Addr:              port,
-		ReadHeaderTimeout: 3 * time.Second,
-	}
-	if err := server.ListenAndServe(); err != nil {
-		log.Fatalf(err.Error())
+		ReadHeaderTimeout: 30 * time.Second,
+	}
+	go func() {
+		if err := server.ListenAndServe(); err != nil {
+			log.Fatalf("server error %s", err.Error())
 		}
+	}()
+	sig := make(chan os.Signal, 1)
+	signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM)
+	<-sig // block on signal
 }
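The two hunks above turn the sample webhook receiver into a deliberately slow endpoint (via `DELAY`) that keeps running until interrupted. A rough sketch of poking it from a client whose timeout is shorter than `DELAY`, which is the failure mode this tool helps reproduce (the port and payload below are assumptions; use whatever address the server prints at startup):

```go
package main

import (
	"log"
	"net/http"
	"strings"
	"time"
)

func main() {
	// With DELAY set to 10s on the server, any client timeout shorter than
	// that surfaces a timeout error from the simulated slow webhook.
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Post("http://localhost:8085/", "application/json",
		strings.NewReader(`{"timestamp":"2024-01-01T00:00:00Z"}`)) // illustrative port and body
	if err != nil {
		log.Printf("webhook request failed (expected with a short timeout): %v", err)
		return
	}
	defer resp.Body.Close()
	log.Printf("webhook responded with status %s", resp.Status)
}
```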
@@ -13,6 +13,8 @@ Scripts in this directory aim to ease the testing of Orbit and the [TUF](https:/
 > PS: We use `host.docker.internal` because the testing certificate `./tools/osquery/fleet.crt`
 > has such hostname (and `localhost`) defined as SANs.

+> PPS: Make sure you set the macOSX deployment target to the lowest macOS version you intend to support. See [Troubleshooting](#troubleshooting) for more details.
+
 # Run

 The `main.sh` creates and runs the TUF repository and optionally generates the installers (GENERATE_PKGS):
@@ -94,7 +96,7 @@ When running Fleet Desktop on an older macOS version than it was compiled on, Or
 _LSOpenURLsWithCompletionHandler() failed with error -10825
 ```

-Solution: `Set the MACOSX_DEPLOYMENT_TARGET` environment variable to the lowest macOS version you intend to support:
+Solution: Set the `MACOSX_DEPLOYMENT_TARGET` environment variable to the lowest macOS version you intend to support:

 ```
 export MACOSX_DEPLOYMENT_TARGET=13 # replace '13' with your target macOS version
@@ -115,8 +115,7 @@ for system in $SYSTEMS; do

 	# Add swiftDialog on macos (if enabled).
 	if [[ $system == "macos" && -n "$SWIFT_DIALOG" ]]; then
-		# For now we always make swiftDialog (until it's uploaded to our TUF repo)
-		make swift-dialog-app-tar-gz version=$SWIFT_DIALOG_MACOS_APP_VERSION build=$SWIFT_DIALOG_MACOS_APP_BUILD_VERSION out-path=.
+		curl https://tuf.fleetctl.com/targets/swiftDialog/macos/stable/swiftDialog.app.tar.gz --output swiftDialog.app.tar.gz

 		./build/fleetctl updates add \
 			--path $TUF_PATH \