mirror of
https://github.com/empayre/fleet.git
synced 2024-11-06 00:45:19 +00:00
Orbit to detect 5XX alongside network errors (#17084)
#16423, #16326 In the [original PR](https://github.com/fleetdm/fleet/pull/16968) we missed detecting 5XX errors. Fleet usually runs behind load balancers, so when Fleet is brought down, Orbit still connects successfully but receives 5XX responses; we need to detect those as well.
This commit is contained in:
parent
2c383a060f
commit
0642387b32
@ -9,7 +9,6 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"net"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
@ -783,8 +782,13 @@ func main() {
|
||||
enrollSecret,
|
||||
fleetClientCertificate,
|
||||
orbitHostInfo,
|
||||
func(err net.Error) {
|
||||
log.Info().Err(err).Msg("network error")
|
||||
&service.OnGetConfigErrFuncs{
|
||||
DebugErrFunc: func(err error) {
|
||||
log.Debug().Err(err).Msg("get config")
|
||||
},
|
||||
OnNetErrFunc: func(err error) {
|
||||
log.Info().Err(err).Msg("network error")
|
||||
},
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
@ -1058,8 +1062,13 @@ func main() {
|
||||
enrollSecret,
|
||||
fleetClientCertificate,
|
||||
orbitHostInfo,
|
||||
func(err net.Error) {
|
||||
log.Info().Err(err).Msg("network error")
|
||||
&service.OnGetConfigErrFuncs{
|
||||
DebugErrFunc: func(err error) {
|
||||
log.Debug().Err(err).Msg("get config")
|
||||
},
|
||||
OnNetErrFunc: func(err error) {
|
||||
log.Info().Err(err).Msg("network error")
|
||||
},
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
|
@ -37,7 +37,7 @@ type OrbitClient struct {
|
||||
lastRecordedErr error
|
||||
|
||||
configCache configCache
|
||||
onNetErrOnGetConfigFn OnNetErrOnGetConfigFunc
|
||||
onGetConfigErrFns *OnGetConfigErrFuncs
|
||||
lastNetErrOnGetConfigLogged time.Time
|
||||
|
||||
// TestNodeKey is used for testing only.
|
||||
@ -87,8 +87,14 @@ func (oc *OrbitClient) request(verb string, path string, params interface{}, res
|
||||
return nil
|
||||
}
|
||||
|
||||
// OnNetErrOnGetConfigFunc is a function executed when there are network errors in GetConfig.
|
||||
type OnNetErrOnGetConfigFunc func(err net.Error)
|
||||
// OnGetConfigErrFuncs defines functions to be executed on GetConfig errors.
|
||||
type OnGetConfigErrFuncs struct {
|
||||
// OnNetErrFunc receives network and 5XX errors on GetConfig requests.
|
||||
// These errors are rate limited to once every 5 minutes.
|
||||
OnNetErrFunc func(err error)
|
||||
// DebugErrFunc receives all errors on GetConfig requests.
|
||||
DebugErrFunc func(err error)
|
||||
}
|
||||
|
||||
var (
|
||||
netErrInterval = 5 * time.Minute
|
||||
@ -100,8 +106,7 @@ var (
|
||||
// - rootDir is the Orbit's root directory, where the Orbit node key is loaded-from/stored.
|
||||
// - addr is the address of the Fleet server.
|
||||
// - orbitHostInfo is the host system information used for enrolling to Fleet.
|
||||
// - OnNetErrOnGetConfigFn is called when there's a network error in GetConfig (this method
|
||||
// is rate limited to be executed once every 5 minutes).
|
||||
// - onGetConfigErrFns can be used to handle errors in the GetConfig request.
|
||||
func NewOrbitClient(
|
||||
rootDir string,
|
||||
addr string,
|
||||
@ -110,7 +115,7 @@ func NewOrbitClient(
|
||||
enrollSecret string,
|
||||
fleetClientCert *tls.Certificate,
|
||||
orbitHostInfo fleet.OrbitHostInfo,
|
||||
onNetErrOnGetConfigFn OnNetErrOnGetConfigFunc,
|
||||
onGetConfigErrFns *OnGetConfigErrFuncs,
|
||||
) (*OrbitClient, error) {
|
||||
orbitCapabilities := fleet.CapabilityMap{}
|
||||
bc, err := newBaseClient(addr, insecureSkipVerify, rootCA, "", fleetClientCert, orbitCapabilities)
|
||||
@ -120,12 +125,12 @@ func NewOrbitClient(
|
||||
|
||||
nodeKeyFilePath := filepath.Join(rootDir, constant.OrbitNodeKeyFileName)
|
||||
return &OrbitClient{
|
||||
nodeKeyFilePath: nodeKeyFilePath,
|
||||
baseClient: bc,
|
||||
enrollSecret: enrollSecret,
|
||||
hostInfo: orbitHostInfo,
|
||||
enrolled: false,
|
||||
onNetErrOnGetConfigFn: onNetErrOnGetConfigFn,
|
||||
nodeKeyFilePath: nodeKeyFilePath,
|
||||
baseClient: bc,
|
||||
enrollSecret: enrollSecret,
|
||||
hostInfo: orbitHostInfo,
|
||||
enrolled: false,
|
||||
onGetConfigErrFns: onGetConfigErrFns,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -140,22 +145,28 @@ func (oc *OrbitClient) GetConfig() (*fleet.OrbitConfig, error) {
|
||||
// If time-to-live passed, we update the config cache
|
||||
now := time.Now()
|
||||
if now.After(oc.configCache.lastUpdated.Add(configCacheTTL)) {
|
||||
verb, path := "POST", "/api/fleet/orbit/config"
|
||||
var (
|
||||
resp fleet.OrbitConfig
|
||||
err error
|
||||
)
|
||||
verb, path := "POST", "/api/fleet/orbit/config"
|
||||
// Retry until we don't get a network error.
|
||||
// Retry until we don't get a network error or a 5XX error.
|
||||
_ = retry.Do(func() error {
|
||||
err = oc.authenticatedRequest(verb, path, &orbitGetConfigRequest{}, &resp)
|
||||
var netErr net.Error
|
||||
if errors.As(err, &netErr) {
|
||||
var (
|
||||
netErr net.Error
|
||||
statusCodeErr *statusCodeErr
|
||||
)
|
||||
if err != nil && oc.onGetConfigErrFns != nil && oc.onGetConfigErrFns.DebugErrFunc != nil {
|
||||
oc.onGetConfigErrFns.DebugErrFunc(err)
|
||||
}
|
||||
if errors.As(err, &netErr) || (errors.As(err, &statusCodeErr) && statusCodeErr.code >= 500) {
|
||||
now := time.Now()
|
||||
if oc.onNetErrOnGetConfigFn != nil && now.After(oc.lastNetErrOnGetConfigLogged.Add(netErrInterval)) {
|
||||
oc.onNetErrOnGetConfigFn(netErr)
|
||||
if oc.onGetConfigErrFns != nil && oc.onGetConfigErrFns.OnNetErrFunc != nil && now.After(oc.lastNetErrOnGetConfigLogged.Add(netErrInterval)) {
|
||||
oc.onGetConfigErrFns.OnNetErrFunc(err)
|
||||
oc.lastNetErrOnGetConfigLogged = now
|
||||
}
|
||||
return err // retry on network errors
|
||||
return err // retry on network or server 5XX errors
|
||||
}
|
||||
return nil
|
||||
}, retry.WithInterval(configRetryOnNetworkError))
|
||||
|
@ -35,6 +35,7 @@ GENERATE_MSI=1 \
|
||||
ENROLL_SECRET=6/EzU/+jPkxfTamWnRv1+IJsO4T9Etju \
|
||||
FLEET_DESKTOP=1 \
|
||||
USE_FLEET_SERVER_CERTIFICATE=1 \
|
||||
DEBUG=1 \
|
||||
./tools/tuf/test/main.sh
|
||||
```
|
||||
|
||||
|
@ -29,6 +29,7 @@ set -ex
|
||||
# USE_FLEET_SERVER_CERTIFICATE: Whether to use a custom certificate bundle for the Fleet server (passes --fleet-certificate).
|
||||
# USE_UPDATE_SERVER_CERTIFICATE: Whether to use a custom certificate bundle for the update server (passes --update-tls-certificate).
|
||||
# FLEET_DESKTOP_ALTERNATIVE_BROWSER_HOST: Alternative host:port to use for the Fleet Desktop browser URLs.
|
||||
# DEBUG: Whether or not to build the package with --debug.
|
||||
|
||||
if [ -n "$GENERATE_PKG" ]; then
|
||||
echo "Generating pkg..."
|
||||
@ -40,7 +41,7 @@ if [ -n "$GENERATE_PKG" ]; then
|
||||
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
||||
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
||||
${INSECURE:+--insecure} \
|
||||
--debug \
|
||||
${DEBUG:+--debug} \
|
||||
--update-roots="$ROOT_KEYS" \
|
||||
--update-interval=10s \
|
||||
--disable-open-folder \
|
||||
@ -64,7 +65,7 @@ if [ -n "$GENERATE_DEB" ]; then
|
||||
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
||||
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
||||
${INSECURE:+--insecure} \
|
||||
--debug \
|
||||
${DEBUG:+--debug} \
|
||||
--update-roots="$ROOT_KEYS" \
|
||||
--update-interval=10s \
|
||||
--disable-open-folder \
|
||||
@ -87,7 +88,7 @@ if [ -n "$GENERATE_RPM" ]; then
|
||||
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
||||
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
||||
${INSECURE:+--insecure} \
|
||||
--debug \
|
||||
${DEBUG:+--debug} \
|
||||
--update-roots="$ROOT_KEYS" \
|
||||
--update-interval=10s \
|
||||
--disable-open-folder \
|
||||
@ -110,7 +111,7 @@ if [ -n "$GENERATE_MSI" ]; then
|
||||
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
||||
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
||||
${INSECURE:+--insecure} \
|
||||
--debug \
|
||||
${DEBUG:+--debug} \
|
||||
--update-roots="$ROOT_KEYS" \
|
||||
--update-interval=10s \
|
||||
--disable-open-folder \
|
||||
|
Loading…
Reference in New Issue
Block a user