mirror of
https://github.com/empayre/fleet.git
synced 2024-11-06 08:55:24 +00:00
Orbit to detect 5XX alongside network errors (#17084)
#16423, #16326 In the [original PR](https://github.com/fleetdm/fleet/pull/16968) we missed detecting 5XX errors. Fleet usually runs behind load balancers, so when Fleet is brought down, Orbit still connects successfully but receives 5XX errors; we need to detect those too.
This commit is contained in:
parent
2c383a060f
commit
0642387b32
@ -9,7 +9,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"net"
|
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
@ -783,8 +782,13 @@ func main() {
|
|||||||
enrollSecret,
|
enrollSecret,
|
||||||
fleetClientCertificate,
|
fleetClientCertificate,
|
||||||
orbitHostInfo,
|
orbitHostInfo,
|
||||||
func(err net.Error) {
|
&service.OnGetConfigErrFuncs{
|
||||||
log.Info().Err(err).Msg("network error")
|
DebugErrFunc: func(err error) {
|
||||||
|
log.Debug().Err(err).Msg("get config")
|
||||||
|
},
|
||||||
|
OnNetErrFunc: func(err error) {
|
||||||
|
log.Info().Err(err).Msg("network error")
|
||||||
|
},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -1058,8 +1062,13 @@ func main() {
|
|||||||
enrollSecret,
|
enrollSecret,
|
||||||
fleetClientCertificate,
|
fleetClientCertificate,
|
||||||
orbitHostInfo,
|
orbitHostInfo,
|
||||||
func(err net.Error) {
|
&service.OnGetConfigErrFuncs{
|
||||||
log.Info().Err(err).Msg("network error")
|
DebugErrFunc: func(err error) {
|
||||||
|
log.Debug().Err(err).Msg("get config")
|
||||||
|
},
|
||||||
|
OnNetErrFunc: func(err error) {
|
||||||
|
log.Info().Err(err).Msg("network error")
|
||||||
|
},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -37,7 +37,7 @@ type OrbitClient struct {
|
|||||||
lastRecordedErr error
|
lastRecordedErr error
|
||||||
|
|
||||||
configCache configCache
|
configCache configCache
|
||||||
onNetErrOnGetConfigFn OnNetErrOnGetConfigFunc
|
onGetConfigErrFns *OnGetConfigErrFuncs
|
||||||
lastNetErrOnGetConfigLogged time.Time
|
lastNetErrOnGetConfigLogged time.Time
|
||||||
|
|
||||||
// TestNodeKey is used for testing only.
|
// TestNodeKey is used for testing only.
|
||||||
@ -87,8 +87,14 @@ func (oc *OrbitClient) request(verb string, path string, params interface{}, res
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// OnNetErrOnGetConfigFunc is a function executed when there are network errors in GetConfig.
|
// OnGetConfigErrFuncs defines functions to be executed on GetConfig errors.
|
||||||
type OnNetErrOnGetConfigFunc func(err net.Error)
|
type OnGetConfigErrFuncs struct {
|
||||||
|
// OnNetErrFunc receives network and 5XX errors on GetConfig requests.
|
||||||
|
// These errors are rate limited to once every 5 minutes.
|
||||||
|
OnNetErrFunc func(err error)
|
||||||
|
// DebugErrFunc receives all errors on GetConfig requests.
|
||||||
|
DebugErrFunc func(err error)
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
netErrInterval = 5 * time.Minute
|
netErrInterval = 5 * time.Minute
|
||||||
@ -100,8 +106,7 @@ var (
|
|||||||
// - rootDir is the Orbit's root directory, where the Orbit node key is loaded-from/stored.
|
// - rootDir is the Orbit's root directory, where the Orbit node key is loaded-from/stored.
|
||||||
// - addr is the address of the Fleet server.
|
// - addr is the address of the Fleet server.
|
||||||
// - orbitHostInfo is the host system information used for enrolling to Fleet.
|
// - orbitHostInfo is the host system information used for enrolling to Fleet.
|
||||||
// - OnNetErrOnGetConfigFn is called when there's a network error in GetConfig (this method
|
// - onGetConfigErrFns can be used to handle errors in the GetConfig request.
|
||||||
// is rate limited to be executed once every 5 minutes).
|
|
||||||
func NewOrbitClient(
|
func NewOrbitClient(
|
||||||
rootDir string,
|
rootDir string,
|
||||||
addr string,
|
addr string,
|
||||||
@ -110,7 +115,7 @@ func NewOrbitClient(
|
|||||||
enrollSecret string,
|
enrollSecret string,
|
||||||
fleetClientCert *tls.Certificate,
|
fleetClientCert *tls.Certificate,
|
||||||
orbitHostInfo fleet.OrbitHostInfo,
|
orbitHostInfo fleet.OrbitHostInfo,
|
||||||
onNetErrOnGetConfigFn OnNetErrOnGetConfigFunc,
|
onGetConfigErrFns *OnGetConfigErrFuncs,
|
||||||
) (*OrbitClient, error) {
|
) (*OrbitClient, error) {
|
||||||
orbitCapabilities := fleet.CapabilityMap{}
|
orbitCapabilities := fleet.CapabilityMap{}
|
||||||
bc, err := newBaseClient(addr, insecureSkipVerify, rootCA, "", fleetClientCert, orbitCapabilities)
|
bc, err := newBaseClient(addr, insecureSkipVerify, rootCA, "", fleetClientCert, orbitCapabilities)
|
||||||
@ -120,12 +125,12 @@ func NewOrbitClient(
|
|||||||
|
|
||||||
nodeKeyFilePath := filepath.Join(rootDir, constant.OrbitNodeKeyFileName)
|
nodeKeyFilePath := filepath.Join(rootDir, constant.OrbitNodeKeyFileName)
|
||||||
return &OrbitClient{
|
return &OrbitClient{
|
||||||
nodeKeyFilePath: nodeKeyFilePath,
|
nodeKeyFilePath: nodeKeyFilePath,
|
||||||
baseClient: bc,
|
baseClient: bc,
|
||||||
enrollSecret: enrollSecret,
|
enrollSecret: enrollSecret,
|
||||||
hostInfo: orbitHostInfo,
|
hostInfo: orbitHostInfo,
|
||||||
enrolled: false,
|
enrolled: false,
|
||||||
onNetErrOnGetConfigFn: onNetErrOnGetConfigFn,
|
onGetConfigErrFns: onGetConfigErrFns,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,22 +145,28 @@ func (oc *OrbitClient) GetConfig() (*fleet.OrbitConfig, error) {
|
|||||||
// If time-to-live passed, we update the config cache
|
// If time-to-live passed, we update the config cache
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
if now.After(oc.configCache.lastUpdated.Add(configCacheTTL)) {
|
if now.After(oc.configCache.lastUpdated.Add(configCacheTTL)) {
|
||||||
|
verb, path := "POST", "/api/fleet/orbit/config"
|
||||||
var (
|
var (
|
||||||
resp fleet.OrbitConfig
|
resp fleet.OrbitConfig
|
||||||
err error
|
err error
|
||||||
)
|
)
|
||||||
verb, path := "POST", "/api/fleet/orbit/config"
|
// Retry until we don't get a network error or a 5XX error.
|
||||||
// Retry until we don't get a network error.
|
|
||||||
_ = retry.Do(func() error {
|
_ = retry.Do(func() error {
|
||||||
err = oc.authenticatedRequest(verb, path, &orbitGetConfigRequest{}, &resp)
|
err = oc.authenticatedRequest(verb, path, &orbitGetConfigRequest{}, &resp)
|
||||||
var netErr net.Error
|
var (
|
||||||
if errors.As(err, &netErr) {
|
netErr net.Error
|
||||||
|
statusCodeErr *statusCodeErr
|
||||||
|
)
|
||||||
|
if err != nil && oc.onGetConfigErrFns != nil && oc.onGetConfigErrFns.DebugErrFunc != nil {
|
||||||
|
oc.onGetConfigErrFns.DebugErrFunc(err)
|
||||||
|
}
|
||||||
|
if errors.As(err, &netErr) || (errors.As(err, &statusCodeErr) && statusCodeErr.code >= 500) {
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
if oc.onNetErrOnGetConfigFn != nil && now.After(oc.lastNetErrOnGetConfigLogged.Add(netErrInterval)) {
|
if oc.onGetConfigErrFns != nil && oc.onGetConfigErrFns.OnNetErrFunc != nil && now.After(oc.lastNetErrOnGetConfigLogged.Add(netErrInterval)) {
|
||||||
oc.onNetErrOnGetConfigFn(netErr)
|
oc.onGetConfigErrFns.OnNetErrFunc(err)
|
||||||
oc.lastNetErrOnGetConfigLogged = now
|
oc.lastNetErrOnGetConfigLogged = now
|
||||||
}
|
}
|
||||||
return err // retry on network errors
|
return err // retry on network or server 5XX errors
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}, retry.WithInterval(configRetryOnNetworkError))
|
}, retry.WithInterval(configRetryOnNetworkError))
|
||||||
|
@ -35,6 +35,7 @@ GENERATE_MSI=1 \
|
|||||||
ENROLL_SECRET=6/EzU/+jPkxfTamWnRv1+IJsO4T9Etju \
|
ENROLL_SECRET=6/EzU/+jPkxfTamWnRv1+IJsO4T9Etju \
|
||||||
FLEET_DESKTOP=1 \
|
FLEET_DESKTOP=1 \
|
||||||
USE_FLEET_SERVER_CERTIFICATE=1 \
|
USE_FLEET_SERVER_CERTIFICATE=1 \
|
||||||
|
DEBUG=1 \
|
||||||
./tools/tuf/test/main.sh
|
./tools/tuf/test/main.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -29,6 +29,7 @@ set -ex
|
|||||||
# USE_FLEET_SERVER_CERTIFICATE: Whether to use a custom certificate bundle.
|
# USE_FLEET_SERVER_CERTIFICATE: Whether to use a custom certificate bundle.
|
||||||
# USE_UPDATE_SERVER_CERTIFICATE: Whether to use a custom certificate bundle.
|
# USE_UPDATE_SERVER_CERTIFICATE: Whether to use a custom certificate bundle.
|
||||||
# FLEET_DESKTOP_ALTERNATIVE_BROWSER_HOST: Alternative host:port to use for the Fleet Desktop browser URLs.
|
# FLEET_DESKTOP_ALTERNATIVE_BROWSER_HOST: Alternative host:port to use for the Fleet Desktop browser URLs.
|
||||||
|
# DEBUG: Whether or not to build the package with --debug.
|
||||||
|
|
||||||
if [ -n "$GENERATE_PKG" ]; then
|
if [ -n "$GENERATE_PKG" ]; then
|
||||||
echo "Generating pkg..."
|
echo "Generating pkg..."
|
||||||
@ -40,7 +41,7 @@ if [ -n "$GENERATE_PKG" ]; then
|
|||||||
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
||||||
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
||||||
${INSECURE:+--insecure} \
|
${INSECURE:+--insecure} \
|
||||||
--debug \
|
${DEBUG:+--debug} \
|
||||||
--update-roots="$ROOT_KEYS" \
|
--update-roots="$ROOT_KEYS" \
|
||||||
--update-interval=10s \
|
--update-interval=10s \
|
||||||
--disable-open-folder \
|
--disable-open-folder \
|
||||||
@ -64,7 +65,7 @@ if [ -n "$GENERATE_DEB" ]; then
|
|||||||
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
||||||
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
||||||
${INSECURE:+--insecure} \
|
${INSECURE:+--insecure} \
|
||||||
--debug \
|
${DEBUG:+--debug} \
|
||||||
--update-roots="$ROOT_KEYS" \
|
--update-roots="$ROOT_KEYS" \
|
||||||
--update-interval=10s \
|
--update-interval=10s \
|
||||||
--disable-open-folder \
|
--disable-open-folder \
|
||||||
@ -87,7 +88,7 @@ if [ -n "$GENERATE_RPM" ]; then
|
|||||||
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
||||||
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
||||||
${INSECURE:+--insecure} \
|
${INSECURE:+--insecure} \
|
||||||
--debug \
|
${DEBUG:+--debug} \
|
||||||
--update-roots="$ROOT_KEYS" \
|
--update-roots="$ROOT_KEYS" \
|
||||||
--update-interval=10s \
|
--update-interval=10s \
|
||||||
--disable-open-folder \
|
--disable-open-folder \
|
||||||
@ -110,7 +111,7 @@ if [ -n "$GENERATE_MSI" ]; then
|
|||||||
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
${USE_FLEET_SERVER_CERTIFICATE:+--fleet-certificate=./tools/osquery/fleet.crt} \
|
||||||
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
${USE_UPDATE_SERVER_CERTIFICATE:+--update-tls-certificate=./tools/osquery/fleet.crt} \
|
||||||
${INSECURE:+--insecure} \
|
${INSECURE:+--insecure} \
|
||||||
--debug \
|
${DEBUG:+--debug} \
|
||||||
--update-roots="$ROOT_KEYS" \
|
--update-roots="$ROOT_KEYS" \
|
||||||
--update-interval=10s \
|
--update-interval=10s \
|
||||||
--disable-open-folder \
|
--disable-open-folder \
|
||||||
|
Loading…
Reference in New Issue
Block a user