Reduce queued instances and improve update_unclaimed script (#9343)

Reduce the number of queued sandbox instances from 20 to 10, make the
update_unclaimed script aware of the new count, and improve its checks.
This commit is contained in:
Robert Fairburn 2023-01-16 14:09:37 -06:00 committed by GitHub
parent 2447a371b0
commit 3bdbce10c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 54 additions and 7 deletions

View File

@ -151,6 +151,44 @@ resource "aws_lambda_function" "jitprovisioner" {
}
}
# Attaches the Nuagic "lambda-warmer" registry module (pinned to 3.0.1) to the
# jitprovisioner Lambda function.
# NOTE(review): presumably the module invokes the function periodically to
# avoid cold starts; no schedule is configured here, so confirm the module's
# default behavior against its documentation.
module "jitprovisioner-lambda-warmer" {
source = "Nuagic/lambda-warmer/aws"
version = "3.0.1"
# Point the warmer at the jitprovisioner function by both name and ARN.
function_name = aws_lambda_function.jitprovisioner.function_name
function_arn = aws_lambda_function.jitprovisioner.arn
# This just needs to have a request to parse.
# The payload is shaped like a load-balancer (ELB target group) request event:
# a GET to /health with an empty body. The ARN, host, and header values look
# like sample data rather than real resources -- TODO confirm nothing depends
# on them.
input = <<EOINPUT
{
"requestContext": {
"elb": {
"targetGroupArn": "arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/lambda-279XGJDqGZ5rsrHC2Fjr/49e9d65c45c6791a"
}
},
"httpMethod": "GET",
"path": "/health",
"queryStringParameters": {
"query": "1234ABCD"
},
"multiValueHeaders": {
"accept": ["text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"],
"accept-encoding": ["gzip"],
"accept-language": ["en-US,en;q=0.9"],
"connection": ["keep-alive"],
"host": ["lambda-alb-123578498.us-east-2.elb.amazonaws.com"],
"upgrade-insecure-requests": ["1"],
"user-agent": ["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"],
"x-amzn-trace-id": ["Root=1-5c536348-3d683b8b04734faae651f476"],
"x-forwarded-for": ["72.12.164.125"],
"x-forwarded-port": ["80"],
"x-forwarded-proto": ["http"],
"x-imforwards": ["20"]
},
"body": "",
"isBase64Encoded": false
}
EOINPUT
}
resource "random_password" "authorization" {
length = 16
special = false

View File

@ -215,7 +215,7 @@ resource "aws_cloudwatch_metric_alarm" "unclaimed" {
namespace = "Fleet/sandbox"
period = "900"
statistic = "Average"
threshold = "10"
threshold = "5"
alarm_actions = [module.notify_slack.slack_topic_arn]
ok_actions = [module.notify_slack.slack_topic_arn]
treat_missing_data = "breaching"

View File

@ -53,7 +53,7 @@ spec:
- name: FLEET_SERVER_SANDBOX_ENABLED
value: "1"
- name: FLEET_VULNERABILITIES_PERIODICITY
value: "5m"
value: "15m"
- name: FLEET_LICENSE_ENFORCE_HOST_LIMIT
value: "true"
- name: FLEET_VULNERABILITIES_DATABASES_PATH

View File

@ -157,7 +157,7 @@ resource "helm_release" "main" {
set {
name = "imageTag"
value = "v4.26.0"
value = "v4.26.0-1"
}
set {

View File

@ -257,7 +257,7 @@ resource "aws_ecs_task_definition" "main" {
},
{
name = "QUEUED_INSTANCES"
value = "20"
value = "10"
},
{
name = "TF_VAR_redis_address"

View File

@ -53,16 +53,25 @@ export TF_VAR_mysql_secret="arn:aws:secretsmanager:us-east-2:411315989055:secret
terraform init -backend-config=backend.conf
EXPECTED_UNCLAIMED_INSTANCES=10
PREPROVISIONER_TASK_DEFINITION_ARN="$(aws ecs list-task-definitions | jq -r '.taskDefinitionArns[] | select(contains("sandbox-prod-preprovisioner"))')"
UNCLAIMED_INSTANCES="$(get_unclaimed_instances)"
UNCLAIMED_ARRAY=( ${UNCLAIMED_INSTANCES} )
# From the 11th and on.
ALL_BUT_TEN="$(tail -n +11 <<<"${UNCLAIMED_INSTANCES}")"
HALF_ROUND_DOWN="${UNCLAIMED_ARRAY[@]::$((${#UNCLAIMED_ARRAY[@]} / 2))}"
purge_instances "${ALL_BUT_TEN:?}"
purge_instances "${HALF_ROUND_DOWN:?}"
provision_new_instances
# If something went wrong, don't let us continue with way too few unclaimed instances
NEW_UNCLAIMED="$(get_unclaimed_instances | wc -w)"
if [ ${NEW_UNCLAIMED:?} -lt ${EXPECTED_UNCLAIMED_INSTANCES:?} ]; then
echo "Only ${NEW_UNCLAIMED:?} instances found, ${EXPECTED_UNCLAIMED_INSTANCES:?} expected. Press ENTER to continue or CTRL-C to abort."
read
fi
# Get a fresh unclaimed as close to runtime as possible to reduce risk of deleting a claimed instance.
REMAINING_UNCLAIMED="$(comm -12 <(get_unclaimed_instances) <(echo "${UNCLAIMED_INSTANCES:?}"))"
purge_instances "${REMAINING_UNCLAIMED:?}"