mirror of
https://github.com/valitydev/salt.git
synced 2024-11-08 01:18:58 +00:00
Merge pull request #47609 from rrroo/improve-aws-retries
Unify retries for AWS API
This commit is contained in:
commit
70b62074b5
@ -75,7 +75,6 @@ import time
|
||||
import uuid
|
||||
import pprint
|
||||
import logging
|
||||
import random
|
||||
|
||||
# Import libs for talking to the EC2 API
|
||||
import hmac
|
||||
@ -302,8 +301,8 @@ def query(params=None, setname=None, requesturl=None, location=None,
|
||||
# Retrieve access credentials from meta-data, or use provided
|
||||
access_key_id, secret_access_key, token = aws.creds(provider)
|
||||
|
||||
attempts = 5
|
||||
while attempts > 0:
|
||||
attempts = 0
|
||||
while attempts < aws.AWS_MAX_RETRIES:
|
||||
params_with_headers = params.copy()
|
||||
timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
|
||||
|
||||
@ -381,7 +380,6 @@ def query(params=None, setname=None, requesturl=None, location=None,
|
||||
|
||||
signature = hmac.new(signing_key, (string_to_sign).encode('utf-8'),
|
||||
hashlib.sha256).hexdigest()
|
||||
#sig = binascii.b2a_base64(hashed)
|
||||
|
||||
authorization_header = algorithm + ' ' + 'Credential=' + \
|
||||
provider['id'] + '/' + credential_scope + \
|
||||
@ -407,15 +405,14 @@ def query(params=None, setname=None, requesturl=None, location=None,
|
||||
|
||||
# check to see if we should retry the query
|
||||
err_code = data.get('Errors', {}).get('Error', {}).get('Code', '')
|
||||
if attempts > 0 and err_code and err_code in EC2_RETRY_CODES:
|
||||
attempts -= 1
|
||||
if err_code and err_code in EC2_RETRY_CODES:
|
||||
attempts += 1
|
||||
log.error(
|
||||
'EC2 Response Status Code and Error: [%s %s] %s; '
|
||||
'Attempts remaining: %s',
|
||||
exc.response.status_code, exc, data, attempts
|
||||
)
|
||||
# Wait a bit before continuing to prevent throttling
|
||||
time.sleep(2)
|
||||
aws.sleep_exponential_backoff(attempts)
|
||||
continue
|
||||
|
||||
log.error(
|
||||
@ -1562,10 +1559,6 @@ def _modify_eni_properties(eni_id, properties=None, vm_=None):
|
||||
for k, v in six.iteritems(properties):
|
||||
params[k] = v
|
||||
|
||||
retries = 5
|
||||
while retries > 0:
|
||||
retries = retries - 1
|
||||
|
||||
result = aws.query(params,
|
||||
return_root=True,
|
||||
location=get_location(vm_),
|
||||
@ -1574,17 +1567,13 @@ def _modify_eni_properties(eni_id, properties=None, vm_=None):
|
||||
sigver='4')
|
||||
|
||||
if isinstance(result, dict) and result.get('error'):
|
||||
time.sleep(1)
|
||||
continue
|
||||
|
||||
return result
|
||||
|
||||
raise SaltCloudException(
|
||||
'Could not change interface <{0}> attributes '
|
||||
'<\'{1}\'> after 5 retries'.format(
|
||||
'Could not change interface <{0}> attributes <\'{1}\'>'.format(
|
||||
eni_id, properties
|
||||
)
|
||||
)
|
||||
else:
|
||||
return result
|
||||
|
||||
|
||||
def _associate_eip_with_interface(eni_id, eip_id, private_ip=None, vm_=None):
|
||||
@ -1597,8 +1586,6 @@ def _associate_eip_with_interface(eni_id, eip_id, private_ip=None, vm_=None):
|
||||
be NATted to - useful if you have multiple IP addresses assigned to an
|
||||
interface.
|
||||
'''
|
||||
retries = 5
|
||||
while retries > 0:
|
||||
params = {'Action': 'AssociateAddress',
|
||||
'NetworkInterfaceId': eni_id,
|
||||
'AllocationId': eip_id}
|
||||
@ -1606,7 +1593,6 @@ def _associate_eip_with_interface(eni_id, eip_id, private_ip=None, vm_=None):
|
||||
if private_ip:
|
||||
params['PrivateIpAddress'] = private_ip
|
||||
|
||||
retries = retries - 1
|
||||
result = aws.query(params,
|
||||
return_root=True,
|
||||
location=get_location(vm_),
|
||||
@ -1614,12 +1600,13 @@ def _associate_eip_with_interface(eni_id, eip_id, private_ip=None, vm_=None):
|
||||
opts=__opts__,
|
||||
sigver='4')
|
||||
|
||||
if isinstance(result, dict) and result.get('error'):
|
||||
time.sleep(1)
|
||||
continue
|
||||
|
||||
if not result[2].get('associationId'):
|
||||
break
|
||||
raise SaltCloudException(
|
||||
'Could not associate elastic ip address '
|
||||
'<{0}> with network interface <{1}>'.format(
|
||||
eip_id, eni_id
|
||||
)
|
||||
)
|
||||
|
||||
log.debug(
|
||||
'Associated ElasticIP address %s with interface %s',
|
||||
@ -1628,13 +1615,6 @@ def _associate_eip_with_interface(eni_id, eip_id, private_ip=None, vm_=None):
|
||||
|
||||
return result[2].get('associationId')
|
||||
|
||||
raise SaltCloudException(
|
||||
'Could not associate elastic ip address '
|
||||
'<{0}> with network interface <{1}>'.format(
|
||||
eip_id, eni_id
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _update_enis(interfaces, instance, vm_=None):
|
||||
config_enis = {}
|
||||
@ -2011,7 +1991,8 @@ def request_instance(vm_=None, call=None):
|
||||
params[termination_key] = six.text_type(set_del_root_vol_on_destroy).lower()
|
||||
|
||||
# Use default volume type if not specified
|
||||
if ex_blockdevicemappings and dev_index < len(ex_blockdevicemappings) and 'Ebs.VolumeType' not in ex_blockdevicemappings[dev_index]:
|
||||
if ex_blockdevicemappings and dev_index < len(ex_blockdevicemappings) and \
|
||||
'Ebs.VolumeType' not in ex_blockdevicemappings[dev_index]:
|
||||
type_key = '{0}BlockDeviceMapping.{1}.Ebs.VolumeType'.format(spot_prefix, dev_index)
|
||||
params[type_key] = rd_type
|
||||
|
||||
@ -2182,8 +2163,7 @@ def query_instance(vm_=None, call=None):
|
||||
provider = get_provider(vm_)
|
||||
|
||||
attempts = 0
|
||||
# perform exponential backoff and wait up to one minute (2**6 seconds)
|
||||
while attempts < 7:
|
||||
while attempts < aws.AWS_MAX_RETRIES:
|
||||
data, requesturl = aws.query(params, # pylint: disable=unbalanced-tuple-unpacking
|
||||
location=location,
|
||||
provider=provider,
|
||||
@ -2205,7 +2185,7 @@ def query_instance(vm_=None, call=None):
|
||||
else:
|
||||
break
|
||||
|
||||
time.sleep(random.uniform(1, 2**attempts))
|
||||
aws.sleep_exponential_backoff(attempts)
|
||||
attempts += 1
|
||||
continue
|
||||
else:
|
||||
@ -2215,7 +2195,6 @@ def query_instance(vm_=None, call=None):
|
||||
|
||||
def __query_ip_address(params, url): # pylint: disable=W0613
|
||||
data = aws.query(params,
|
||||
#requesturl=url,
|
||||
location=location,
|
||||
provider=provider,
|
||||
opts=__opts__,
|
||||
@ -3028,9 +3007,9 @@ def set_tags(name=None,
|
||||
params['Tag.{0}.Key'.format(idx)] = tag_k
|
||||
params['Tag.{0}.Value'.format(idx)] = tag_v
|
||||
|
||||
attempts = 5
|
||||
while attempts >= 0:
|
||||
result = aws.query(params,
|
||||
attempts = 0
|
||||
while attempts < aws.AWS_MAX_RETRIES:
|
||||
aws.query(params,
|
||||
setname='tagSet',
|
||||
location=location,
|
||||
provider=get_provider(),
|
||||
@ -3064,9 +3043,8 @@ def set_tags(name=None,
|
||||
|
||||
if failed_to_set_tags:
|
||||
log.warning('Failed to set tags. Remaining attempts %s', attempts)
|
||||
attempts -= 1
|
||||
# Just a little delay between attempts...
|
||||
time.sleep(1)
|
||||
attempts += 1
|
||||
aws.sleep_exponential_backoff(attempts)
|
||||
continue
|
||||
|
||||
return settags
|
||||
@ -3405,8 +3383,8 @@ def _get_node(name=None, instance_id=None, location=None):
|
||||
|
||||
provider = get_provider()
|
||||
|
||||
attempts = 10
|
||||
while attempts >= 0:
|
||||
attempts = 0
|
||||
while attempts < aws.AWS_MAX_RETRIES:
|
||||
try:
|
||||
instances = aws.query(params,
|
||||
location=location,
|
||||
@ -3416,13 +3394,12 @@ def _get_node(name=None, instance_id=None, location=None):
|
||||
instance_info = _extract_instance_info(instances).values()
|
||||
return next(iter(instance_info))
|
||||
except IndexError:
|
||||
attempts -= 1
|
||||
attempts += 1
|
||||
log.debug(
|
||||
'Failed to get the data for node \'%s\'. Remaining '
|
||||
'attempts: %s', instance_id or name, attempts
|
||||
)
|
||||
# Just a little delay between attempts...
|
||||
time.sleep(0.5)
|
||||
aws.sleep_exponential_backoff(attempts)
|
||||
return {}
|
||||
|
||||
|
||||
@ -3946,7 +3923,8 @@ def register_image(kwargs=None, call=None):
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
salt-cloud -f register_image my-ec2-config ami_name=my_ami description="my description" root_device_name=/dev/xvda snapshot_id=snap-xxxxxxxx
|
||||
salt-cloud -f register_image my-ec2-config ami_name=my_ami description="my description"
|
||||
root_device_name=/dev/xvda snapshot_id=snap-xxxxxxxx
|
||||
'''
|
||||
|
||||
if call != 'function':
|
||||
|
@ -52,6 +52,8 @@ AWS_RETRY_CODES = [
|
||||
]
|
||||
AWS_METADATA_TIMEOUT = 3.05
|
||||
|
||||
AWS_MAX_RETRIES = 7
|
||||
|
||||
IROLE_CODE = 'use-instance-role-credentials'
|
||||
__AccessKeyId__ = ''
|
||||
__SecretAccessKey__ = ''
|
||||
@ -61,6 +63,21 @@ __Location__ = ''
|
||||
__AssumeCache__ = {}
|
||||
|
||||
|
||||
def sleep_exponential_backoff(attempts):
    """
    Pause for a randomized, exponentially growing interval between retries.

    The delay is drawn uniformly from the range spanning 1 and
    ``2**attempts`` seconds, so a caller that passes an increasing attempt
    counter waits longer (on average) after each consecutive failure. This
    throttles requests during "API Rate Exceeded" failures, as suggested by
    the AWS documentation here:
    https://docs.aws.amazon.com/AWSEC2/latest/APIReference/query-api-troubleshooting.html
    and also here:
    https://docs.aws.amazon.com/general/latest/gr/api-retries.html

    Failure to implement this approach results in a failure rate of >30%
    when using salt-cloud with "--parallel" when creating 50 or more
    instances with a fixed delay of 2 seconds. A failure rate of >10% is
    observed when using the salt-api with an asynchronous client specified
    (runner_async).

    attempts
        Exponent applied to 2 to obtain the far end of the delay range,
        in seconds.
    """
    # The range widens exponentially with the attempt count; the random draw
    # spreads out clients that started retrying at the same moment.
    upper_bound = 2 ** attempts
    delay = random.uniform(1, upper_bound)
    time.sleep(delay)
|
||||
|
||||
|
||||
def creds(provider):
|
||||
'''
|
||||
Return the credentials for AWS signing. This could be just the id and key
|
||||
@ -441,9 +458,8 @@ def query(params=None, setname=None, requesturl=None, location=None,
|
||||
)
|
||||
headers = {}
|
||||
|
||||
MAX_RETRIES = 6
|
||||
attempts = 0
|
||||
while attempts < MAX_RETRIES:
|
||||
while attempts < AWS_MAX_RETRIES:
|
||||
log.debug('AWS Request: %s', requesturl)
|
||||
log.trace('AWS Request Parameters: %s', params_with_headers)
|
||||
try:
|
||||
@ -461,23 +477,14 @@ def query(params=None, setname=None, requesturl=None, location=None,
|
||||
|
||||
# check to see if we should retry the query
|
||||
err_code = data.get('Errors', {}).get('Error', {}).get('Code', '')
|
||||
if attempts < MAX_RETRIES and err_code and err_code in AWS_RETRY_CODES:
|
||||
if attempts < AWS_MAX_RETRIES and err_code and err_code in AWS_RETRY_CODES:
|
||||
attempts += 1
|
||||
log.error(
|
||||
'AWS Response Status Code and Error: [%s %s] %s; '
|
||||
'Attempts remaining: %s',
|
||||
exc.response.status_code, exc, data, attempts
|
||||
)
|
||||
# backoff an exponential amount of time to throttle requests
|
||||
# during "API Rate Exceeded" failures as suggested by the AWS documentation here:
|
||||
# https://docs.aws.amazon.com/AWSEC2/latest/APIReference/query-api-troubleshooting.html
|
||||
# and also here:
|
||||
# https://docs.aws.amazon.com/general/latest/gr/api-retries.html
|
||||
# Failure to implement this approach results in a failure rate of >30% when using salt-cloud with
|
||||
# "--parallel" when creating 50 or more instances with a fixed delay of 2 seconds.
|
||||
# A failure rate of >10% is observed when using the salt-api with an asynchronous client
|
||||
# specified (runner_async).
|
||||
time.sleep(random.uniform(1, 2**attempts))
|
||||
sleep_exponential_backoff(attempts)
|
||||
continue
|
||||
|
||||
log.error(
|
||||
|
Loading…
Reference in New Issue
Block a user