Merge pull request #27025 from cachedout/issue_25581

Better try and error handling for prep_jid
This commit is contained in:
Pedro Algarvio 2015-09-11 08:40:10 +01:00
commit 843c28b435
4 changed files with 44 additions and 19 deletions

View File

@ -328,16 +328,19 @@ class SyncClientMixin(object):
data['success'] = False
namespaced_event.fire_event(data, 'ret')
salt.utils.job.store_job(
self.opts,
{'id': self.opts['id'],
'tgt': self.opts['id'],
'jid': data['jid'],
'return': data,
},
event=None,
mminion=self.mminion,
)
# Storing the job return is treated as best-effort: a SaltCacheError from
# store_job is logged and execution continues instead of aborting the run.
try:
salt.utils.job.store_job(
self.opts,
{'id': self.opts['id'],
'tgt': self.opts['id'],
'jid': data['jid'],
'return': data,
},
event=None,
mminion=self.mminion,
)
except salt.exceptions.SaltCacheError:
log.error('Could not store job cache info. Job details for this run may be unavailable.')
# if we fired an event, make sure to delete the event object.
# This will ensure that we call destroy, which will do the 0MQ linger
log.info('Runner completed: {0}'.format(data['jid']))

View File

@ -151,6 +151,12 @@ class SaltClientTimeout(SaltException):
self.jid = jid
class SaltCacheError(SaltException):
'''
Thrown when a problem was encountered trying to read from or write to the salt cache
'''
class SaltReqTimeoutError(SaltException):
'''
Thrown when a salt master request call fails to return within the timeout

View File

@ -1272,8 +1272,11 @@ class AESFuncs(object):
:param dict load: The minion payload
'''
salt.utils.job.store_job(
self.opts, load, event=self.event, mminion=self.mminion)
try:
    salt.utils.job.store_job(
        self.opts, load, event=self.event, mminion=self.mminion)
except salt.exceptions.SaltCacheError:
    # The exception lives in ``salt.exceptions`` (plural). Referencing the
    # non-existent ``salt.exception`` would raise AttributeError while the
    # except clause is evaluated and mask the original cache error.
    log.error('Could not store job information for load: {0}'.format(load))
def _syndic_return(self, load):
'''

View File

@ -12,11 +12,13 @@ import os
import shutil
import datetime
import hashlib
import time
# Import salt libs
import salt.payload
import salt.utils
import salt.utils.jid
import salt.exceptions
log = logging.getLogger(__name__)
@ -99,12 +101,16 @@ def _format_jid_instance(jid, job):
#TODO: add to returner docs-- this is a new one
def prep_jid(nocache=False, passed_jid=None):
def prep_jid(nocache=False, passed_jid=None, recurse_count=0):
'''
Return a job id and prepare the job id directory
This is the function responsible for making sure jids don't collide (unless it's passed a jid)
So do what you have to do to make sure that stays the case
'''
if recurse_count >= 5:
err = 'prep_jid could not store a jid after {0} tries.'.format(recurse_count)
log.error(err)
raise salt.exceptions.SaltCacheError(err)
if passed_jid is None: # this can be a None of an empty string
jid = salt.utils.jid.gen_jid()
else:
@ -117,15 +123,22 @@ def prep_jid(nocache=False, passed_jid=None):
try:
os.makedirs(jid_dir_)
except OSError:
# TODO: some sort of sleep or something? Spinning is generally bad practice
time.sleep(0.1)
if passed_jid is None:
recurse_count += recurse_count
return prep_jid(nocache=nocache)
with salt.utils.fopen(os.path.join(jid_dir_, 'jid'), 'wb+') as fn_:
fn_.write(jid)
if nocache:
with salt.utils.fopen(os.path.join(jid_dir_, 'nocache'), 'wb+') as fn_:
fn_.write('')
try:
with salt.utils.fopen(os.path.join(jid_dir_, 'jid'), 'wb+') as fn_:
fn_.write(jid)
if nocache:
with salt.utils.fopen(os.path.join(jid_dir_, 'nocache'), 'wb+') as fn_:
fn_.write('')
except IOError:
log.warn('Could not write out jid file for job {0}. Retrying.'.format(jid))
time.sleep(0.1)
recurse_count += recurse_count
return prep_jid(passed_jid=jid, nocache=nocache)
return jid