mirror of
https://github.com/valitydev/salt.git
synced 2024-11-07 17:09:03 +00:00
Merge pull request #14460 from steverweber/fix_restarts
fix issues with keepalive minion
This commit is contained in:
commit
aeda9730ca
23
conf/minion
23
conf/minion
@ -133,31 +133,16 @@
|
||||
# Unless your master is under unusually heavy load, this should be left at the default.
|
||||
#auth_timeout: 60
|
||||
|
||||
|
||||
# Number of consecutive SaltReqTimeoutError that are acceptable when trying to authenticate.
|
||||
#auth_tries: 1
|
||||
|
||||
# If authentication fails due to SaltReqTimeoutError, continue without ending minion.
|
||||
#auth_safemode: True
|
||||
|
||||
# If the minion hits an error that is recoverable, restart the minion.
|
||||
#restart_on_error: False
|
||||
# If authentication fails due to SaltReqTimeoutError during a ping_interval,
|
||||
# cause the minion sub-process to restart.
|
||||
#auth_safemode: False
|
||||
|
||||
# Ping Master to ensure connection is alive (minutes).
|
||||
# TODO: perhaps could update the scheduler to raise Exception in main thread after /mine_interval (60 minutes)/ fails
|
||||
#ping_interval: 0
|
||||
|
||||
# To auto recover Minions if Master changes IP address (DDNS)
|
||||
#
|
||||
# auth_tries: 10
|
||||
# auth_safemode: False
|
||||
# ping_interval: 90
|
||||
# restart_on_error: True
|
||||
#
|
||||
# Minions won't know the master is missing until a ping fails. After the ping fails,
|
||||
# the minion will attempt authentication, which will likely fail and cause a restart.
|
||||
# When the minion restarts it will resolve the Master's IP and attempt to reconnect.
|
||||
|
||||
#ping_interval: 90
|
||||
|
||||
# If you don't have any problems with syn-floods, don't bother with the
|
||||
# three recon_* settings described below, just leave the defaults!
|
||||
|
144
salt/__init__.py
144
salt/__init__.py
@ -7,8 +7,6 @@ Make me some salt!
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
import time
|
||||
from random import randint
|
||||
|
||||
# All salt related deprecation warnings should be shown once each!
|
||||
warnings.filterwarnings(
|
||||
@ -43,7 +41,7 @@ try:
|
||||
except ImportError as exc:
|
||||
if exc.args[0] != 'No module named _msgpack':
|
||||
raise
|
||||
from salt.exceptions import SaltSystemExit, MasterExit, SaltClientError
|
||||
from salt.exceptions import SaltSystemExit, MasterExit
|
||||
|
||||
|
||||
# Let's instantiate logger using salt.log.setup.logging.getLogger() so pylint
|
||||
@ -157,65 +155,64 @@ class Minion(parsers.MinionOptionParser):
|
||||
|
||||
super(YourSubClass, self).prepare()
|
||||
'''
|
||||
if not hasattr(self, 'config'):
|
||||
self.parse_args()
|
||||
self.parse_args()
|
||||
|
||||
try:
|
||||
if self.config['verify_env']:
|
||||
confd = self.config.get('default_include')
|
||||
if confd:
|
||||
# If 'default_include' is specified in config, then use it
|
||||
if '*' in confd:
|
||||
# Value is of the form "minion.d/*.conf"
|
||||
confd = os.path.dirname(confd)
|
||||
if not os.path.isabs(confd):
|
||||
# If configured 'default_include' is not an absolute
|
||||
# path, consider it relative to folder of 'conf_file'
|
||||
# (/etc/salt by default)
|
||||
confd = os.path.join(
|
||||
os.path.dirname(self.config['conf_file']), confd
|
||||
)
|
||||
else:
|
||||
try:
|
||||
if self.config['verify_env']:
|
||||
confd = self.config.get('default_include')
|
||||
if confd:
|
||||
# If 'default_include' is specified in config, then use it
|
||||
if '*' in confd:
|
||||
# Value is of the form "minion.d/*.conf"
|
||||
confd = os.path.dirname(confd)
|
||||
if not os.path.isabs(confd):
|
||||
# If configured 'default_include' is not an absolute
|
||||
# path, consider it relative to folder of 'conf_file'
|
||||
# (/etc/salt by default)
|
||||
confd = os.path.join(
|
||||
os.path.dirname(self.config['conf_file']), 'minion.d'
|
||||
os.path.dirname(self.config['conf_file']), confd
|
||||
)
|
||||
v_dirs = [
|
||||
self.config['pki_dir'],
|
||||
self.config['cachedir'],
|
||||
self.config['sock_dir'],
|
||||
self.config['extension_modules'],
|
||||
confd,
|
||||
]
|
||||
if self.config.get('transport') == 'raet':
|
||||
v_dirs.append(os.path.join(self.config['pki_dir'], 'accepted'))
|
||||
v_dirs.append(os.path.join(self.config['pki_dir'], 'pending'))
|
||||
v_dirs.append(os.path.join(self.config['pki_dir'], 'rejected'))
|
||||
v_dirs.append(os.path.join(self.config['cachedir'], 'raet'))
|
||||
verify_env(
|
||||
v_dirs,
|
||||
self.config['user'],
|
||||
permissive=self.config['permissive_pki_access'],
|
||||
pki_dir=self.config['pki_dir'],
|
||||
else:
|
||||
confd = os.path.join(
|
||||
os.path.dirname(self.config['conf_file']), 'minion.d'
|
||||
)
|
||||
logfile = self.config['log_file']
|
||||
if logfile is not None and not logfile.startswith(('tcp://',
|
||||
'udp://',
|
||||
'file://')):
|
||||
# Logfile is not using Syslog, verify
|
||||
current_umask = os.umask(0077)
|
||||
verify_files([logfile], self.config['user'])
|
||||
os.umask(current_umask)
|
||||
except OSError as err:
|
||||
logger.exception('Failed to prepare salt environment')
|
||||
sys.exit(err.errno)
|
||||
|
||||
self.setup_logfile_logger()
|
||||
logger.info(
|
||||
'Setting up the Salt Minion "{0}"'.format(
|
||||
self.config['id']
|
||||
v_dirs = [
|
||||
self.config['pki_dir'],
|
||||
self.config['cachedir'],
|
||||
self.config['sock_dir'],
|
||||
self.config['extension_modules'],
|
||||
confd,
|
||||
]
|
||||
if self.config.get('transport') == 'raet':
|
||||
v_dirs.append(os.path.join(self.config['pki_dir'], 'accepted'))
|
||||
v_dirs.append(os.path.join(self.config['pki_dir'], 'pending'))
|
||||
v_dirs.append(os.path.join(self.config['pki_dir'], 'rejected'))
|
||||
v_dirs.append(os.path.join(self.config['cachedir'], 'raet'))
|
||||
verify_env(
|
||||
v_dirs,
|
||||
self.config['user'],
|
||||
permissive=self.config['permissive_pki_access'],
|
||||
pki_dir=self.config['pki_dir'],
|
||||
)
|
||||
logfile = self.config['log_file']
|
||||
if logfile is not None and not logfile.startswith(('tcp://',
|
||||
'udp://',
|
||||
'file://')):
|
||||
# Logfile is not using Syslog, verify
|
||||
current_umask = os.umask(0077)
|
||||
verify_files([logfile], self.config['user'])
|
||||
os.umask(current_umask)
|
||||
except OSError as err:
|
||||
logger.exception('Failed to prepare salt environment')
|
||||
sys.exit(err.errno)
|
||||
|
||||
self.setup_logfile_logger()
|
||||
logger.info(
|
||||
'Setting up the Salt Minion "{0}"'.format(
|
||||
self.config['id']
|
||||
)
|
||||
migrations.migrate_paths(self.config)
|
||||
)
|
||||
migrations.migrate_paths(self.config)
|
||||
if self.config['transport'].lower() == 'zeromq':
|
||||
# Late import so logging works correctly
|
||||
import salt.minion
|
||||
@ -248,29 +245,18 @@ class Minion(parsers.MinionOptionParser):
|
||||
|
||||
NOTE: Run any required code before calling `super()`.
|
||||
'''
|
||||
reconnect = True
|
||||
while reconnect:
|
||||
reconnect = False
|
||||
try:
|
||||
self.prepare()
|
||||
if check_user(self.config['user']):
|
||||
self.minion.tune_in()
|
||||
except (KeyboardInterrupt, SaltSystemExit) as exc:
|
||||
logger.warn('Stopping the Salt Minion')
|
||||
if isinstance(exc, KeyboardInterrupt):
|
||||
logger.warn('Exiting on Ctrl-c')
|
||||
else:
|
||||
logger.error(str(exc))
|
||||
except SaltClientError as exc:
|
||||
logger.error(exc)
|
||||
if self.config.get('restart_on_error'):
|
||||
logger.warn('** Restarting minion **')
|
||||
s = randint(0, self.config.get('random_reauth_delay', 10))
|
||||
logger.info('Sleeping random_reauth_delay of {0} seconds'.format(s))
|
||||
time.sleep(s)
|
||||
reconnect = True
|
||||
finally:
|
||||
self.shutdown()
|
||||
try:
|
||||
self.prepare()
|
||||
if check_user(self.config['user']):
|
||||
self.minion.tune_in()
|
||||
except (KeyboardInterrupt, SaltSystemExit) as exc:
|
||||
logger.warn('Stopping the Salt Minion')
|
||||
if isinstance(exc, KeyboardInterrupt):
|
||||
logger.warn('Exiting on Ctrl-c')
|
||||
else:
|
||||
logger.error(str(exc))
|
||||
finally:
|
||||
self.shutdown()
|
||||
|
||||
def shutdown(self):
|
||||
'''
|
||||
|
@ -9,6 +9,10 @@ import os
|
||||
import sys
|
||||
import traceback
|
||||
import logging
|
||||
import multiprocessing
|
||||
import threading
|
||||
import time
|
||||
from random import randint
|
||||
|
||||
# Import salt libs
|
||||
import salt
|
||||
@ -58,8 +62,84 @@ def salt_minion():
|
||||
if '' in sys.path:
|
||||
sys.path.remove('')
|
||||
|
||||
minion = salt.Minion()
|
||||
minion.start()
|
||||
if '--disable-keepalive' in sys.argv:
|
||||
sys.argv.remove('--disable-keepalive')
|
||||
minion = salt.Minion()
|
||||
minion.start()
|
||||
return
|
||||
|
||||
if '-d' in sys.argv or '--daemon' in sys.argv:
|
||||
# disable daemonize on sub processes
|
||||
if '-d' in sys.argv:
|
||||
sys.argv.remove('-d')
|
||||
if '--daemon' in sys.argv:
|
||||
sys.argv.remove('--daemon')
|
||||
# daemonize current process
|
||||
salt.utils.daemonize()
|
||||
|
||||
# run minion in a new process so its simple to cleanup resource
|
||||
def minion_process(q):
    '''
    Run one salt minion and report through ``q`` how it ended.

    Puts ``0`` on ``q`` when ``minion.start()`` returns without raising.
    When any exception escapes, it is logged and a random restart delay
    (in seconds) is put on ``q`` instead, so the caller can sleep after
    this process and its resources are gone.

    q
        Queue-like object exposing ``put()``; receives exactly one integer.
    '''
    # have the minion suicide if the parent process is gone
    # there is a small race issue where the parent PID could be replaced
    # with another process with the same PID
    def suicide_when_without_parent(parent_pid):
        while True:
            time.sleep(5)
            try:
                # check pid alive (Unix only trick!)
                os.kill(parent_pid, 0)
            except OSError:
                # sys.exit() would only raise SystemExit inside this
                # watchdog thread and leave the minion running; os._exit()
                # terminates the whole process immediately.
                os._exit(999)

    if not salt.utils.is_windows():
        t = threading.Thread(
            target=suicide_when_without_parent,
            args=(os.getppid(),)
        )
        t.start()

    minion = None
    try:
        minion = salt.Minion()
        minion.start()
        q.put(0)
    except Exception as err:
        log.error(err)
        log.warn('** Restarting minion **')
        delay = 60
        # Only a constructed minion can carry a config; the original test
        # was inverted (``is None``) so the configured delay was never used.
        if minion is not None and hasattr(minion, 'config'):
            delay = minion.config.get('random_reauth_delay', 60)
        # randint(1, delay) raises ValueError for delay < 1, so clamp it.
        random_delay = randint(1, max(1, delay))
        log.info('Sleeping random_reauth_delay of {0} seconds'.format(random_delay))
        # perform delay after minion resources have been cleaned
        q.put(random_delay)
|
||||
|
||||
# keep one minion subprocess running
|
||||
while True:
|
||||
q = multiprocessing.Queue()
|
||||
p = multiprocessing.Process(target=minion_process, args=(q,))
|
||||
p.start()
|
||||
|
||||
try:
|
||||
p.join()
|
||||
try:
|
||||
restart_delay = q.get(block=False)
|
||||
except Exception:
|
||||
if p.exitcode == 0:
|
||||
# Minion process ended naturally
|
||||
break
|
||||
restart_delay = 60
|
||||
if restart_delay == 0:
|
||||
# minion closed on normal behaviour like Ctrl+C
|
||||
break
|
||||
# delay restart to reduce flooding and allow network resources to close
|
||||
time.sleep(restart_delay)
|
||||
except KeyboardInterrupt, err:
|
||||
break
|
||||
|
||||
# need to reset logging because new minion objects
|
||||
# cause extra log handlers to accumulate
|
||||
rlogger = logging.getLogger()
|
||||
for h in rlogger.handlers:
|
||||
rlogger.removeHandler(h)
|
||||
logging.basicConfig()
|
||||
|
||||
|
||||
def salt_syndic():
|
||||
|
@ -46,7 +46,7 @@ class MinionTest(integration.ShellCase, integration.ShellCaseCommonTestsMixIn):
|
||||
|
||||
ret = self.run_script(
|
||||
self._call_binary_,
|
||||
'--config-dir {0} --pid-file {1} -l debug'.format(
|
||||
'--disable-keepalive --config-dir {0} --pid-file {1} -l debug'.format(
|
||||
config_dir,
|
||||
pid_path
|
||||
),
|
||||
|
Loading…
Reference in New Issue
Block a user