Reduce unnecessary file downloading in archive/file states

The file.managed state, which the archive.extracted state uses to
download the source archive, was at some point modified to clear the
file from the minion cache. This caused unnecessary re-downloading on
subsequent runs, which slows states down considerably when dealing with
larger archives.

This commit makes the following changes to improve this:

1. The fileclient now accepts a `source_hash` argument, which will cause
   the client's get_url function to skip downloading http(s) and ftp
   files if the file is already cached and its hash matches the passed
   hash (see the sketch after this list). This argument has also been
   added to the `cp.get_url` and `cp.cache_file` functions.

2. When running `file.source_list`, we no longer try to download the
   file if it is an http(s) or ftp URL.

3. Where `cp.cache_file` is used, we pass the `source_hash` if it is
   available.

4. A `cache_source` argument has been added to the `file.managed` state,
   defaulting to `True`. This is now used to control whether or not the
   source file is cleared from the minion cache when the state
   completes.

5. Two new states (`file.cached` and `file.not_cached`) have been added
   to manage files in the minion cache.
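
To make the mechanics of item 1 concrete, below is a minimal,
self-contained sketch of the skip check that get_url now performs. The
real implementation uses salt.utils.get_hash and the HASHES tables in
salt.utils.files; the chunked-read helper here is a stand-in:

    import hashlib
    import os

    # Digest length -> hash type, as in salt.utils.files.HASHES_REVMAP
    HASHES = {'sha512': 128, 'sha384': 96, 'sha256': 64,
              'sha224': 56, 'sha1': 40, 'md5': 32}
    HASHES_REVMAP = dict((y, x) for x, y in HASHES.items())

    def _file_hash(path, form):
        '''Stand-in for salt.utils.get_hash: hash a file in chunks.'''
        digest = hashlib.new(form)
        with open(path, 'rb') as fp_:
            for chunk in iter(lambda: fp_.read(65536), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def can_skip_download(dest, source_hash):
        '''Return True if dest is already cached and matches source_hash.'''
        try:
            # Accept 'sha256=<digest>' as well as a bare digest
            source_hash = source_hash.split('=')[-1]
            form = HASHES_REVMAP[len(source_hash)]
            return os.path.isfile(dest) and \
                _file_hash(dest, form) == source_hash
        except (AttributeError, KeyError, IOError, OSError):
            # Unknown digest length, unreadable file, etc.: don't skip
            return False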

In addition, the `archive.extracted` state has been modified in the
following ways:

1. For consistency with `file.managed`, a `cache_source` argument has
   been added. This also deprecates `keep`. If `keep` is used,
   `cache_source` assumes its value, and a warning is added to the state
   return to let the user know to update their SLS.

2. The variable name `cached_source` (used internally in the
   `archive.extracted` state) has been renamed to `cached` to reduce
   confusion with the new `cache_source` argument.

3. The new `file.cached` and `file.not_cached` states are now used to
   manage the source tarball instead of `file.managed`. This improves
   disk usage and reduces unnecessary complexity in the state, as we no
   longer keep a copy of the archive in a separate location within the
   cachedir; we now only use the copy downloaded using `cp.cache_file`
   within the `file.cached` state (a sketch of this composition follows
   this list). This change has also necessitated a new home for hash
   files tracked by the `source_hash_update` argument: a subdirectory of
   the minion cachedir called `archive_hash`.
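
As a sketch of the composition described in item 3, a state module can
chain the new states like this. The __states__, __salt__, and __env__
dunders are injected by the Salt loader (so this only runs inside a
minion), and the state name myextract is hypothetical:

    def myextract(name, source, source_hash=None, cache_source=True):
        '''Hypothetical state using the file.cached/file.not_cached pattern.'''
        ret = {'name': name, 'result': False, 'changes': {}, 'comment': ''}
        result = __states__['file.cached'](source,
                                           source_hash=source_hash,
                                           saltenv=__env__)
        if not result['result']:
            # Bail out; the file.cached return explains the failure
            return result
        # Operate on the single copy in the minion cache
        cached = __salt__['cp.is_cached'](source, saltenv=__env__)
        # ... extract/process the cached file here ...
        ret['result'] = True
        if not cache_source:
            # Discard the cached copy, but only warn on failure
            cleanup = __states__['file.not_cached'](source, saltenv=__env__)
            if not cleanup['result']:
                ret.setdefault('warnings', []).append(cleanup['comment'])
        return ret
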
Author: Erik Johnson
Date:   2017-09-14 18:51:40 -05:00
Parent: 0e9eaed62c
Commit: fd1d89d385
7 changed files with 531 additions and 171 deletions


@@ -185,12 +185,13 @@ class Client(object):
'''
raise NotImplementedError
def cache_file(self, path, saltenv=u'base', cachedir=None):
def cache_file(self, path, saltenv=u'base', cachedir=None, source_hash=None):
'''
Pull a file down from the file server and store it in the minion
file cache
'''
return self.get_url(path, u'', True, saltenv, cachedir=cachedir)
return self.get_url(
path, u'', True, saltenv, cachedir=cachedir, source_hash=source_hash)
def cache_files(self, paths, saltenv=u'base', cachedir=None):
'''
@@ -470,7 +471,7 @@ class Client(object):
return ret
def get_url(self, url, dest, makedirs=False, saltenv=u'base',
no_cache=False, cachedir=None):
no_cache=False, cachedir=None, source_hash=None):
'''
Get a single file from a URL.
'''
@@ -525,6 +526,18 @@ class Client(object):
return u''
elif not no_cache:
dest = self._extrn_path(url, saltenv, cachedir=cachedir)
if source_hash is not None:
try:
source_hash = source_hash.split('=')[-1]
form = salt.utils.files.HASHES_REVMAP[len(source_hash)]
if salt.utils.get_hash(dest, form) == source_hash:
log.debug(
'Cached copy of %s (%s) matches source_hash %s, '
'skipping download', url, dest, source_hash
)
return dest
except (AttributeError, KeyError, IOError, OSError):
pass
destdir = os.path.dirname(dest)
if not os.path.isdir(destdir):
os.makedirs(destdir)
@@ -532,7 +545,9 @@ class Client(object):
if url_data.scheme == u's3':
try:
def s3_opt(key, default=None):
u'''Get value of s3.<key> from Minion config or from Pillar'''
'''
Get value of s3.<key> from Minion config or from Pillar
'''
if u's3.' + key in self.opts:
return self.opts[u's3.' + key]
try:
@@ -785,7 +800,7 @@ class Client(object):
def _extrn_path(self, url, saltenv, cachedir=None):
'''
Return the extn_filepath for a given url
Return the extrn_filepath for a given url
'''
url_data = urlparse(url)
if salt.utils.platform.is_windows():
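
As a usage illustration of the updated client API above, the following
sketch exercises cache_file with the new source_hash argument; the
config path and the example digest are assumptions:

    import salt.config
    import salt.fileclient

    # Assumes the default minion config location
    opts = salt.config.minion_config('/etc/salt/minion')
    client = salt.fileclient.get_file_client(opts)

    # Repeat calls with the same source_hash reuse the cached copy
    # instead of re-downloading the file.
    cached = client.cache_file(
        'https://domain.tld/myfile.tar.gz',
        saltenv='base',
        source_hash='f1d2d2f924e986ac86fdf7b36c94bcdf32beec15',
    )
    print(cached)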


@@ -60,7 +60,8 @@ def list_(name,
strip_components=None,
clean=False,
verbose=False,
saltenv='base'):
saltenv='base',
source_hash=None):
'''
.. versionadded:: 2016.11.0
.. versionchanged:: 2016.11.2
@@ -149,6 +150,14 @@ def list_(name,
``archive``. This is only applicable when ``archive`` is a file from
the ``salt://`` fileserver.
source_hash
If ``name`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
.. _tarfile: https://docs.python.org/2/library/tarfile.html
.. _xz: http://tukaani.org/xz/
@@ -160,6 +169,7 @@ def list_(name,
salt '*' archive.list /path/to/myfile.tar.gz strip_components=1
salt '*' archive.list salt://foo.tar.gz
salt '*' archive.list https://domain.tld/myfile.zip
salt '*' archive.list https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15
salt '*' archive.list ftp://10.1.2.3/foo.rar
'''
def _list_tar(name, cached, decompress_cmd, failhard=False):
@@ -309,7 +319,7 @@ def list_(name,
)
return dirs, files, []
cached = __salt__['cp.cache_file'](name, saltenv)
cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash)
if not cached:
raise CommandExecutionError('Failed to cache {0}'.format(name))
@@ -1094,7 +1104,7 @@ def unzip(zip_file,
return _trim_files(cleaned_files, trim_output)
def is_encrypted(name, clean=False, saltenv='base'):
def is_encrypted(name, clean=False, saltenv='base', source_hash=None):
'''
.. versionadded:: 2016.11.0
@@ -1113,6 +1123,18 @@ def is_encrypted(name, clean=False, saltenv='base'):
If there is an error listing the archive's contents, the cached
file will not be removed, to allow for troubleshooting.
saltenv : base
Specifies the fileserver environment from which to retrieve
``archive``. This is only applicable when ``archive`` is a file from
the ``salt://`` fileserver.
source_hash
If ``name`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
CLI Examples:
@@ -1122,9 +1144,10 @@ def is_encrypted(name, clean=False, saltenv='base'):
salt '*' archive.is_encrypted salt://foo.zip
salt '*' archive.is_encrypted salt://foo.zip saltenv=dev
salt '*' archive.is_encrypted https://domain.tld/myfile.zip clean=True
salt '*' archive.is_encrypted https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15
salt '*' archive.is_encrypted ftp://10.1.2.3/foo.zip
'''
cached = __salt__['cp.cache_file'](name, saltenv)
cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash)
if not cached:
raise CommandExecutionError('Failed to cache {0}'.format(name))


@@ -352,7 +352,7 @@ def get_dir(path, dest, saltenv='base', template=None, gzip=None, **kwargs):
return _client().get_dir(path, dest, saltenv, gzip)
def get_url(path, dest='', saltenv='base', makedirs=False):
def get_url(path, dest='', saltenv='base', makedirs=False, source_hash=None):
'''
.. versionchanged:: Oxygen
``dest`` can now be a directory
@@ -386,6 +386,13 @@ def get_url(path, dest='', saltenv='base', makedirs=False):
Salt fileserver environment from which to retrieve the file. Ignored if
``path`` is not a ``salt://`` URL.
source_hash
If ``path`` is an http(s) or ftp URL and the file exists in the
minion's file cache, this option can be passed to keep the minion from
re-downloading the file if the cached copy matches the specified hash.
.. versionadded:: Oxygen
CLI Example:
.. code-block:: bash
@@ -394,9 +401,11 @@ def get_url(path, dest='', saltenv='base', makedirs=False):
salt '*' cp.get_url http://www.slashdot.org /tmp/index.html
'''
if isinstance(dest, six.string_types):
result = _client().get_url(path, dest, makedirs, saltenv)
result = _client().get_url(
path, dest, makedirs, saltenv, source_hash=source_hash)
else:
result = _client().get_url(path, None, makedirs, saltenv, no_cache=True)
result = _client().get_url(
path, None, makedirs, saltenv, no_cache=True, source_hash=source_hash)
if not result:
log.error(
'Unable to fetch file {0} from saltenv {1}.'.format(
@@ -429,11 +438,18 @@ def get_file_str(path, saltenv='base'):
return fn_
def cache_file(path, saltenv='base'):
def cache_file(path, saltenv='base', source_hash=None):
'''
Used to cache a single file on the Minion
Returns the location of the new cached file on the Minion.
Returns the location of the new cached file on the Minion
source_hash
If ``name`` is an http(s) or ftp URL and the file exists in the
minion's file cache, this option can be passed to keep the minion from
re-downloading the file if the cached copy matches the specified hash.
.. versionadded:: Oxygen
CLI Example:
@@ -485,7 +501,7 @@ def cache_file(path, saltenv='base'):
if senv:
saltenv = senv
result = _client().cache_file(path, saltenv)
result = _client().cache_file(path, saltenv, source_hash=source_hash)
if not result:
log.error(
u'Unable to cache file \'%s\' from saltenv \'%s\'.',


@@ -60,6 +60,7 @@ import salt.utils.stringutils
import salt.utils.templates
import salt.utils.url
from salt.exceptions import CommandExecutionError, MinionError, SaltInvocationError, get_error_message as _get_error_message
from salt.utils.files import HASHES, HASHES_REVMAP
log = logging.getLogger(__name__)
@@ -67,16 +68,6 @@ __func_alias__ = {
'makedirs_': 'makedirs'
}
HASHES = {
'sha512': 128,
'sha384': 96,
'sha256': 64,
'sha224': 56,
'sha1': 40,
'md5': 32,
}
HASHES_REVMAP = dict([(y, x) for x, y in six.iteritems(HASHES)])
def __virtual__():
'''
@@ -3767,14 +3758,8 @@ def source_list(source, source_hash, saltenv):
ret = (single_src, single_hash)
break
elif proto.startswith('http') or proto == 'ftp':
try:
if __salt__['cp.cache_file'](single_src):
ret = (single_src, single_hash)
break
except MinionError as exc:
# Error downloading file. Log the caught exception and
# continue on to the next source.
log.exception(exc)
ret = (single_src, single_hash)
break
elif proto == 'file' and os.path.exists(urlparsed_single_src.path):
ret = (single_src, single_hash)
break
@@ -3794,9 +3779,8 @@ def source_list(source, source_hash, saltenv):
ret = (single, source_hash)
break
elif proto.startswith('http') or proto == 'ftp':
if __salt__['cp.cache_file'](single):
ret = (single, source_hash)
break
ret = (single, source_hash)
break
elif single.startswith('/') and os.path.exists(single):
ret = (single, source_hash)
break
@@ -4007,11 +3991,14 @@ def get_managed(
else:
sfn = cached_dest
# If we didn't have the template or remote file, let's get it
# Similarly when the file has been updated and the cache has to be refreshed
# If we didn't have the template or remote file, or the file has been
# updated and the cache has to be refreshed, download the file.
if not sfn or cache_refetch:
try:
sfn = __salt__['cp.cache_file'](source, saltenv)
sfn = __salt__['cp.cache_file'](
source,
saltenv,
source_hash=source_sum.get('hsum'))
except Exception as exc:
# A 404 or other error code may raise an exception, catch it
# and return a comment that will fail the calling state.
@@ -4675,7 +4662,7 @@ def check_file_meta(
'''
changes = {}
if not source_sum:
source_sum = dict()
source_sum = {}
lstats = stats(name, hash_type=source_sum.get('hash_type', None), follow_symlinks=False)
if not lstats:
changes['newfile'] = name
@@ -4683,7 +4670,10 @@ def check_file_meta(
if 'hsum' in source_sum:
if source_sum['hsum'] != lstats['sum']:
if not sfn and source:
sfn = __salt__['cp.cache_file'](source, saltenv)
sfn = __salt__['cp.cache_file'](
source,
saltenv,
source_hash=source_sum['hsum'])
if sfn:
try:
changes['diff'] = get_diff(
@@ -4750,7 +4740,9 @@ def get_diff(file1,
saltenv='base',
show_filenames=True,
show_changes=True,
template=False):
template=False,
source_hash_file1=None,
source_hash_file2=None):
'''
Return unified diff of two files
@@ -4785,6 +4777,22 @@ def get_diff(file1,
.. versionadded:: Oxygen
source_hash_file1
If ``file1`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
source_hash_file2
If ``file2`` is an http(s)/ftp URL and the file exists in the minion's
file cache, this option can be passed to keep the minion from
re-downloading the archive if the cached copy matches the specified
hash.
.. versionadded:: Oxygen
CLI Examples:
.. code-block:: bash
@@ -4793,14 +4801,17 @@ def get_diff(file1,
salt '*' file.get_diff /tmp/foo.txt /tmp/bar.txt
'''
files = (file1, file2)
source_hashes = (source_hash_file1, source_hash_file2)
paths = []
errors = []
for filename in files:
for filename, source_hash in zip(files, source_hashes):
try:
# Local file paths will just return the same path back when passed
# to cp.cache_file.
cached_path = __salt__['cp.cache_file'](filename, saltenv)
cached_path = __salt__['cp.cache_file'](filename,
saltenv,
source_hash=source_hash)
if cached_path is False:
errors.append(
u'File {0} not found'.format(


@@ -64,16 +64,30 @@ def _gen_checksum(path):
'hash_type': __opts__['hash_type']}
def _update_checksum(cached_source):
cached_source_sum = '.'.join((cached_source, 'hash'))
source_sum = _gen_checksum(cached_source)
def _checksum_file_path(path):
relpath = '.'.join((os.path.relpath(path, __opts__['cachedir']), 'hash'))
if re.match(r'..[/\\]', relpath):
# path is a local file
relpath = salt.utils.path.join(
'local',
os.path.splitdrive(path)[-1].lstrip('/\\'),
)
return salt.utils.path.join(__opts__['cachedir'], 'archive_hash', relpath)
def _update_checksum(path):
checksum_file = _checksum_file_path(path)
checksum_dir = os.path.dirname(checksum_file)
if not os.path.isdir(checksum_dir):
os.makedirs(checksum_dir)
source_sum = _gen_checksum(path)
hash_type = source_sum.get('hash_type')
hsum = source_sum.get('hsum')
if hash_type and hsum:
lines = []
try:
try:
with salt.utils.files.fopen(cached_source_sum, 'r') as fp_:
with salt.utils.files.fopen(checksum_file, 'r') as fp_:
for line in fp_:
try:
lines.append(line.rstrip('\n').split(':', 1))
@@ -83,7 +97,7 @@ def _update_checksum(cached_source):
if exc.errno != errno.ENOENT:
raise
with salt.utils.files.fopen(cached_source_sum, 'w') as fp_:
with salt.utils.files.fopen(checksum_file, 'w') as fp_:
for line in lines:
if line[0] == hash_type:
line[1] = hsum
@@ -93,16 +107,16 @@ def _update_checksum(cached_source):
except (IOError, OSError) as exc:
log.warning(
'Failed to update checksum for %s: %s',
cached_source, exc.__str__()
path, exc.__str__(), exc_info=True
)
def _read_cached_checksum(cached_source, form=None):
def _read_cached_checksum(path, form=None):
if form is None:
form = __opts__['hash_type']
path = '.'.join((cached_source, 'hash'))
checksum_file = _checksum_file_path(path)
try:
with salt.utils.files.fopen(path, 'r') as fp_:
with salt.utils.files.fopen(checksum_file, 'r') as fp_:
for line in fp_:
# Should only be one line in this file but just in case it
# isn't, read only a single line to avoid overuse of memory.
@@ -117,9 +131,9 @@ def _read_cached_checksum(cached_source, form=None):
return {'hash_type': hash_type, 'hsum': hsum}
def _compare_checksum(cached_source, source_sum):
def _compare_checksum(cached, source_sum):
cached_sum = _read_cached_checksum(
cached_source,
cached,
form=source_sum.get('hash_type', __opts__['hash_type'])
)
return source_sum == cached_sum
@@ -146,6 +160,7 @@ def extracted(name,
source_hash_name=None,
source_hash_update=False,
skip_verify=False,
cache_source=True,
password=None,
options=None,
list_options=None,
@@ -155,7 +170,6 @@ def extracted(name,
user=None,
group=None,
if_missing=None,
keep=False,
trim_output=False,
use_cmd_unzip=None,
extract_perms=True,
@@ -391,6 +405,19 @@ def extracted(name,
.. versionadded:: 2016.3.4
cache_source : True
For ``source`` archives not local to the minion (i.e. from the Salt
fileserver or a remote source such as ``http(s)`` or ``ftp``), Salt
will need to download the archive to the minion cache before it can
be extracted. To remove the downloaded archive after extraction, set
this argument to ``False``.
.. versionadded:: 2017.7.3
keep : True
.. deprecated:: 2017.7.3
Use ``cache_source`` instead
password
**For ZIP archives only.** Password used for extraction.
@@ -518,13 +545,6 @@ def extracted(name,
simply checked for existence and extraction will be skipped if
it is present.
keep : False
For ``source`` archives not local to the minion (i.e. from the Salt
fileserver or a remote source such as ``http(s)`` or ``ftp``), Salt
will need to download the archive to the minion cache before they can
be extracted. After extraction, these source archives will be removed
unless this argument is set to ``True``.
trim_output : False
Useful for archives with many files in them. This can either be set to
``True`` (in which case only the first 100 files extracted will be
@@ -626,6 +646,14 @@ def extracted(name,
# Remove pub kwargs as they're irrelevant here.
kwargs = salt.utils.args.clean_kwargs(**kwargs)
if 'keep' in kwargs:
cache_source = bool(kwargs.pop('keep'))
ret.setdefault('warnings', []).append(
'The \'keep\' argument has been renamed to \'cache_source\'. '
'Assumed cache_source={0}. Please update your SLS to get rid of '
'this warning.'.format(cache_source)
)
if not _path_is_abs(name):
ret['comment'] = '{0} is not an absolute path'.format(name)
return ret
@@ -721,10 +749,10 @@ def extracted(name,
urlparsed_source = _urlparse(source_match)
source_hash_basename = urlparsed_source.path or urlparsed_source.netloc
source_is_local = urlparsed_source.scheme in ('', 'file')
source_is_local = urlparsed_source.scheme in salt.utils.files.LOCAL_PROTOS
if source_is_local:
# Get rid of "file://" from start of source_match
source_match = urlparsed_source.path
source_match = os.path.realpath(os.path.expanduser(urlparsed_source.path))
if not os.path.isfile(source_match):
ret['comment'] = 'Source file \'{0}\' does not exist'.format(source_match)
return ret
@@ -858,95 +886,49 @@ def extracted(name,
source_sum = {}
if source_is_local:
cached_source = source_match
cached = source_match
else:
cached_source = os.path.join(
__opts__['cachedir'],
'files',
__env__,
re.sub(r'[:/\\]', '_', source_hash_basename),
)
if os.path.isdir(cached_source):
# Prevent a traceback from attempting to read from a directory path
salt.utils.files.rm_rf(cached_source)
existing_cached_source_sum = _read_cached_checksum(cached_source)
if source_is_local:
# No need to download archive, it's local to the minion
update_source = False
else:
if not os.path.isfile(cached_source):
# Archive not cached, we need to download it
update_source = True
else:
# Archive is cached, keep=True likely used in prior run. If we need
# to verify the hash, then we *have* to update the source archive
# to know whether or not the hash changed. Hence the below
# statement. bool(source_hash) will be True if source_hash was
# passed, and otherwise False.
update_source = bool(source_hash)
if update_source:
if __opts__['test']:
ret['result'] = None
ret['comment'] = (
'Archive {0} would be downloaded to cache and checked to '
'discover if extraction is necessary'.format(
'Archive {0} would be cached (if necessary) and checked to '
'discover if extraction is needed'.format(
salt.utils.url.redact_http_basic_auth(source_match)
)
)
return ret
# NOTE: This will result in more than one copy of the source archive on
# the minion. The reason this is necessary is because if we are
# tracking the checksum using source_hash_update, we need a location
# where we can place the checksum file alongside the cached source
# file, where it won't be overwritten by caching a file with the same
# name in the same parent dir as the source file. Long term, we should
# come up with a better solution for this.
file_result = __states__['file.managed'](cached_source,
source=source_match,
source_hash=source_hash,
source_hash_name=source_hash_name,
makedirs=True,
skip_verify=skip_verify)
log.debug('file.managed: {0}'.format(file_result))
result = __states__['file.cached'](source_match,
source_hash=source_hash,
source_hash_name=source_hash_name,
skip_verify=skip_verify,
saltenv=__env__)
log.debug('file.cached: {0}'.format(result))
# Prevent a traceback if errors prevented the above state from getting
# off the ground.
if isinstance(file_result, list):
if isinstance(result, list):
try:
ret['comment'] = '\n'.join(file_result)
ret['comment'] = '\n'.join(result)
except TypeError:
ret['comment'] = '\n'.join([str(x) for x in file_result])
ret['comment'] = '\n'.join([str(x) for x in result])
return ret
try:
if not file_result['result']:
log.debug(
'failed to download %s',
salt.utils.url.redact_http_basic_auth(source_match)
)
return file_result
except TypeError:
if not file_result:
log.debug(
'failed to download %s',
salt.utils.url.redact_http_basic_auth(source_match)
)
return file_result
if result['result']:
# Get the path of the file in the minion cache
cached = __salt__['cp.is_cached'](source_match)
else:
log.debug(
'failed to download %s',
salt.utils.url.redact_http_basic_auth(source_match)
)
return result
else:
log.debug(
'Archive %s is already in cache',
salt.utils.url.redact_http_basic_auth(source_match)
)
existing_cached_source_sum = _read_cached_checksum(cached)
if source_hash and source_hash_update and not skip_verify:
# Create local hash sum file if we're going to track sum update
_update_checksum(cached_source)
_update_checksum(cached)
if archive_format == 'zip' and not password:
log.debug('Checking %s to see if it is password-protected',
@@ -955,7 +937,7 @@ def extracted(name,
# implicitly enabled by setting the "options" argument.
try:
encrypted_zip = __salt__['archive.is_encrypted'](
cached_source,
cached,
clean=False,
saltenv=__env__)
except CommandExecutionError:
@@ -973,7 +955,7 @@ def extracted(name,
return ret
try:
contents = __salt__['archive.list'](cached_source,
contents = __salt__['archive.list'](cached,
archive_format=archive_format,
options=list_options,
strip_components=strip_components,
@@ -1142,7 +1124,7 @@ def extracted(name,
if not extraction_needed \
and source_hash_update \
and existing_cached_source_sum is not None \
and not _compare_checksum(cached_source, existing_cached_source_sum):
and not _compare_checksum(cached, existing_cached_source_sum):
extraction_needed = True
source_hash_trigger = True
else:
@@ -1200,13 +1182,13 @@ def extracted(name,
__states__['file.directory'](name, user=user, makedirs=True)
created_destdir = True
log.debug('Extracting {0} to {1}'.format(cached_source, name))
log.debug('Extracting {0} to {1}'.format(cached, name))
try:
if archive_format == 'zip':
if use_cmd_unzip:
try:
files = __salt__['archive.cmd_unzip'](
cached_source,
cached,
name,
options=options,
trim_output=trim_output,
@@ -1216,7 +1198,7 @@ def extracted(name,
ret['comment'] = exc.strerror
return ret
else:
files = __salt__['archive.unzip'](cached_source,
files = __salt__['archive.unzip'](cached,
name,
options=options,
trim_output=trim_output,
@@ -1225,7 +1207,7 @@ def extracted(name,
**kwargs)
elif archive_format == 'rar':
try:
files = __salt__['archive.unrar'](cached_source,
files = __salt__['archive.unrar'](cached,
name,
trim_output=trim_output,
**kwargs)
@@ -1235,7 +1217,7 @@ def extracted(name,
else:
if options is None:
try:
with closing(tarfile.open(cached_source, 'r')) as tar:
with closing(tarfile.open(cached, 'r')) as tar:
tar.extractall(name)
files = tar.getnames()
if trim_output:
@@ -1243,7 +1225,7 @@ def extracted(name,
except tarfile.ReadError:
if salt.utils.path.which('xz'):
if __salt__['cmd.retcode'](
['xz', '-t', cached_source],
['xz', '-t', cached],
python_shell=False,
ignore_retcode=True) == 0:
# XZ-compressed data
@@ -1259,7 +1241,7 @@ def extracted(name,
# pipe it to tar for extraction.
cmd = 'xz --decompress --stdout {0} | tar xvf -'
results = __salt__['cmd.run_all'](
cmd.format(_cmd_quote(cached_source)),
cmd.format(_cmd_quote(cached)),
cwd=name,
python_shell=True)
if results['retcode'] != 0:
@@ -1329,7 +1311,7 @@ def extracted(name,
tar_cmd.append(tar_shortopts)
tar_cmd.extend(tar_longopts)
tar_cmd.extend(['-f', cached_source])
tar_cmd.extend(['-f', cached])
results = __salt__['cmd.run_all'](tar_cmd,
cwd=name,
@@ -1500,18 +1482,12 @@ def extracted(name,
for item in enforce_failed:
ret['comment'] += '\n- {0}'.format(item)
if not source_is_local and not keep:
for path in (cached_source, __salt__['cp.is_cached'](source_match)):
if not path:
continue
log.debug('Cleaning cached source file %s', path)
try:
os.remove(path)
except OSError as exc:
if exc.errno != errno.ENOENT:
log.error(
'Failed to clean cached source file %s: %s',
cached_source, exc.__str__()
)
if not cache_source and not source_is_local:
log.debug('Cleaning cached source file %s', cached)
result = __states__['file.not_cached'](source_match, saltenv=__env__)
if not result['result']:
# Don't let failure to delete the cached file cause the state itself to
# fail, just drop it in the warnings.
ret.setdefault('warnings', []).append(result['comment'])
return ret
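
To illustrate the new archive_hash layout, here is a worked example of
the path logic in _checksum_file_path above, simplified to os.path.join
and assuming the default Linux cachedir:

    import os
    import re

    cachedir = '/var/cache/salt/minion'  # assumed default

    def checksum_file_path(path):
        '''Simplified mirror of the new _checksum_file_path helper.'''
        relpath = '.'.join((os.path.relpath(path, cachedir), 'hash'))
        if re.match(r'..[/\\]', relpath):
            # Path lies outside the cachedir, i.e. a local file
            relpath = os.path.join(
                'local', os.path.splitdrive(path)[-1].lstrip('/\\'))
        return os.path.join(cachedir, 'archive_hash', relpath)

    # A remote archive cached via cp.cache_file:
    print(checksum_file_path(
        cachedir + '/extrn_files/base/domain.tld/foo.tar.gz'))
    # /var/cache/salt/minion/archive_hash/extrn_files/base/domain.tld/foo.tar.gz.hash

    # An archive local to the minion (note the helper as written drops
    # the .hash suffix on this branch):
    print(checksum_file_path('/srv/archives/foo.tar.gz'))
    # /var/cache/salt/minion/archive_hash/local/srv/archives/foo.tar.gz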


@@ -299,6 +299,7 @@ if salt.utils.platform.is_windows():
# Import 3rd-party libs
from salt.ext import six
from salt.ext.six.moves import zip_longest
from salt.ext.six.moves.urllib.parse import urlparse as _urlparse # pylint: disable=no-name-in-module
if salt.utils.platform.is_windows():
import pywintypes
import win32com.client
@@ -1530,6 +1531,7 @@ def managed(name,
source=None,
source_hash='',
source_hash_name=None,
cache_source=True,
user=None,
group=None,
mode=None,
@@ -1729,6 +1731,15 @@ def managed(name,
.. versionadded:: 2016.3.5
cache_source : True
Set to ``False`` to discard the cached copy of the source file once the
state completes. This can be useful for larger files to keep them from
taking up space in minion cache. However, keep in mind that discarding
the source file will result in the state needing to re-download the
source file if the state is run again.
.. versionadded:: 2017.7.3
user
The user to own the file, this defaults to the user salt is running as
on the minion
@@ -2440,8 +2451,9 @@ def managed(name,
except Exception as exc:
ret['changes'] = {}
log.debug(traceback.format_exc())
if os.path.isfile(tmp_filename):
os.remove(tmp_filename)
salt.utils.files.remove(tmp_filename)
if not cache_source and sfn:
salt.utils.files.remove(sfn)
return _error(ret, 'Unable to check_cmd file: {0}'.format(exc))
# file being updated to verify using check_cmd
@@ -2459,15 +2471,9 @@ def managed(name,
cret = mod_run_check_cmd(check_cmd, tmp_filename, **check_cmd_opts)
if isinstance(cret, dict):
ret.update(cret)
if os.path.isfile(tmp_filename):
os.remove(tmp_filename)
if sfn and os.path.isfile(sfn):
os.remove(sfn)
salt.utils.files.remove(tmp_filename)
return ret
if sfn and os.path.isfile(sfn):
os.remove(sfn)
# Since we generated a new tempfile and we are not returning here
# lets change the original sfn to the new tempfile or else we will
# get file not found
@@ -2516,10 +2522,10 @@ def managed(name,
log.debug(traceback.format_exc())
return _error(ret, 'Unable to manage file: {0}'.format(exc))
finally:
if tmp_filename and os.path.isfile(tmp_filename):
os.remove(tmp_filename)
if sfn and os.path.isfile(sfn):
os.remove(sfn)
if tmp_filename:
salt.utils.files.remove(tmp_filename)
if not cache_source and sfn:
salt.utils.files.remove(sfn)
_RECURSE_TYPES = ['user', 'group', 'mode', 'ignore_files', 'ignore_dirs']
@@ -3048,6 +3054,7 @@ def directory(name,
def recurse(name,
source,
cache_source=True,
clean=False,
require=None,
user=None,
@@ -3080,6 +3087,15 @@ def recurse(name,
located on the master in the directory named spam, and is called eggs,
the source string is salt://spam/eggs
cache_source : True
Set to ``False`` to discard the cached copy of the source file once the
state completes. This can be useful for larger files to keep them from
taking up space in minion cache. However, keep in mind that discarding
the source file will result in the state needing to re-download the
source file if the state is run again.
.. versionadded:: 2017.7.3
clean
Make sure that only files that are set up by salt and required by this
function are kept. If this option is set then everything in this
@@ -3360,6 +3376,7 @@ def recurse(name,
_ret = managed(
path,
source=source,
cache_source=cache_source,
user=user,
group=group,
mode='keep' if keep_mode else file_mode,
@@ -6426,3 +6443,283 @@ def shortcut(
ret['comment'] += (', but was unable to set ownership to '
'{0}'.format(user))
return ret
def cached(name,
source_hash='',
source_hash_name=None,
skip_verify=False,
saltenv='base'):
'''
.. versionadded:: 2017.7.3
Ensures that a file is saved to the minion's cache. This state is primarily
invoked by other states to ensure that we do not re-download a source file
if we do not need to.
name
The URL of the file to be cached. To cache a file from an environment
other than ``base``, either use the ``saltenv`` argument or include the
saltenv in the URL (e.g. ``salt://path/to/file.conf?saltenv=dev``).
.. note::
A list of URLs is not supported, this must be a single URL. If a
local file is passed here, then the state will obviously not try to
download anything, but it will compare a hash if one is specified.
source_hash
See the documentation for this same argument in the
:py:func:`file.managed <salt.states.file.managed>` state.
.. note::
For remote files not originating from the ``salt://`` fileserver,
such as http(s) or ftp servers, this state will not re-download the
file if the locally-cached copy matches this hash. This is done to
prevent unnecessary downloading on repeated runs of this state. To
update the cached copy of a file, it is necessary to update this
hash.
source_hash_name
See the documentation for this same argument in the
:py:func:`file.managed <salt.states.file.managed>` state.
skip_verify
See the documentation for this same argument in the
:py:func:`file.managed <salt.states.file.managed>` state.
.. note::
Setting this to ``True`` will result in a copy of the file being
downloaded from a remote (http(s), ftp, etc.) source each time the
state is run.
saltenv
Used to specify the environment from which to download a file from the
Salt fileserver (i.e. those with ``salt://`` URL).
'''
ret = {'changes': {},
'comment': '',
'name': name,
'result': False}
try:
parsed = _urlparse(name)
except Exception:
ret['comment'] = 'Only URLs or local file paths are valid input'
return ret
# This if statement will keep the state from proceeding if a remote source
# is specified and no source_hash is presented (unless we're skipping hash
# verification).
if not skip_verify \
and not source_hash \
and parsed.scheme in salt.utils.files.REMOTE_PROTOS:
ret['comment'] = (
'Unable to verify upstream hash of source file {0}, please set '
'source_hash or set skip_verify to True'.format(name)
)
return ret
if source_hash:
# Get the hash and hash type from the input. This takes care of parsing
# the hash out of a file containing checksums, if that is how the
# source_hash was specified.
try:
source_sum = __salt__['file.get_source_sum'](
source=name,
source_hash=source_hash,
source_hash_name=source_hash_name,
saltenv=saltenv)
except CommandExecutionError as exc:
ret['comment'] = exc.strerror
return ret
else:
if not source_sum:
# We shouldn't get here, problems in retrieving the hash in
# file.get_source_sum should result in a CommandExecutionError
# being raised, which we catch above. Nevertheless, we should
# provide useful information in the event that
# file.get_source_sum regresses.
ret['comment'] = (
'Failed to get source hash from {0}. This may be a bug. '
'If this error persists, please report it and set '
'skip_verify to True to work around it.'.format(source_hash)
)
return ret
else:
source_sum = {}
if parsed.scheme in salt.utils.files.LOCAL_PROTOS:
# Source is a local file path
full_path = os.path.realpath(os.path.expanduser(parsed.path))
if os.path.exists(full_path):
if not skip_verify and source_sum:
# Enforce the hash
local_hash = __salt__['file.get_hash'](
full_path,
source_sum.get('hash_type', __opts__['hash_type']))
if local_hash == source_sum['hsum']:
ret['result'] = True
ret['comment'] = (
'File {0} is present on the minion and has hash '
'{1}'.format(full_path, local_hash)
)
else:
ret['comment'] = (
'File {0} is present on the minion, but the hash ({1}) '
'does not match the specified hash ({2})'.format(
full_path, local_hash, source_sum['hsum']
)
)
return ret
else:
ret['result'] = True
ret['comment'] = 'File {0} is present on the minion'.format(
full_path
)
return ret
else:
ret['comment'] = 'File {0} is not present on the minion'.format(
full_path
)
return ret
local_copy = __salt__['cp.is_cached'](name, saltenv=saltenv)
if local_copy:
# File is already cached
pre_hash = __salt__['file.get_hash'](
local_copy,
source_sum.get('hash_type', __opts__['hash_type']))
if not skip_verify and source_sum:
# Get the local copy's hash to compare with the hash that was
# specified via source_hash. If it matches, we can exit early from
# the state without going any further, because the file is cached
# with the correct hash.
if pre_hash == source_sum['hsum']:
ret['result'] = True
ret['comment'] = (
'File is already cached to {0} with hash {1}'.format(
local_copy, pre_hash
)
)
else:
pre_hash = None
# Cache the file. Note that this will not actually download the file if
# either of the following is true:
# 1. source is a salt:// URL and the fileserver determines that the hash
# of the minion's copy matches that of the fileserver.
# 2. File is remote (http(s), ftp, etc.) and the specified source_hash
# matches the cached copy.
# Remote, non salt:// sources _will_ download if a copy of the file was
# not already present in the minion cache.
try:
local_copy = __salt__['cp.cache_file'](
name,
saltenv=saltenv,
source_hash=source_sum.get('hsum'))
except Exception as exc:
ret['comment'] = exc.__str__()
return ret
if not local_copy:
ret['comment'] = (
'Failed to cache {0}, check minion log for more '
'information'.format(name)
)
return ret
post_hash = __salt__['file.get_hash'](
local_copy,
source_sum.get('hash_type', __opts__['hash_type']))
if pre_hash != post_hash:
ret['changes']['hash'] = {'old': pre_hash, 'new': post_hash}
# Check the hash, if we're enforcing one. Note that this will be the first
# hash check if the file was not previously cached, and the 2nd hash check
# if it was cached and the cached copy's hash did not match the
# specified source_hash (forcing the re-download above).
if not skip_verify and source_sum:
if post_hash == source_sum['hsum']:
ret['result'] = True
ret['comment'] = (
'File is already cached to {0} with hash {1}'.format(
local_copy, post_hash
)
)
else:
ret['comment'] = (
'File is cached to {0}, but the hash ({1}) does not match '
'the specified hash ({2})'.format(
local_copy, post_hash, source_sum['hsum']
)
)
return ret
# We're not enforcing a hash, and we already know that the file was
# successfully cached, so we know the state was successful.
ret['result'] = True
ret['comment'] = 'File is cached to {0}'.format(local_copy)
return ret
def not_cached(name, saltenv='base'):
'''
.. versionadded:: 2017.7.3
Ensures that a file is not present in the minion's cache, deleting it if
found. This state is primarily invoked by other states to clean up a
cached source file when it is no longer needed.
name
The URL of the file to be removed from the cache. To remove a file cached
from an environment other than ``base``, either use the ``saltenv``
argument or include the saltenv in the URL (e.g.
``salt://path/to/file.conf?saltenv=dev``).
.. note::
A list of URLs is not supported, this must be a single URL. If a
local file is passed here, the state will take no action.
saltenv
Used to specify the environment from which to download a file from the
Salt fileserver (i.e. those with ``salt://`` URL).
'''
ret = {'changes': {},
'comment': '',
'name': name,
'result': False}
try:
parsed = _urlparse(name)
except Exception:
ret['comment'] = 'Only URLs or local file paths are valid input'
return ret
else:
if parsed.scheme in salt.utils.files.LOCAL_PROTOS:
full_path = os.path.realpath(os.path.expanduser(parsed.path))
ret['result'] = True
ret['comment'] = (
'File {0} is a local path, no action taken'.format(
full_path
)
)
return ret
local_copy = __salt__['cp.is_cached'](name, saltenv=saltenv)
if local_copy:
try:
os.remove(local_copy)
except Exception as exc:
ret['comment'] = 'Failed to delete {0}: {1}'.format(
local_copy, exc.__str__()
)
else:
ret['result'] = True
ret['changes']['deleted'] = True
ret['comment'] = '{0} was deleted'.format(local_copy)
else:
ret['result'] = True
ret['comment'] = '{0} is not cached'.format(name)
return ret


@@ -35,10 +35,21 @@ except ImportError:
log = logging.getLogger(__name__)
LOCAL_PROTOS = ('', 'file')
REMOTE_PROTOS = ('http', 'https', 'ftp', 'swift', 's3')
VALID_PROTOS = ('salt', 'file') + REMOTE_PROTOS
TEMPFILE_PREFIX = '__salt.tmp.'
HASHES = {
'sha512': 128,
'sha384': 96,
'sha256': 64,
'sha224': 56,
'sha1': 40,
'md5': 32,
}
HASHES_REVMAP = dict([(y, x) for x, y in six.iteritems(HASHES)])
def guess_archive_type(name):
'''
@@ -538,3 +549,14 @@ def is_text_file(fp_, blocksize=512):
nontext = block.translate(None, text_characters)
return float(len(nontext)) / len(block) <= 0.30
def remove(path):
'''
Runs os.remove(path) and suppresses the OSError if the file doesn't exist
'''
try:
os.remove(path)
except OSError as exc:
if exc.errno != errno.ENOENT:
raise