From fd1d89d3852a71571ec87c9cc52f6a1156c1db6a Mon Sep 17 00:00:00 2001 From: Erik Johnson Date: Thu, 14 Sep 2017 18:51:40 -0500 Subject: [PATCH] Reduce unnecessary file downloading in archive/file states The file.managed state, which is used by the archive.extracted state to download the source archive, at some point recently was modified to clear the file from the minion cache. This caused unnecessary re-downloading on subsequent runs, which slows down states considerably when dealing with larger archives. This commit makes the following changes to improve this: 1. The fileclient now accepts a `source_hash` argument, which will cause the client's get_url function to skip downloading http(s) and ftp files if the file is already cached, and its hash matches the passed hash. This argument has also been added to the `cp.get_url` and `cp.cache_file` function. 2. We no longer try to download the file when it's an http(s) or ftp URL when running `file.source_list`. 3. Where `cp.cache_file` is used, we pass the `source_hash` if it is available. 4. A `cache_source` argument has been added to the `file.managed` state, defaulting to `True`. This is now used to control whether or not the source file is cleared from the minion cache when the state completes. 5. Two new states (`file.cached` and `file.not_cached`) have been added to managed files in the minion cache. In addition, the `archive.extracted` state has been modified in the following ways: 1. For consistency with `file.managed`, a `cache_source` argument has been added. This also deprecates `keep`. If `keep` is used, `cache_source` assumes its value, and a warning is added to the state return to let the user know to update their SLS. 2. The variable name `cached_source` (used internally in the `archive.extracted` state) has been renamed to `cached` to reduce confusion with the new `cache_source` argument. 3. The new `file.cached` and `file.not_cached` states are now used to manage the source tarball instead of `file.managed`. This improves disk usage and reduces unnecessary complexity in the state as we no longer keep a copy of the archive in a separate location within the cachedir. We now only use the copy downloaded using `cp.cache_file` within the `file.cached` state. This change has also necessitated a new home for hash files tracked by the `source_hash_update` argument, in a subdirectory of the minion cachedir called `archive_hash`. --- salt/fileclient.py | 25 +++- salt/modules/archive.py | 31 +++- salt/modules/cp.py | 28 +++- salt/modules/file.py | 69 +++++---- salt/states/archive.py | 204 +++++++++++-------------- salt/states/file.py | 323 ++++++++++++++++++++++++++++++++++++++-- salt/utils/files.py | 22 +++ 7 files changed, 531 insertions(+), 171 deletions(-) diff --git a/salt/fileclient.py b/salt/fileclient.py index cb3b210a03..35c63b2cb1 100644 --- a/salt/fileclient.py +++ b/salt/fileclient.py @@ -185,12 +185,13 @@ class Client(object): ''' raise NotImplementedError - def cache_file(self, path, saltenv=u'base', cachedir=None): + def cache_file(self, path, saltenv=u'base', cachedir=None, source_hash=None): ''' Pull a file down from the file server and store it in the minion file cache ''' - return self.get_url(path, u'', True, saltenv, cachedir=cachedir) + return self.get_url( + path, u'', True, saltenv, cachedir=cachedir, source_hash=source_hash) def cache_files(self, paths, saltenv=u'base', cachedir=None): ''' @@ -470,7 +471,7 @@ class Client(object): return ret def get_url(self, url, dest, makedirs=False, saltenv=u'base', - no_cache=False, cachedir=None): + no_cache=False, cachedir=None, source_hash=None): ''' Get a single file from a URL. ''' @@ -525,6 +526,18 @@ class Client(object): return u'' elif not no_cache: dest = self._extrn_path(url, saltenv, cachedir=cachedir) + if source_hash is not None: + try: + source_hash = source_hash.split('=')[-1] + form = salt.utils.files.HASHES_REVMAP[len(source_hash)] + if salt.utils.get_hash(dest, form) == source_hash: + log.debug( + 'Cached copy of %s (%s) matches source_hash %s, ' + 'skipping download', url, dest, source_hash + ) + return dest + except (AttributeError, KeyError, IOError, OSError): + pass destdir = os.path.dirname(dest) if not os.path.isdir(destdir): os.makedirs(destdir) @@ -532,7 +545,9 @@ class Client(object): if url_data.scheme == u's3': try: def s3_opt(key, default=None): - u'''Get value of s3. from Minion config or from Pillar''' + ''' + Get value of s3. from Minion config or from Pillar + ''' if u's3.' + key in self.opts: return self.opts[u's3.' + key] try: @@ -785,7 +800,7 @@ class Client(object): def _extrn_path(self, url, saltenv, cachedir=None): ''' - Return the extn_filepath for a given url + Return the extrn_filepath for a given url ''' url_data = urlparse(url) if salt.utils.platform.is_windows(): diff --git a/salt/modules/archive.py b/salt/modules/archive.py index 70ef0bdecc..7d627f7fdb 100644 --- a/salt/modules/archive.py +++ b/salt/modules/archive.py @@ -60,7 +60,8 @@ def list_(name, strip_components=None, clean=False, verbose=False, - saltenv='base'): + saltenv='base', + source_hash=None): ''' .. versionadded:: 2016.11.0 .. versionchanged:: 2016.11.2 @@ -149,6 +150,14 @@ def list_(name, ``archive``. This is only applicable when ``archive`` is a file from the ``salt://`` fileserver. + source_hash + If ``name`` is an http(s)/ftp URL and the file exists in the minion's + file cache, this option can be passed to keep the minion from + re-downloading the archive if the cached copy matches the specified + hash. + + .. versionadded:: Oxygen + .. _tarfile: https://docs.python.org/2/library/tarfile.html .. _xz: http://tukaani.org/xz/ @@ -160,6 +169,7 @@ def list_(name, salt '*' archive.list /path/to/myfile.tar.gz strip_components=1 salt '*' archive.list salt://foo.tar.gz salt '*' archive.list https://domain.tld/myfile.zip + salt '*' archive.list https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15 salt '*' archive.list ftp://10.1.2.3/foo.rar ''' def _list_tar(name, cached, decompress_cmd, failhard=False): @@ -309,7 +319,7 @@ def list_(name, ) return dirs, files, [] - cached = __salt__['cp.cache_file'](name, saltenv) + cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash) if not cached: raise CommandExecutionError('Failed to cache {0}'.format(name)) @@ -1094,7 +1104,7 @@ def unzip(zip_file, return _trim_files(cleaned_files, trim_output) -def is_encrypted(name, clean=False, saltenv='base'): +def is_encrypted(name, clean=False, saltenv='base', source_hash=None): ''' .. versionadded:: 2016.11.0 @@ -1113,6 +1123,18 @@ def is_encrypted(name, clean=False, saltenv='base'): If there is an error listing the archive's contents, the cached file will not be removed, to allow for troubleshooting. + saltenv : base + Specifies the fileserver environment from which to retrieve + ``archive``. This is only applicable when ``archive`` is a file from + the ``salt://`` fileserver. + + source_hash + If ``name`` is an http(s)/ftp URL and the file exists in the minion's + file cache, this option can be passed to keep the minion from + re-downloading the archive if the cached copy matches the specified + hash. + + .. versionadded:: Oxygen CLI Examples: @@ -1122,9 +1144,10 @@ def is_encrypted(name, clean=False, saltenv='base'): salt '*' archive.is_encrypted salt://foo.zip salt '*' archive.is_encrypted salt://foo.zip saltenv=dev salt '*' archive.is_encrypted https://domain.tld/myfile.zip clean=True + salt '*' archive.is_encrypted https://domain.tld/myfile.zip source_hash=f1d2d2f924e986ac86fdf7b36c94bcdf32beec15 salt '*' archive.is_encrypted ftp://10.1.2.3/foo.zip ''' - cached = __salt__['cp.cache_file'](name, saltenv) + cached = __salt__['cp.cache_file'](name, saltenv, source_hash=source_hash) if not cached: raise CommandExecutionError('Failed to cache {0}'.format(name)) diff --git a/salt/modules/cp.py b/salt/modules/cp.py index 86634d559c..cdbeb4434e 100644 --- a/salt/modules/cp.py +++ b/salt/modules/cp.py @@ -352,7 +352,7 @@ def get_dir(path, dest, saltenv='base', template=None, gzip=None, **kwargs): return _client().get_dir(path, dest, saltenv, gzip) -def get_url(path, dest='', saltenv='base', makedirs=False): +def get_url(path, dest='', saltenv='base', makedirs=False, source_hash=None): ''' .. versionchanged:: Oxygen ``dest`` can now be a directory @@ -386,6 +386,13 @@ def get_url(path, dest='', saltenv='base', makedirs=False): Salt fileserver envrionment from which to retrieve the file. Ignored if ``path`` is not a ``salt://`` URL. + source_hash + If ``path`` is an http(s) or ftp URL and the file exists in the + minion's file cache, this option can be passed to keep the minion from + re-downloading the file if the cached copy matches the specified hash. + + .. versionadded:: Oxygen + CLI Example: .. code-block:: bash @@ -394,9 +401,11 @@ def get_url(path, dest='', saltenv='base', makedirs=False): salt '*' cp.get_url http://www.slashdot.org /tmp/index.html ''' if isinstance(dest, six.string_types): - result = _client().get_url(path, dest, makedirs, saltenv) + result = _client().get_url( + path, dest, makedirs, saltenv, source_hash=source_hash) else: - result = _client().get_url(path, None, makedirs, saltenv, no_cache=True) + result = _client().get_url( + path, None, makedirs, saltenv, no_cache=True, source_hash=source_hash) if not result: log.error( 'Unable to fetch file {0} from saltenv {1}.'.format( @@ -429,11 +438,18 @@ def get_file_str(path, saltenv='base'): return fn_ -def cache_file(path, saltenv='base'): +def cache_file(path, saltenv='base', source_hash=None): ''' Used to cache a single file on the Minion - Returns the location of the new cached file on the Minion. + Returns the location of the new cached file on the Minion + + source_hash + If ``name`` is an http(s) or ftp URL and the file exists in the + minion's file cache, this option can be passed to keep the minion from + re-downloading the file if the cached copy matches the specified hash. + + .. versionadded:: Oxygen CLI Example: @@ -485,7 +501,7 @@ def cache_file(path, saltenv='base'): if senv: saltenv = senv - result = _client().cache_file(path, saltenv) + result = _client().cache_file(path, saltenv, source_hash=source_hash) if not result: log.error( u'Unable to cache file \'%s\' from saltenv \'%s\'.', diff --git a/salt/modules/file.py b/salt/modules/file.py index 944736f740..7dfd5ced01 100644 --- a/salt/modules/file.py +++ b/salt/modules/file.py @@ -60,6 +60,7 @@ import salt.utils.stringutils import salt.utils.templates import salt.utils.url from salt.exceptions import CommandExecutionError, MinionError, SaltInvocationError, get_error_message as _get_error_message +from salt.utils.files import HASHES, HASHES_REVMAP log = logging.getLogger(__name__) @@ -67,16 +68,6 @@ __func_alias__ = { 'makedirs_': 'makedirs' } -HASHES = { - 'sha512': 128, - 'sha384': 96, - 'sha256': 64, - 'sha224': 56, - 'sha1': 40, - 'md5': 32, -} -HASHES_REVMAP = dict([(y, x) for x, y in six.iteritems(HASHES)]) - def __virtual__(): ''' @@ -3767,14 +3758,8 @@ def source_list(source, source_hash, saltenv): ret = (single_src, single_hash) break elif proto.startswith('http') or proto == 'ftp': - try: - if __salt__['cp.cache_file'](single_src): - ret = (single_src, single_hash) - break - except MinionError as exc: - # Error downloading file. Log the caught exception and - # continue on to the next source. - log.exception(exc) + ret = (single_src, single_hash) + break elif proto == 'file' and os.path.exists(urlparsed_single_src.path): ret = (single_src, single_hash) break @@ -3794,9 +3779,8 @@ def source_list(source, source_hash, saltenv): ret = (single, source_hash) break elif proto.startswith('http') or proto == 'ftp': - if __salt__['cp.cache_file'](single): - ret = (single, source_hash) - break + ret = (single, source_hash) + break elif single.startswith('/') and os.path.exists(single): ret = (single, source_hash) break @@ -4007,11 +3991,14 @@ def get_managed( else: sfn = cached_dest - # If we didn't have the template or remote file, let's get it - # Similarly when the file has been updated and the cache has to be refreshed + # If we didn't have the template or remote file, or the file has been + # updated and the cache has to be refreshed, download the file. if not sfn or cache_refetch: try: - sfn = __salt__['cp.cache_file'](source, saltenv) + sfn = __salt__['cp.cache_file']( + source, + saltenv, + source_hash=source_sum.get('hsum')) except Exception as exc: # A 404 or other error code may raise an exception, catch it # and return a comment that will fail the calling state. @@ -4675,7 +4662,7 @@ def check_file_meta( ''' changes = {} if not source_sum: - source_sum = dict() + source_sum = {} lstats = stats(name, hash_type=source_sum.get('hash_type', None), follow_symlinks=False) if not lstats: changes['newfile'] = name @@ -4683,7 +4670,10 @@ def check_file_meta( if 'hsum' in source_sum: if source_sum['hsum'] != lstats['sum']: if not sfn and source: - sfn = __salt__['cp.cache_file'](source, saltenv) + sfn = __salt__['cp.cache_file']( + source, + saltenv, + source_hash=source_sum['hsum']) if sfn: try: changes['diff'] = get_diff( @@ -4750,7 +4740,9 @@ def get_diff(file1, saltenv='base', show_filenames=True, show_changes=True, - template=False): + template=False, + source_hash_file1=None, + source_hash_file2=None): ''' Return unified diff of two files @@ -4785,6 +4777,22 @@ def get_diff(file1, .. versionadded:: Oxygen + source_hash_file1 + If ``file1`` is an http(s)/ftp URL and the file exists in the minion's + file cache, this option can be passed to keep the minion from + re-downloading the archive if the cached copy matches the specified + hash. + + .. versionadded:: Oxygen + + source_hash_file2 + If ``file2`` is an http(s)/ftp URL and the file exists in the minion's + file cache, this option can be passed to keep the minion from + re-downloading the archive if the cached copy matches the specified + hash. + + .. versionadded:: Oxygen + CLI Examples: .. code-block:: bash @@ -4793,14 +4801,17 @@ def get_diff(file1, salt '*' file.get_diff /tmp/foo.txt /tmp/bar.txt ''' files = (file1, file2) + source_hashes = (source_hash_file1, source_hash_file2) paths = [] errors = [] - for filename in files: + for filename, source_hash in zip(files, source_hashes): try: # Local file paths will just return the same path back when passed # to cp.cache_file. - cached_path = __salt__['cp.cache_file'](filename, saltenv) + cached_path = __salt__['cp.cache_file'](filename, + saltenv, + source_hash=source_hash) if cached_path is False: errors.append( u'File {0} not found'.format( diff --git a/salt/states/archive.py b/salt/states/archive.py index c2308cbbd0..d6e46e595e 100644 --- a/salt/states/archive.py +++ b/salt/states/archive.py @@ -64,16 +64,30 @@ def _gen_checksum(path): 'hash_type': __opts__['hash_type']} -def _update_checksum(cached_source): - cached_source_sum = '.'.join((cached_source, 'hash')) - source_sum = _gen_checksum(cached_source) +def _checksum_file_path(path): + relpath = '.'.join((os.path.relpath(path, __opts__['cachedir']), 'hash')) + if re.match(r'..[/\\]', relpath): + # path is a local file + relpath = salt.utils.path.join( + 'local', + os.path.splitdrive(path)[-1].lstrip('/\\'), + ) + return salt.utils.path.join(__opts__['cachedir'], 'archive_hash', relpath) + + +def _update_checksum(path): + checksum_file = _checksum_file_path(path) + checksum_dir = os.path.dirname(checksum_file) + if not os.path.isdir(checksum_dir): + os.makedirs(checksum_dir) + source_sum = _gen_checksum(path) hash_type = source_sum.get('hash_type') hsum = source_sum.get('hsum') if hash_type and hsum: lines = [] try: try: - with salt.utils.files.fopen(cached_source_sum, 'r') as fp_: + with salt.utils.files.fopen(checksum_file, 'r') as fp_: for line in fp_: try: lines.append(line.rstrip('\n').split(':', 1)) @@ -83,7 +97,7 @@ def _update_checksum(cached_source): if exc.errno != errno.ENOENT: raise - with salt.utils.files.fopen(cached_source_sum, 'w') as fp_: + with salt.utils.files.fopen(checksum_file, 'w') as fp_: for line in lines: if line[0] == hash_type: line[1] = hsum @@ -93,16 +107,16 @@ def _update_checksum(cached_source): except (IOError, OSError) as exc: log.warning( 'Failed to update checksum for %s: %s', - cached_source, exc.__str__() + path, exc.__str__(), exc_info=True ) -def _read_cached_checksum(cached_source, form=None): +def _read_cached_checksum(path, form=None): if form is None: form = __opts__['hash_type'] - path = '.'.join((cached_source, 'hash')) + checksum_file = _checksum_file_path(path) try: - with salt.utils.files.fopen(path, 'r') as fp_: + with salt.utils.files.fopen(checksum_file, 'r') as fp_: for line in fp_: # Should only be one line in this file but just in case it # isn't, read only a single line to avoid overuse of memory. @@ -117,9 +131,9 @@ def _read_cached_checksum(cached_source, form=None): return {'hash_type': hash_type, 'hsum': hsum} -def _compare_checksum(cached_source, source_sum): +def _compare_checksum(cached, source_sum): cached_sum = _read_cached_checksum( - cached_source, + cached, form=source_sum.get('hash_type', __opts__['hash_type']) ) return source_sum == cached_sum @@ -146,6 +160,7 @@ def extracted(name, source_hash_name=None, source_hash_update=False, skip_verify=False, + cache_source=True, password=None, options=None, list_options=None, @@ -155,7 +170,6 @@ def extracted(name, user=None, group=None, if_missing=None, - keep=False, trim_output=False, use_cmd_unzip=None, extract_perms=True, @@ -391,6 +405,19 @@ def extracted(name, .. versionadded:: 2016.3.4 + cache_source : True + For ``source`` archives not local to the minion (i.e. from the Salt + fileserver or a remote source such as ``http(s)`` or ``ftp``), Salt + will need to download the archive to the minion cache before they can + be extracted. To remove the downloaded archive after extraction, set + this argument to ``False``. + + .. versionadded:: 2017.7.3 + + keep : True + .. deprecated:: 2017.7.3 + Use ``cache_source`` instead + password **For ZIP archives only.** Password used for extraction. @@ -518,13 +545,6 @@ def extracted(name, simply checked for existence and extraction will be skipped if if is present. - keep : False - For ``source`` archives not local to the minion (i.e. from the Salt - fileserver or a remote source such as ``http(s)`` or ``ftp``), Salt - will need to download the archive to the minion cache before they can - be extracted. After extraction, these source archives will be removed - unless this argument is set to ``True``. - trim_output : False Useful for archives with many files in them. This can either be set to ``True`` (in which case only the first 100 files extracted will be @@ -626,6 +646,14 @@ def extracted(name, # Remove pub kwargs as they're irrelevant here. kwargs = salt.utils.args.clean_kwargs(**kwargs) + if 'keep' in kwargs: + cache_source = bool(kwargs.pop('keep')) + ret.setdefault('warnings', []).append( + 'The \'keep\' argument has been renamed to \'cache_source\'. ' + 'Assumed cache_source={0}. Please update your SLS to get rid of ' + 'this warning.'.format(cache_source) + ) + if not _path_is_abs(name): ret['comment'] = '{0} is not an absolute path'.format(name) return ret @@ -721,10 +749,10 @@ def extracted(name, urlparsed_source = _urlparse(source_match) source_hash_basename = urlparsed_source.path or urlparsed_source.netloc - source_is_local = urlparsed_source.scheme in ('', 'file') + source_is_local = urlparsed_source.scheme in salt.utils.files.LOCAL_PROTOS if source_is_local: # Get rid of "file://" from start of source_match - source_match = urlparsed_source.path + source_match = os.path.realpath(os.path.expanduser(urlparsed_source.path)) if not os.path.isfile(source_match): ret['comment'] = 'Source file \'{0}\' does not exist'.format(source_match) return ret @@ -858,95 +886,49 @@ def extracted(name, source_sum = {} if source_is_local: - cached_source = source_match + cached = source_match else: - cached_source = os.path.join( - __opts__['cachedir'], - 'files', - __env__, - re.sub(r'[:/\\]', '_', source_hash_basename), - ) - - if os.path.isdir(cached_source): - # Prevent a traceback from attempting to read from a directory path - salt.utils.files.rm_rf(cached_source) - - existing_cached_source_sum = _read_cached_checksum(cached_source) - - if source_is_local: - # No need to download archive, it's local to the minion - update_source = False - else: - if not os.path.isfile(cached_source): - # Archive not cached, we need to download it - update_source = True - else: - # Archive is cached, keep=True likely used in prior run. If we need - # to verify the hash, then we *have* to update the source archive - # to know whether or not the hash changed. Hence the below - # statement. bool(source_hash) will be True if source_hash was - # passed, and otherwise False. - update_source = bool(source_hash) - - if update_source: if __opts__['test']: ret['result'] = None ret['comment'] = ( - 'Archive {0} would be downloaded to cache and checked to ' - 'discover if extraction is necessary'.format( + 'Archive {0} would be cached (if necessary) and checked to ' + 'discover if extraction is needed'.format( salt.utils.url.redact_http_basic_auth(source_match) ) ) return ret - # NOTE: This will result in more than one copy of the source archive on - # the minion. The reason this is necessary is because if we are - # tracking the checksum using source_hash_update, we need a location - # where we can place the checksum file alongside the cached source - # file, where it won't be overwritten by caching a file with the same - # name in the same parent dir as the source file. Long term, we should - # come up with a better solution for this. - file_result = __states__['file.managed'](cached_source, - source=source_match, - source_hash=source_hash, - source_hash_name=source_hash_name, - makedirs=True, - skip_verify=skip_verify) - log.debug('file.managed: {0}'.format(file_result)) + result = __states__['file.cached'](source_match, + source_hash=source_hash, + source_hash_name=source_hash_name, + skip_verify=skip_verify, + saltenv=__env__) + log.debug('file.cached: {0}'.format(result)) # Prevent a traceback if errors prevented the above state from getting # off the ground. - if isinstance(file_result, list): + if isinstance(result, list): try: - ret['comment'] = '\n'.join(file_result) + ret['comment'] = '\n'.join(result) except TypeError: - ret['comment'] = '\n'.join([str(x) for x in file_result]) + ret['comment'] = '\n'.join([str(x) for x in result]) return ret - try: - if not file_result['result']: - log.debug( - 'failed to download %s', - salt.utils.url.redact_http_basic_auth(source_match) - ) - return file_result - except TypeError: - if not file_result: - log.debug( - 'failed to download %s', - salt.utils.url.redact_http_basic_auth(source_match) - ) - return file_result + if result['result']: + # Get the path of the file in the minion cache + cached = __salt__['cp.is_cached'](source_match) + else: + log.debug( + 'failed to download %s', + salt.utils.url.redact_http_basic_auth(source_match) + ) + return result - else: - log.debug( - 'Archive %s is already in cache', - salt.utils.url.redact_http_basic_auth(source_match) - ) + existing_cached_source_sum = _read_cached_checksum(cached) if source_hash and source_hash_update and not skip_verify: # Create local hash sum file if we're going to track sum update - _update_checksum(cached_source) + _update_checksum(cached) if archive_format == 'zip' and not password: log.debug('Checking %s to see if it is password-protected', @@ -955,7 +937,7 @@ def extracted(name, # implicitly enabled by setting the "options" argument. try: encrypted_zip = __salt__['archive.is_encrypted']( - cached_source, + cached, clean=False, saltenv=__env__) except CommandExecutionError: @@ -973,7 +955,7 @@ def extracted(name, return ret try: - contents = __salt__['archive.list'](cached_source, + contents = __salt__['archive.list'](cached, archive_format=archive_format, options=list_options, strip_components=strip_components, @@ -1142,7 +1124,7 @@ def extracted(name, if not extraction_needed \ and source_hash_update \ and existing_cached_source_sum is not None \ - and not _compare_checksum(cached_source, existing_cached_source_sum): + and not _compare_checksum(cached, existing_cached_source_sum): extraction_needed = True source_hash_trigger = True else: @@ -1200,13 +1182,13 @@ def extracted(name, __states__['file.directory'](name, user=user, makedirs=True) created_destdir = True - log.debug('Extracting {0} to {1}'.format(cached_source, name)) + log.debug('Extracting {0} to {1}'.format(cached, name)) try: if archive_format == 'zip': if use_cmd_unzip: try: files = __salt__['archive.cmd_unzip']( - cached_source, + cached, name, options=options, trim_output=trim_output, @@ -1216,7 +1198,7 @@ def extracted(name, ret['comment'] = exc.strerror return ret else: - files = __salt__['archive.unzip'](cached_source, + files = __salt__['archive.unzip'](cached, name, options=options, trim_output=trim_output, @@ -1225,7 +1207,7 @@ def extracted(name, **kwargs) elif archive_format == 'rar': try: - files = __salt__['archive.unrar'](cached_source, + files = __salt__['archive.unrar'](cached, name, trim_output=trim_output, **kwargs) @@ -1235,7 +1217,7 @@ def extracted(name, else: if options is None: try: - with closing(tarfile.open(cached_source, 'r')) as tar: + with closing(tarfile.open(cached, 'r')) as tar: tar.extractall(name) files = tar.getnames() if trim_output: @@ -1243,7 +1225,7 @@ def extracted(name, except tarfile.ReadError: if salt.utils.path.which('xz'): if __salt__['cmd.retcode']( - ['xz', '-t', cached_source], + ['xz', '-t', cached], python_shell=False, ignore_retcode=True) == 0: # XZ-compressed data @@ -1259,7 +1241,7 @@ def extracted(name, # pipe it to tar for extraction. cmd = 'xz --decompress --stdout {0} | tar xvf -' results = __salt__['cmd.run_all']( - cmd.format(_cmd_quote(cached_source)), + cmd.format(_cmd_quote(cached)), cwd=name, python_shell=True) if results['retcode'] != 0: @@ -1329,7 +1311,7 @@ def extracted(name, tar_cmd.append(tar_shortopts) tar_cmd.extend(tar_longopts) - tar_cmd.extend(['-f', cached_source]) + tar_cmd.extend(['-f', cached]) results = __salt__['cmd.run_all'](tar_cmd, cwd=name, @@ -1500,18 +1482,12 @@ def extracted(name, for item in enforce_failed: ret['comment'] += '\n- {0}'.format(item) - if not source_is_local and not keep: - for path in (cached_source, __salt__['cp.is_cached'](source_match)): - if not path: - continue - log.debug('Cleaning cached source file %s', path) - try: - os.remove(path) - except OSError as exc: - if exc.errno != errno.ENOENT: - log.error( - 'Failed to clean cached source file %s: %s', - cached_source, exc.__str__() - ) + if not cache_source and not source_is_local: + log.debug('Cleaning cached source file %s', cached) + result = __states__['file.not_cached'](source_match, saltenv=__env__) + if not result['result']: + # Don't let failure to delete cached file cause the state itself ot + # fail, just drop it in the warnings. + ret.setdefault('warnings', []).append(result['comment']) return ret diff --git a/salt/states/file.py b/salt/states/file.py index 05801ff544..cedd99e624 100644 --- a/salt/states/file.py +++ b/salt/states/file.py @@ -299,6 +299,7 @@ if salt.utils.platform.is_windows(): # Import 3rd-party libs from salt.ext import six from salt.ext.six.moves import zip_longest +from salt.ext.six.moves.urllib.parse import urlparse as _urlparse # pylint: disable=no-name-in-module if salt.utils.platform.is_windows(): import pywintypes import win32com.client @@ -1530,6 +1531,7 @@ def managed(name, source=None, source_hash='', source_hash_name=None, + cache_source=True, user=None, group=None, mode=None, @@ -1729,6 +1731,15 @@ def managed(name, .. versionadded:: 2016.3.5 + cache_source : True + Set to ``False`` to discard the cached copy of the source file once the + state completes. This can be useful for larger files to keep them from + taking up space in minion cache. However, keep in mind that discarding + the source file will result in the state needing to re-download the + source file if the state is run again. + + .. versionadded:: 2017.7.3 + user The user to own the file, this defaults to the user salt is running as on the minion @@ -2440,8 +2451,9 @@ def managed(name, except Exception as exc: ret['changes'] = {} log.debug(traceback.format_exc()) - if os.path.isfile(tmp_filename): - os.remove(tmp_filename) + salt.utils.files.remove(tmp_filename) + if not cache_source and sfn: + salt.utils.files.remove(sfn) return _error(ret, 'Unable to check_cmd file: {0}'.format(exc)) # file being updated to verify using check_cmd @@ -2459,15 +2471,9 @@ def managed(name, cret = mod_run_check_cmd(check_cmd, tmp_filename, **check_cmd_opts) if isinstance(cret, dict): ret.update(cret) - if os.path.isfile(tmp_filename): - os.remove(tmp_filename) - if sfn and os.path.isfile(sfn): - os.remove(sfn) + salt.utils.files.remove(tmp_filename) return ret - if sfn and os.path.isfile(sfn): - os.remove(sfn) - # Since we generated a new tempfile and we are not returning here # lets change the original sfn to the new tempfile or else we will # get file not found @@ -2516,10 +2522,10 @@ def managed(name, log.debug(traceback.format_exc()) return _error(ret, 'Unable to manage file: {0}'.format(exc)) finally: - if tmp_filename and os.path.isfile(tmp_filename): - os.remove(tmp_filename) - if sfn and os.path.isfile(sfn): - os.remove(sfn) + if tmp_filename: + salt.utils.files.remove(tmp_filename) + if not cache_source and sfn: + salt.utils.files.remove(sfn) _RECURSE_TYPES = ['user', 'group', 'mode', 'ignore_files', 'ignore_dirs'] @@ -3048,6 +3054,7 @@ def directory(name, def recurse(name, source, + cache_source=True, clean=False, require=None, user=None, @@ -3080,6 +3087,15 @@ def recurse(name, located on the master in the directory named spam, and is called eggs, the source string is salt://spam/eggs + cache_source : True + Set to ``False`` to discard the cached copy of the source file once the + state completes. This can be useful for larger files to keep them from + taking up space in minion cache. However, keep in mind that discarding + the source file will result in the state needing to re-download the + source file if the state is run again. + + .. versionadded:: 2017.7.3 + clean Make sure that only files that are set up by salt and required by this function are kept. If this option is set then everything in this @@ -3360,6 +3376,7 @@ def recurse(name, _ret = managed( path, source=source, + cache_source=cache_source, user=user, group=group, mode='keep' if keep_mode else file_mode, @@ -6426,3 +6443,283 @@ def shortcut( ret['comment'] += (', but was unable to set ownership to ' '{0}'.format(user)) return ret + + +def cached(name, + source_hash='', + source_hash_name=None, + skip_verify=False, + saltenv='base'): + ''' + .. versionadded:: 2017.7.3 + + Ensures that a file is saved to the minion's cache. This state is primarily + invoked by other states to ensure that we do not re-download a source file + if we do not need to. + + name + The URL of the file to be cached. To cache a file from an environment + other than ``base``, either use the ``saltenv`` argument or include the + saltenv in the URL (e.g. ``salt://path/to/file.conf?saltenv=dev``). + + .. note:: + A list of URLs is not supported, this must be a single URL. If a + local file is passed here, then the state will obviously not try to + download anything, but it will compare a hash if one is specified. + + source_hash + See the documentation for this same argument in the + :py:func:`file.managed ` state. + + .. note:: + For remote files not originating from the ``salt://`` fileserver, + such as http(s) or ftp servers, this state will not re-download the + file if the locally-cached copy matches this hash. This is done to + prevent unnecessary downloading on repeated runs of this state. To + update the cached copy of a file, it is necessary to update this + hash. + + source_hash_name + See the documentation for this same argument in the + :py:func:`file.managed ` state. + + skip_verify + See the documentation for this same argument in the + :py:func:`file.managed ` state. + + .. note:: + Setting this to ``True`` will result in a copy of the file being + downloaded from a remote (http(s), ftp, etc.) source each time the + state is run. + + saltenv + Used to specify the environment from which to download a file from the + Salt fileserver (i.e. those with ``salt://`` URL). + ''' + ret = {'changes': {}, + 'comment': '', + 'name': name, + 'result': False} + + try: + parsed = _urlparse(name) + except Exception: + ret['comment'] = 'Only URLs or local file paths are valid input' + return ret + + # This if statement will keep the state from proceeding if a remote source + # is specified and no source_hash is presented (unless we're skipping hash + # verification). + if not skip_verify \ + and not source_hash \ + and parsed.scheme in salt.utils.files.REMOTE_PROTOS: + ret['comment'] = ( + 'Unable to verify upstream hash of source file {0}, please set ' + 'source_hash or set skip_verify to True'.format(name) + ) + return ret + + if source_hash: + # Get the hash and hash type from the input. This takes care of parsing + # the hash out of a file containing checksums, if that is how the + # source_hash was specified. + try: + source_sum = __salt__['file.get_source_sum']( + source=name, + source_hash=source_hash, + source_hash_name=source_hash_name, + saltenv=saltenv) + except CommandExecutionError as exc: + ret['comment'] = exc.strerror + return ret + else: + if not source_sum: + # We shouldn't get here, problems in retrieving the hash in + # file.get_source_sum should result in a CommandExecutionError + # being raised, which we catch above. Nevertheless, we should + # provide useful information in the event that + # file.get_source_sum regresses. + ret['comment'] = ( + 'Failed to get source hash from {0}. This may be a bug. ' + 'If this error persists, please report it and set ' + 'skip_verify to True to work around it.'.format(source_hash) + ) + return ret + else: + source_sum = {} + + if parsed.scheme in salt.utils.files.LOCAL_PROTOS: + # Source is a local file path + full_path = os.path.realpath(os.path.expanduser(parsed.path)) + if os.path.exists(full_path): + if not skip_verify and source_sum: + # Enforce the hash + local_hash = __salt__['file.get_hash']( + full_path, + source_sum.get('hash_type', __opts__['hash_type'])) + if local_hash == source_sum['hsum']: + ret['result'] = True + ret['comment'] = ( + 'File {0} is present on the minion and has hash ' + '{1}'.format(full_path, local_hash) + ) + else: + ret['comment'] = ( + 'File {0} is present on the minion, but the hash ({1}) ' + 'does not match the specified hash ({2})'.format( + full_path, local_hash, source_sum['hsum'] + ) + ) + return ret + else: + ret['result'] = True + ret['comment'] = 'File {0} is present on the minion'.format( + full_path + ) + return ret + else: + ret['comment'] = 'File {0} is not present on the minion'.format( + full_path + ) + return ret + + local_copy = __salt__['cp.is_cached'](name, saltenv=saltenv) + + if local_copy: + # File is already cached + pre_hash = __salt__['file.get_hash']( + local_copy, + source_sum.get('hash_type', __opts__['hash_type'])) + + if not skip_verify and source_sum: + # Get the local copy's hash to compare with the hash that was + # specified via source_hash. If it matches, we can exit early from + # the state without going any further, because the file is cached + # with the correct hash. + if pre_hash == source_sum['hsum']: + ret['result'] = True + ret['comment'] = ( + 'File is already cached to {0} with hash {1}'.format( + local_copy, pre_hash + ) + ) + else: + pre_hash = None + + # Cache the file. Note that this will not actually download the file if + # either of the following is true: + # 1. source is a salt:// URL and the fileserver determines that the hash + # of the minion's copy matches that of the fileserver. + # 2. File is remote (http(s), ftp, etc.) and the specified source_hash + # matches the cached copy. + # Remote, non salt:// sources _will_ download if a copy of the file was + # not already present in the minion cache. + try: + local_copy = __salt__['cp.cache_file']( + name, + saltenv=saltenv, + source_hash=source_sum.get('hsum')) + except Exception as exc: + ret['comment'] = exc.__str__() + return ret + + if not local_copy: + ret['comment'] = ( + 'Failed to cache {0}, check minion log for more ' + 'information'.format(name) + ) + return ret + + post_hash = __salt__['file.get_hash']( + local_copy, + source_sum.get('hash_type', __opts__['hash_type'])) + + if pre_hash != post_hash: + ret['changes']['hash'] = {'old': pre_hash, 'new': post_hash} + + # Check the hash, if we're enforcing one. Note that this will be the first + # hash check if the file was not previously cached, and the 2nd hash check + # if it was cached and the + if not skip_verify and source_sum: + if post_hash == source_sum['hsum']: + ret['result'] = True + ret['comment'] = ( + 'File is already cached to {0} with hash {1}'.format( + local_copy, post_hash + ) + ) + else: + ret['comment'] = ( + 'File is cached to {0}, but the hash ({1}) does not match ' + 'the specified hash ({2})'.format( + local_copy, post_hash, source_sum['hsum'] + ) + ) + return ret + + # We're not enforcing a hash, and we already know that the file was + # successfully cached, so we know the state was successful. + ret['result'] = True + ret['comment'] = 'File is cached to {0}'.format(local_copy) + return ret + + +def not_cached(name, saltenv='base'): + ''' + .. versionadded:: 2017.7.3 + + Ensures that a file is saved to the minion's cache. This state is primarily + invoked by other states to ensure that we do not re-download a source file + if we do not need to. + + name + The URL of the file to be cached. To cache a file from an environment + other than ``base``, either use the ``saltenv`` argument or include the + saltenv in the URL (e.g. ``salt://path/to/file.conf?saltenv=dev``). + + .. note:: + A list of URLs is not supported, this must be a single URL. If a + local file is passed here, the state will take no action. + + saltenv + Used to specify the environment from which to download a file from the + Salt fileserver (i.e. those with ``salt://`` URL). + ''' + ret = {'changes': {}, + 'comment': '', + 'name': name, + 'result': False} + + try: + parsed = _urlparse(name) + except Exception: + ret['comment'] = 'Only URLs or local file paths are valid input' + return ret + else: + if parsed.scheme in salt.utils.files.LOCAL_PROTOS: + full_path = os.path.realpath(os.path.expanduser(parsed.path)) + ret['result'] = True + ret['comment'] = ( + 'File {0} is a local path, no action taken'.format( + full_path + ) + ) + return ret + + local_copy = __salt__['cp.is_cached'](name, saltenv=saltenv) + + if local_copy: + try: + os.remove(local_copy) + except Exception as exc: + ret['comment'] = 'Failed to delete {0}: {1}'.format( + local_copy, exc.__str__() + ) + else: + ret['result'] = True + ret['changes']['deleted'] = True + ret['comment'] = '{0} was deleted'.format(local_copy) + else: + ret['result'] = True + ret['comment'] = '{0} is not cached'.format(name) + return ret diff --git a/salt/utils/files.py b/salt/utils/files.py index 1d7068987a..c55ac86324 100644 --- a/salt/utils/files.py +++ b/salt/utils/files.py @@ -35,10 +35,21 @@ except ImportError: log = logging.getLogger(__name__) +LOCAL_PROTOS = ('', 'file') REMOTE_PROTOS = ('http', 'https', 'ftp', 'swift', 's3') VALID_PROTOS = ('salt', 'file') + REMOTE_PROTOS TEMPFILE_PREFIX = '__salt.tmp.' +HASHES = { + 'sha512': 128, + 'sha384': 96, + 'sha256': 64, + 'sha224': 56, + 'sha1': 40, + 'md5': 32, +} +HASHES_REVMAP = dict([(y, x) for x, y in six.iteritems(HASHES)]) + def guess_archive_type(name): ''' @@ -538,3 +549,14 @@ def is_text_file(fp_, blocksize=512): nontext = block.translate(None, text_characters) return float(len(nontext)) / len(block) <= 0.30 + + +def remove(path): + ''' + Runs os.remove(path) and suppresses the OSError if the file doesn't exist + ''' + try: + os.remove(path) + except OSError as exc: + if exc.errno != errno.ENOENT: + raise