From 68be0f9ed28745791f4c6b927ed948f7aee16467 Mon Sep 17 00:00:00 2001 From: twangboy Date: Wed, 9 May 2018 17:48:26 -0600 Subject: [PATCH 1/8] Add is_encoding salt util Use it to detect encoding in file.blockreplace --- salt/modules/file.py | 21 +++++++++++++++------ salt/utils/files.py | 25 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/salt/modules/file.py b/salt/modules/file.py index 1b4b7e0e46..5c89f0d655 100644 --- a/salt/modules/file.py +++ b/salt/modules/file.py @@ -2535,11 +2535,17 @@ def blockreplace(path, if not os.path.exists(path): raise SaltInvocationError('File not found: {0}'.format(path)) - if not __utils__['files.is_text'](path): - raise SaltInvocationError( - 'Cannot perform string replacements on a binary file: {0}' - .format(path) - ) + if __utils__['files.is_binary'](path): + # it may be a utf-8 or utf-16 encoded file + for encoding in ['utf-16-le', 'utf-8', 'utf-16']: + if __utils__['files.is_encoding'](path, encoding): + log.debug('Found "{0}" encoding'.format(encoding)) + break + else: + raise SaltInvocationError( + 'Cannot perform string replacements on a binary file: {0}' + .format(path) + ) if append_newline is None and not content.endswith((os.linesep, '\n')): append_newline = True @@ -2609,7 +2615,10 @@ def blockreplace(path, if linesep is None: # Auto-detect line separator - if line.endswith('\r\n'): + # utf-16 encodings have \x00 between each character + if line.endswith('\r\x00\n'): + linesep = '\r\n' + elif line.endswith('\r\n'): linesep = '\r\n' elif line.endswith('\n'): linesep = '\n' diff --git a/salt/utils/files.py b/salt/utils/files.py index fea30e5d79..5f66c6c867 100644 --- a/salt/utils/files.py +++ b/salt/utils/files.py @@ -616,6 +616,31 @@ def safe_filepath(file_path_name, dir_sep=None): return path +def is_encoding(path, encoding="utf-8"): + ''' + Detect if the file can be successfully decoded by the passed encoding + + Args: + + fp_ (pointer): A pointer to the file to check + encoding (str): The encoding to test + + Return: + bool: True if successful, otherwise False + ''' + if not os.path.isfile(path): + return False + try: + with fopen(path, 'rb') as fp_: + try: + data = fp_.read(2048) + data.decode(encoding) + except UnicodeDecodeError: + return True + except os.error: + return False + + @jinja_filter('is_text_file') def is_text(fp_, blocksize=512): ''' From 9f369d3f227cd3f0fb43f3114ae4bbff0f29afb8 Mon Sep 17 00:00:00 2001 From: twangboy Date: Wed, 16 May 2018 12:21:33 -0600 Subject: [PATCH 2/8] Remove to_encoding, create get_encoding Use io.open so you can pass an encoding --- salt/modules/file.py | 33 ++++------- salt/modules/win_file.py | 2 +- salt/utils/files.py | 125 +++++++++++++++++++++++++++++++-------- 3 files changed, 112 insertions(+), 48 deletions(-) diff --git a/salt/modules/file.py b/salt/modules/file.py index 5c89f0d655..d8cee3eb21 100644 --- a/salt/modules/file.py +++ b/salt/modules/file.py @@ -14,8 +14,8 @@ from __future__ import absolute_import, print_function, unicode_literals import datetime import difflib import errno -import fileinput import fnmatch +import io import itertools import logging import operator @@ -2535,17 +2535,17 @@ def blockreplace(path, if not os.path.exists(path): raise SaltInvocationError('File not found: {0}'.format(path)) + try: + file_encoding = __utils__['files.get_encoding'](path) + except CommandExecutionError: + file_encoding = None + if __utils__['files.is_binary'](path): - # it may be a utf-8 or utf-16 encoded file - for encoding in ['utf-16-le', 'utf-8', 'utf-16']: - if __utils__['files.is_encoding'](path, encoding): - log.debug('Found "{0}" encoding'.format(encoding)) - break - else: + if not file_encoding: raise SaltInvocationError( 'Cannot perform string replacements on a binary file: {0}' .format(path) - ) + ) if append_newline is None and not content.endswith((os.linesep, '\n')): append_newline = True @@ -2602,23 +2602,12 @@ def blockreplace(path, # # We could also use salt.utils.filebuffer.BufferedReader try: - fi_file = fileinput.input( - path, - inplace=False, - backup=False, - bufsize=1, - mode='rb') - + fi_file = io.open(path, mode='r', encoding=file_encoding) for line in fi_file: - line = salt.utils.stringutils.to_unicode(line) write_line_to_new_file = True if linesep is None: - # Auto-detect line separator - # utf-16 encodings have \x00 between each character - if line.endswith('\r\x00\n'): - linesep = '\r\n' - elif line.endswith('\r\n'): + if line.endswith('\r\n'): linesep = '\r\n' elif line.endswith('\n'): linesep = '\n' @@ -2718,7 +2707,7 @@ def blockreplace(path, try: fh_ = salt.utils.atomicfile.atomic_open(path, 'wb') for line in new_file: - fh_.write(salt.utils.stringutils.to_bytes(line)) + fh_.write(salt.utils.stringutils.to_bytes(line, encoding=file_encoding)) finally: fh_.close() diff --git a/salt/modules/win_file.py b/salt/modules/win_file.py index d321bd538e..4375cbd205 100644 --- a/salt/modules/win_file.py +++ b/salt/modules/win_file.py @@ -30,7 +30,7 @@ import shutil # do not remove, used in imported file.py functions import re # do not remove, used in imported file.py functions import string # do not remove, used in imported file.py functions import sys # do not remove, used in imported file.py functions -import fileinput # do not remove, used in imported file.py functions +import io # do not remove, used in imported file.py functions import fnmatch # do not remove, used in imported file.py functions import mmap # do not remove, used in imported file.py functions import glob # do not remove, used in imported file.py functions diff --git a/salt/utils/files.py b/salt/utils/files.py index 5f66c6c867..5807bf492a 100644 --- a/salt/utils/files.py +++ b/salt/utils/files.py @@ -6,6 +6,7 @@ Functions for working with files from __future__ import absolute_import, unicode_literals, print_function # Import Python libs +import codecs import contextlib import errno import logging @@ -616,31 +617,6 @@ def safe_filepath(file_path_name, dir_sep=None): return path -def is_encoding(path, encoding="utf-8"): - ''' - Detect if the file can be successfully decoded by the passed encoding - - Args: - - fp_ (pointer): A pointer to the file to check - encoding (str): The encoding to test - - Return: - bool: True if successful, otherwise False - ''' - if not os.path.isfile(path): - return False - try: - with fopen(path, 'rb') as fp_: - try: - data = fp_.read(2048) - data.decode(encoding) - except UnicodeDecodeError: - return True - except os.error: - return False - - @jinja_filter('is_text_file') def is_text(fp_, blocksize=512): ''' @@ -802,3 +778,102 @@ def backup_minion(path, bkroot): if not salt.utils.platform.is_windows(): os.chown(bkpath, fstat.st_uid, fstat.st_gid) os.chmod(bkpath, fstat.st_mode) + + +def get_encoding(path): + ''' + Detect a file's encoding using the following: + - Check for ascii + - Check for Byte Order Marks (BOM) + - Check for UTF-8 Markers + - Check System Encoding + + Args: + + path (str): The path to the file to check + + Returns: + str: The encoding of the file + + Raises: + CommandExecutionError: If the encoding cannot be detected + ''' + def check_ascii(_data): + # If all characters can be decoded to ASCII, then it's ASCII + try: + _data.decode('ASCII') + log.debug('Found ASCII') + except UnicodeDecodeError: + return False + else: + return True + + def check_bom(_data): + # Supported Python Codecs + # https://docs.python.org/2/library/codecs.html + # https://docs.python.org/3/library/codecs.html + boms = [ + ('UTF-32-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_BE)), + ('UTF-32-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_LE)), + ('UTF-16-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_BE)), + ('UTF-16-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_LE)), + ('UTF-8', salt.utils.stringutils.to_bytes(codecs.BOM_UTF8)), + ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38\x2D')), + ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38')), + ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x39')), + ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2b')), + ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2f')), + ] + for _encoding, bom in boms: + if _data.startswith(bom): + log.debug('Found BOM for {0}'.format(_encoding)) + return _encoding + return False + + def check_utf8_markers(_data): + try: + decoded = _data.decode('UTF-8') + except UnicodeDecodeError: + return False + else: + # Reject surrogate characters in Py2 (Py3 behavior) + if six.PY2: + for char in decoded: + if 0xD800 <= ord(char) <= 0xDFFF: + return False + return True + + def check_system_encoding(_data): + try: + _data.decode(__salt_system_encoding__) + except UnicodeDecodeError: + return False + else: + return True + + if not os.path.isfile(path): + raise CommandExecutionError('Not a file') + try: + with fopen(path, 'rb') as fp_: + data = fp_.read(2048) + except os.error: + raise CommandExecutionError('Failed to open file') + + # Check for ASCII first + if check_ascii(data): + return 'ASCII' + + # Check for Unicode BOM + encoding = check_bom(data) + if encoding: + return encoding + + # Check for UTF-8 markers + if check_utf8_markers(data): + return 'UTF-8' + + # Check system encoding + if check_system_encoding(data): + return __salt_system_encoding__ + + raise CommandExecutionError('Could not detect file encoding') From 6d877bb48b1aba4ab09e8e332e4fb503c868f3eb Mon Sep 17 00:00:00 2001 From: twangboy Date: Wed, 16 May 2018 12:26:50 -0600 Subject: [PATCH 3/8] Add comment --- salt/modules/file.py | 1 + 1 file changed, 1 insertion(+) diff --git a/salt/modules/file.py b/salt/modules/file.py index d8cee3eb21..e205c59d6e 100644 --- a/salt/modules/file.py +++ b/salt/modules/file.py @@ -2607,6 +2607,7 @@ def blockreplace(path, write_line_to_new_file = True if linesep is None: + # Auto-detect line separator if line.endswith('\r\n'): linesep = '\r\n' elif line.endswith('\n'): From c0f735dde39acb140e2194d4ec8019f9b8f8a0da Mon Sep 17 00:00:00 2001 From: twangboy Date: Wed, 16 May 2018 12:29:19 -0600 Subject: [PATCH 4/8] Remove comment --- salt/modules/file.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/salt/modules/file.py b/salt/modules/file.py index e205c59d6e..6f44f24f24 100644 --- a/salt/modules/file.py +++ b/salt/modules/file.py @@ -2599,8 +2599,6 @@ def blockreplace(path, # We do not use in-place editing to avoid file attrs modifications when # no changes are required and to avoid any file access on a partially # written file. - # - # We could also use salt.utils.filebuffer.BufferedReader try: fi_file = io.open(path, mode='r', encoding=file_encoding) for line in fi_file: From 771392e299e57ef2d7fe4656d4fbb4988de02715 Mon Sep 17 00:00:00 2001 From: twangboy Date: Thu, 17 May 2018 13:25:35 -0600 Subject: [PATCH 5/8] Fix unit tests, add newline='' to io.open --- salt/modules/file.py | 2 +- tests/unit/modules/test_file.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/salt/modules/file.py b/salt/modules/file.py index 6f44f24f24..fb2706b85e 100644 --- a/salt/modules/file.py +++ b/salt/modules/file.py @@ -2600,7 +2600,7 @@ def blockreplace(path, # no changes are required and to avoid any file access on a partially # written file. try: - fi_file = io.open(path, mode='r', encoding=file_encoding) + fi_file = io.open(path, mode='r', encoding=file_encoding, newline='') for line in fi_file: write_line_to_new_file = True diff --git a/tests/unit/modules/test_file.py b/tests/unit/modules/test_file.py index b157a577e5..b3286efc66 100644 --- a/tests/unit/modules/test_file.py +++ b/tests/unit/modules/test_file.py @@ -235,7 +235,10 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin): 'grains': {}, }, '__grains__': {'kernel': 'Linux'}, - '__utils__': {'files.is_text': MagicMock(return_value=True)}, + '__utils__': { + 'files.is_binary': MagicMock(return_value=False), + 'files.get_encoding': MagicMock(return_value=None) + }, } } @@ -266,10 +269,12 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin): quis leo. ''') + MULTILINE_STRING = os.linesep.join(MULTILINE_STRING.splitlines()) + def setUp(self): self.tfile = tempfile.NamedTemporaryFile(delete=False, prefix='blockrepltmp', - mode='w+') + mode='w+b') self.tfile.write(self.MULTILINE_STRING) self.tfile.close() From f398cbbdda711219f097bd8d67e50241336430ed Mon Sep 17 00:00:00 2001 From: twangboy Date: Thu, 17 May 2018 14:51:22 -0600 Subject: [PATCH 6/8] Use os.linesep.join instead of textwrap.dedent --- tests/integration/states/test_file.py | 109 ++++++++++++++------------ 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/tests/integration/states/test_file.py b/tests/integration/states/test_file.py index 9064ba7cc1..891f9b562d 100644 --- a/tests/integration/states/test_file.py +++ b/tests/integration/states/test_file.py @@ -2275,57 +2275,64 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin): class BlockreplaceTest(ModuleCase, SaltReturnAssertsMixin): marker_start = '# start' marker_end = '# end' - content = textwrap.dedent('''\ - Line 1 of block - Line 2 of block - ''') - without_block = textwrap.dedent('''\ - Hello world! - - # comment here - ''') - with_non_matching_block = textwrap.dedent('''\ - Hello world! - - # start - No match here - # end - # comment here - ''') - with_non_matching_block_and_marker_end_not_after_newline = textwrap.dedent('''\ - Hello world! - - # start - No match here# end - # comment here - ''') - with_matching_block = textwrap.dedent('''\ - Hello world! - - # start - Line 1 of block - Line 2 of block - # end - # comment here - ''') - with_matching_block_and_extra_newline = textwrap.dedent('''\ - Hello world! - - # start - Line 1 of block - Line 2 of block - - # end - # comment here - ''') - with_matching_block_and_marker_end_not_after_newline = textwrap.dedent('''\ - Hello world! - - # start - Line 1 of block - Line 2 of block# end - # comment here - ''') + content = os.linesep.join([ + 'Line 1 of block', + 'Line 2 of block', + '' + ]) + without_block = os.linesep.join([ + 'Hello world!', + '', + '# comment here', + '' + ]) + with_non_matching_block = os.linesep.join([ + 'Hello world!', + '', + '# start', + 'No match here', + '# end', + '# comment here', + '' + ]) + with_non_matching_block_and_marker_end_not_after_newline = os.linesep.join([ + 'Hello world!', + '', + '# start', + 'No match here# end', + '# comment here', + '' + ]) + with_matching_block = os.linesep.join([ + 'Hello world!', + '', + '# start', + 'Line 1 of block', + 'Line 2 of block', + '# end', + '# comment here', + '' + ]) + with_matching_block_and_extra_newline = os.linesep.join([ + 'Hello world!', + '', + '# start', + 'Line 1 of block', + 'Line 2 of block', + '', + '# end', + '# comment here', + '' + ]) + with_matching_block_and_marker_end_not_after_newline = os.linesep.join([ + 'Hello world!', + '', + '# start', + 'Line 1 of block', + 'Line 2 of block# end', + '# comment here', + '' + ]) content_explicit_posix_newlines = ('Line 1 of block\n' 'Line 2 of block\n') content_explicit_windows_newlines = ('Line 1 of block\r\n' From e27e9fd1e7165ec86a0709074e5d674e1e0b901e Mon Sep 17 00:00:00 2001 From: twangboy Date: Fri, 18 May 2018 16:17:41 -0600 Subject: [PATCH 7/8] Fix tests on Py3 --- tests/unit/modules/test_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/modules/test_file.py b/tests/unit/modules/test_file.py index b3286efc66..f15b7211f0 100644 --- a/tests/unit/modules/test_file.py +++ b/tests/unit/modules/test_file.py @@ -275,7 +275,7 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin): self.tfile = tempfile.NamedTemporaryFile(delete=False, prefix='blockrepltmp', mode='w+b') - self.tfile.write(self.MULTILINE_STRING) + self.tfile.write(salt.utils.stringutils.to_bytes(self.MULTILINE_STRING)) self.tfile.close() def tearDown(self): From 6eff2f847be6899f1f1c6115c36a4de48cb77dfd Mon Sep 17 00:00:00 2001 From: twangboy Date: Wed, 23 May 2018 13:34:33 -0600 Subject: [PATCH 8/8] Add suggested changes --- salt/utils/files.py | 2 +- tests/unit/modules/test_file.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/salt/utils/files.py b/salt/utils/files.py index 5807bf492a..c41dda7252 100644 --- a/salt/utils/files.py +++ b/salt/utils/files.py @@ -826,7 +826,7 @@ def get_encoding(path): ] for _encoding, bom in boms: if _data.startswith(bom): - log.debug('Found BOM for {0}'.format(_encoding)) + log.debug('Found BOM for %s', _encoding) return _encoding return False diff --git a/tests/unit/modules/test_file.py b/tests/unit/modules/test_file.py index f15b7211f0..ae75e23364 100644 --- a/tests/unit/modules/test_file.py +++ b/tests/unit/modules/test_file.py @@ -237,7 +237,7 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin): '__grains__': {'kernel': 'Linux'}, '__utils__': { 'files.is_binary': MagicMock(return_value=False), - 'files.get_encoding': MagicMock(return_value=None) + 'files.get_encoding': MagicMock(return_value='utf-8') }, } }