Merge pull request #47581 from twangboy/fix_47274

Add get_encoding salt util
2024-11-07 08:58:59 +00:00 · 2018-05-25 16:40:06 -04:00 · 2018-05-25 16:40:06 -04:00 · da9eaa1825
commit da9eaa1825
parent b860d95b2c ff6600f25e
5 changed files with 180 additions and 71 deletions
--- a/salt/modules/file.py
+++ b/salt/modules/file.py
@ -14,8 +14,8 @@ from __future__ import absolute_import, print_function, unicode_literals
 import datetime
 import difflib
 import errno
-import fileinput
 import fnmatch
+import io
 import itertools
 import logging
 import operator
@ -2535,10 +2535,16 @@ def blockreplace(path,
    if not os.path.exists(path):
        raise SaltInvocationError('File not found: {0}'.format(path))

-    if not __utils__['files.is_text'](path):
-        raise SaltInvocationError(
-            'Cannot perform string replacements on a binary file: {0}'
-            .format(path)
+    try:
+        file_encoding = __utils__['files.get_encoding'](path)
+    except CommandExecutionError:
+        file_encoding = None
+
+    if __utils__['files.is_binary'](path):
+        if not file_encoding:
+            raise SaltInvocationError(
+                'Cannot perform string replacements on a binary file: {0}'
+                .format(path)
        )

    if append_newline is None and not content.endswith((os.linesep, '\n')):
@ -2593,18 +2599,9 @@ def blockreplace(path,
    # We do not use in-place editing to avoid file attrs modifications when
    # no changes are required and to avoid any file access on a partially
    # written file.
-    #
-    # We could also use salt.utils.filebuffer.BufferedReader
    try:
-        fi_file = fileinput.input(
-            path,
-            inplace=False,
-            backup=False,
-            bufsize=1,
-            mode='rb')
-
+        fi_file = io.open(path, mode='r', encoding=file_encoding, newline='')
        for line in fi_file:
-            line = salt.utils.stringutils.to_unicode(line)
            write_line_to_new_file = True

            if linesep is None:
@ -2709,7 +2706,7 @@ def blockreplace(path,
            try:
                fh_ = salt.utils.atomicfile.atomic_open(path, 'wb')
                for line in new_file:
-                    fh_.write(salt.utils.stringutils.to_bytes(line))
+                    fh_.write(salt.utils.stringutils.to_bytes(line, encoding=file_encoding))
            finally:
                fh_.close()

--- a/salt/modules/win_file.py
+++ b/salt/modules/win_file.py
@ -30,7 +30,7 @@ import shutil  # do not remove, used in imported file.py functions
 import re  # do not remove, used in imported file.py functions
 import string  # do not remove, used in imported file.py functions
 import sys  # do not remove, used in imported file.py functions
-import fileinput  # do not remove, used in imported file.py functions
+import io  # do not remove, used in imported file.py functions
 import fnmatch  # do not remove, used in imported file.py functions
 import mmap  # do not remove, used in imported file.py functions
 import glob  # do not remove, used in imported file.py functions
--- a/salt/utils/files.py
+++ b/salt/utils/files.py
@ -6,6 +6,7 @@ Functions for working with files
 from __future__ import absolute_import, unicode_literals, print_function

 # Import Python libs
+import codecs
 import contextlib
 import errno
 import logging
@ -777,3 +778,102 @@ def backup_minion(path, bkroot):
    if not salt.utils.platform.is_windows():
        os.chown(bkpath, fstat.st_uid, fstat.st_gid)
        os.chmod(bkpath, fstat.st_mode)
+
+
+def get_encoding(path):
+    '''
+    Detect a file's encoding from a sample of its leading bytes, using the
+    following checks in order:
+    - Check for ascii
+    - Check for Byte Order Marks (BOM)
+    - Check for UTF-8 Markers
+    - Check System Encoding
+
+    Args:
+
+        path (str): The path to the file to check
+
+    Returns:
+        str: The encoding of the file
+
+    Raises:
+        CommandExecutionError: If ``path`` is not a file, cannot be read, or
+            its encoding cannot be detected
+    '''
+    # Number of leading bytes inspected by every check below
+    sample_size = 2048
+
+    def decode_sample(_data, _encoding, _truncated):
+        # Decode incrementally so that, when the sample stops short of the
+        # end of the file, a multi-byte character cut in half by the read
+        # boundary is buffered instead of raising UnicodeDecodeError.
+        try:
+            decoder = codecs.getincrementaldecoder(_encoding)()
+        except LookupError:
+            # Codec without incremental support: fall back to a plain decode
+            # (an unknown codec still raises LookupError from here)
+            return _data.decode(_encoding)
+        return decoder.decode(_data, final=not _truncated)
+
+    def check_ascii(_data):
+        # If all characters can be decoded to ASCII, then it's ASCII
+        try:
+            _data.decode('ASCII')
+        except UnicodeDecodeError:
+            return False
+        log.debug('Found ASCII')
+        return True
+
+    def check_bom(_data):
+        # Supported Python Codecs
+        # https://docs.python.org/2/library/codecs.html
+        # https://docs.python.org/3/library/codecs.html
+        #
+        # Order matters: the UTF-32-LE BOM begins with the UTF-16-LE BOM, so
+        # the UTF-32 entries have to be tested first.
+        boms = [
+            ('UTF-32-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_BE)),
+            ('UTF-32-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_LE)),
+            ('UTF-16-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_BE)),
+            ('UTF-16-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_LE)),
+            ('UTF-8', salt.utils.stringutils.to_bytes(codecs.BOM_UTF8)),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38\x2D')),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38')),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x39')),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2b')),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2f')),
+        ]
+        for _encoding, bom in boms:
+            if _data.startswith(bom):
+                log.debug('Found BOM for %s', _encoding)
+                return _encoding
+        return False
+
+    def check_utf8_markers(_data, _truncated):
+        try:
+            decoded = decode_sample(_data, 'UTF-8', _truncated)
+        except UnicodeDecodeError:
+            return False
+        # Reject surrogate characters in Py2 (Py3 behavior)
+        if six.PY2:
+            for char in decoded:
+                if 0xD800 <= ord(char) <= 0xDFFF:
+                    return False
+        return True
+
+    def check_system_encoding(_data, _truncated):
+        try:
+            decode_sample(_data, __salt_system_encoding__, _truncated)
+        except UnicodeDecodeError:
+            return False
+        return True
+
+    if not os.path.isfile(path):
+        raise CommandExecutionError('Not a file')
+    try:
+        with fopen(path, 'rb') as fp_:
+            # Read one byte past the sample to learn whether the file is
+            # longer than what gets inspected
+            data = fp_.read(sample_size + 1)
+    except (IOError, OSError):
+        # On Python 2 the built-in open() raises IOError, which is not a
+        # subclass of OSError (os.error); presumably fopen behaves the same,
+        # so both are caught here.
+        raise CommandExecutionError('Failed to open file')
+    truncated = len(data) > sample_size
+    data = data[:sample_size]
+
+    # Check for ASCII first
+    # NOTE(review): UTF-7 text is 7-bit, so a well-formed UTF-7 sample is
+    # reported as ASCII here and the UTF-7 BOM entries in check_bom are
+    # effectively never reached. Kept as-is to preserve existing results;
+    # confirm whether the BOM check should run before this one.
+    if check_ascii(data):
+        return 'ASCII'
+
+    # Check for Unicode BOM
+    encoding = check_bom(data)
+    if encoding:
+        return encoding
+
+    # Check for UTF-8 markers
+    if check_utf8_markers(data, truncated):
+        return 'UTF-8'
+
+    # Check system encoding
+    if check_system_encoding(data, truncated):
+        return __salt_system_encoding__
+
+    raise CommandExecutionError('Could not detect file encoding')
--- a/tests/integration/states/test_file.py
+++ b/tests/integration/states/test_file.py
@ -2275,57 +2275,64 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
 class BlockreplaceTest(ModuleCase, SaltReturnAssertsMixin):
    marker_start = '# start'
    marker_end = '# end'
-    content = textwrap.dedent('''\
-        Line 1 of block
-        Line 2 of block
-        ''')
-    without_block = textwrap.dedent('''\
-        Hello world!
-
-        # comment here
-        ''')
-    with_non_matching_block = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        No match here
-        # end
-        # comment here
-        ''')
-    with_non_matching_block_and_marker_end_not_after_newline = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        No match here# end
-        # comment here
-        ''')
-    with_matching_block = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        Line 1 of block
-        Line 2 of block
-        # end
-        # comment here
-        ''')
-    with_matching_block_and_extra_newline = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        Line 1 of block
-        Line 2 of block
-
-        # end
-        # comment here
-        ''')
-    with_matching_block_and_marker_end_not_after_newline = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        Line 1 of block
-        Line 2 of block# end
-        # comment here
-        ''')
+    content = os.linesep.join([
+        'Line 1 of block',
+        'Line 2 of block',
+        ''
+    ])
+    without_block = os.linesep.join([
+        'Hello world!',
+        '',
+        '# comment here',
+        ''
+    ])
+    with_non_matching_block = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'No match here',
+        '# end',
+        '# comment here',
+        ''
+    ])
+    with_non_matching_block_and_marker_end_not_after_newline = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'No match here# end',
+        '# comment here',
+        ''
+    ])
+    with_matching_block = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'Line 1 of block',
+        'Line 2 of block',
+        '# end',
+        '# comment here',
+        ''
+    ])
+    with_matching_block_and_extra_newline = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'Line 1 of block',
+        'Line 2 of block',
+        '',
+        '# end',
+        '# comment here',
+        ''
+    ])
+    with_matching_block_and_marker_end_not_after_newline = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'Line 1 of block',
+        'Line 2 of block# end',
+        '# comment here',
+        ''
+    ])
    content_explicit_posix_newlines = ('Line 1 of block\n'
                                       'Line 2 of block\n')
    content_explicit_windows_newlines = ('Line 1 of block\r\n'
--- a/tests/unit/modules/test_file.py
+++ b/tests/unit/modules/test_file.py
@ -235,7 +235,10 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin):
                    'grains': {},
                },
                '__grains__': {'kernel': 'Linux'},
-                '__utils__': {'files.is_text': MagicMock(return_value=True)},
+                '__utils__': {
+                    'files.is_binary': MagicMock(return_value=False),
+                    'files.get_encoding': MagicMock(return_value='utf-8')
+                },
            }
        }

@ -266,11 +269,13 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin):
        quis leo.
        ''')

+    MULTILINE_STRING = os.linesep.join(MULTILINE_STRING.splitlines())
+
    def setUp(self):
        self.tfile = tempfile.NamedTemporaryFile(delete=False,
                                                 prefix='blockrepltmp',
-                                                 mode='w+')
-        self.tfile.write(self.MULTILINE_STRING)
+                                                 mode='w+b')
+        self.tfile.write(salt.utils.stringutils.to_bytes(self.MULTILINE_STRING))
        self.tfile.close()

    def tearDown(self):