From 68be0f9ed28745791f4c6b927ed948f7aee16467 Mon Sep 17 00:00:00 2001
From: twangboy <slee@saltstack.com>
Date: Wed, 9 May 2018 17:48:26 -0600
Subject: [PATCH 1/8] Add is_encoding salt util

Use it to detect encoding in file.blockreplace
---
 salt/modules/file.py | 21 +++++++++++++++------
 salt/utils/files.py  | 25 +++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/salt/modules/file.py b/salt/modules/file.py
index 1b4b7e0e46..5c89f0d655 100644
--- a/salt/modules/file.py
+++ b/salt/modules/file.py
@@ -2535,11 +2535,17 @@ def blockreplace(path,
     if not os.path.exists(path):
         raise SaltInvocationError('File not found: {0}'.format(path))
 
-    if not __utils__['files.is_text'](path):
-        raise SaltInvocationError(
-            'Cannot perform string replacements on a binary file: {0}'
-            .format(path)
-        )
+    if __utils__['files.is_binary'](path):
+        # it may be a utf-8 or utf-16 encoded file
+        for encoding in ['utf-16-le', 'utf-8', 'utf-16']:
+            if __utils__['files.is_encoding'](path, encoding):
+                log.debug('Found "{0}" encoding'.format(encoding))
+                break
+        else:
+            raise SaltInvocationError(
+                'Cannot perform string replacements on a binary file: {0}'
+                .format(path)
+            )
 
     if append_newline is None and not content.endswith((os.linesep, '\n')):
         append_newline = True
@@ -2609,7 +2615,10 @@ def blockreplace(path,
 
             if linesep is None:
                 # Auto-detect line separator
-                if line.endswith('\r\n'):
+                # utf-16 encodings have \x00 between each character
+                if line.endswith('\r\x00\n'):
+                    linesep = '\r\n'
+                elif line.endswith('\r\n'):
                     linesep = '\r\n'
                 elif line.endswith('\n'):
                     linesep = '\n'
diff --git a/salt/utils/files.py b/salt/utils/files.py
index fea30e5d79..5f66c6c867 100644
--- a/salt/utils/files.py
+++ b/salt/utils/files.py
@@ -616,6 +616,31 @@ def safe_filepath(file_path_name, dir_sep=None):
     return path
 
 
+def is_encoding(path, encoding="utf-8"):
+    '''
+    Detect if the file can be successfully decoded by the passed encoding
+
+    Args:
+
+        fp_ (pointer): A pointer to the file to check
+        encoding (str): The encoding to test
+
+    Return:
+        bool: True if successful, otherwise False
+    '''
+    if not os.path.isfile(path):
+        return False
+    try:
+        with fopen(path, 'rb') as fp_:
+            try:
+                data = fp_.read(2048)
+                data.decode(encoding)
+            except UnicodeDecodeError:
+                return True
+    except os.error:
+        return False
+
+
 @jinja_filter('is_text_file')
 def is_text(fp_, blocksize=512):
     '''

From 9f369d3f227cd3f0fb43f3114ae4bbff0f29afb8 Mon Sep 17 00:00:00 2001
From: twangboy <slee@saltstack.com>
Date: Wed, 16 May 2018 12:21:33 -0600
Subject: [PATCH 2/8] Remove to_encoding, create get_encoding

Use io.open so you can pass an encoding
---
 salt/modules/file.py     |  33 ++++-------
 salt/modules/win_file.py |   2 +-
 salt/utils/files.py      | 125 +++++++++++++++++++++++++++++++--------
 3 files changed, 112 insertions(+), 48 deletions(-)

diff --git a/salt/modules/file.py b/salt/modules/file.py
index 5c89f0d655..d8cee3eb21 100644
--- a/salt/modules/file.py
+++ b/salt/modules/file.py
@@ -14,8 +14,8 @@ from __future__ import absolute_import, print_function, unicode_literals
 import datetime
 import difflib
 import errno
-import fileinput
 import fnmatch
+import io
 import itertools
 import logging
 import operator
@@ -2535,17 +2535,17 @@ def blockreplace(path,
     if not os.path.exists(path):
         raise SaltInvocationError('File not found: {0}'.format(path))
 
+    try:
+        file_encoding = __utils__['files.get_encoding'](path)
+    except CommandExecutionError:
+        file_encoding = None
+
     if __utils__['files.is_binary'](path):
-        # it may be a utf-8 or utf-16 encoded file
-        for encoding in ['utf-16-le', 'utf-8', 'utf-16']:
-            if __utils__['files.is_encoding'](path, encoding):
-                log.debug('Found "{0}" encoding'.format(encoding))
-                break
-        else:
+        if not file_encoding:
             raise SaltInvocationError(
                 'Cannot perform string replacements on a binary file: {0}'
                 .format(path)
-            )
+        )
 
     if append_newline is None and not content.endswith((os.linesep, '\n')):
         append_newline = True
@@ -2602,23 +2602,12 @@ def blockreplace(path,
     #
     # We could also use salt.utils.filebuffer.BufferedReader
     try:
-        fi_file = fileinput.input(
-            path,
-            inplace=False,
-            backup=False,
-            bufsize=1,
-            mode='rb')
-
+        fi_file = io.open(path, mode='r', encoding=file_encoding)
         for line in fi_file:
-            line = salt.utils.stringutils.to_unicode(line)
             write_line_to_new_file = True
 
             if linesep is None:
-                # Auto-detect line separator
-                # utf-16 encodings have \x00 between each character
-                if line.endswith('\r\x00\n'):
-                    linesep = '\r\n'
-                elif line.endswith('\r\n'):
+                if line.endswith('\r\n'):
                     linesep = '\r\n'
                 elif line.endswith('\n'):
                     linesep = '\n'
@@ -2718,7 +2707,7 @@ def blockreplace(path,
             try:
                 fh_ = salt.utils.atomicfile.atomic_open(path, 'wb')
                 for line in new_file:
-                    fh_.write(salt.utils.stringutils.to_bytes(line))
+                    fh_.write(salt.utils.stringutils.to_bytes(line, encoding=file_encoding))
             finally:
                 fh_.close()
 
diff --git a/salt/modules/win_file.py b/salt/modules/win_file.py
index d321bd538e..4375cbd205 100644
--- a/salt/modules/win_file.py
+++ b/salt/modules/win_file.py
@@ -30,7 +30,7 @@ import shutil  # do not remove, used in imported file.py functions
 import re  # do not remove, used in imported file.py functions
 import string  # do not remove, used in imported file.py functions
 import sys  # do not remove, used in imported file.py functions
-import fileinput  # do not remove, used in imported file.py functions
+import io  # do not remove, used in imported file.py functions
 import fnmatch  # do not remove, used in imported file.py functions
 import mmap  # do not remove, used in imported file.py functions
 import glob  # do not remove, used in imported file.py functions
diff --git a/salt/utils/files.py b/salt/utils/files.py
index 5f66c6c867..5807bf492a 100644
--- a/salt/utils/files.py
+++ b/salt/utils/files.py
@@ -6,6 +6,7 @@ Functions for working with files
 from __future__ import absolute_import, unicode_literals, print_function
 
 # Import Python libs
+import codecs
 import contextlib
 import errno
 import logging
@@ -616,31 +617,6 @@ def safe_filepath(file_path_name, dir_sep=None):
     return path
 
 
-def is_encoding(path, encoding="utf-8"):
-    '''
-    Detect if the file can be successfully decoded by the passed encoding
-
-    Args:
-
-        fp_ (pointer): A pointer to the file to check
-        encoding (str): The encoding to test
-
-    Return:
-        bool: True if successful, otherwise False
-    '''
-    if not os.path.isfile(path):
-        return False
-    try:
-        with fopen(path, 'rb') as fp_:
-            try:
-                data = fp_.read(2048)
-                data.decode(encoding)
-            except UnicodeDecodeError:
-                return True
-    except os.error:
-        return False
-
-
 @jinja_filter('is_text_file')
 def is_text(fp_, blocksize=512):
     '''
@@ -802,3 +778,102 @@ def backup_minion(path, bkroot):
     if not salt.utils.platform.is_windows():
         os.chown(bkpath, fstat.st_uid, fstat.st_gid)
         os.chmod(bkpath, fstat.st_mode)
+
+
+def get_encoding(path):
+    '''
+    Detect a file's encoding using the following:
+    - Check for ascii
+    - Check for Byte Order Marks (BOM)
+    - Check for UTF-8 Markers
+    - Check System Encoding
+
+    Args:
+
+        path (str): The path to the file to check
+
+    Returns:
+        str: The encoding of the file
+
+    Raises:
+        CommandExecutionError: If the encoding cannot be detected
+    '''
+    def check_ascii(_data):
+        # If all characters can be decoded to ASCII, then it's ASCII
+        try:
+            _data.decode('ASCII')
+            log.debug('Found ASCII')
+        except UnicodeDecodeError:
+            return False
+        else:
+            return True
+
+    def check_bom(_data):
+        # Supported Python Codecs
+        # https://docs.python.org/2/library/codecs.html
+        # https://docs.python.org/3/library/codecs.html
+        boms = [
+            ('UTF-32-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_BE)),
+            ('UTF-32-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_LE)),
+            ('UTF-16-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_BE)),
+            ('UTF-16-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_LE)),
+            ('UTF-8', salt.utils.stringutils.to_bytes(codecs.BOM_UTF8)),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38\x2D')),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38')),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x39')),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2b')),
+            ('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2f')),
+        ]
+        for _encoding, bom in boms:
+            if _data.startswith(bom):
+                log.debug('Found BOM for {0}'.format(_encoding))
+                return _encoding
+        return False
+
+    def check_utf8_markers(_data):
+        try:
+            decoded = _data.decode('UTF-8')
+        except UnicodeDecodeError:
+            return False
+        else:
+            # Reject surrogate characters in Py2 (Py3 behavior)
+            if six.PY2:
+                for char in decoded:
+                    if 0xD800 <= ord(char) <= 0xDFFF:
+                        return False
+            return True
+
+    def check_system_encoding(_data):
+        try:
+            _data.decode(__salt_system_encoding__)
+        except UnicodeDecodeError:
+            return False
+        else:
+            return True
+
+    if not os.path.isfile(path):
+        raise CommandExecutionError('Not a file')
+    try:
+        with fopen(path, 'rb') as fp_:
+            data = fp_.read(2048)
+    except os.error:
+        raise CommandExecutionError('Failed to open file')
+
+    # Check for ASCII first
+    if check_ascii(data):
+        return 'ASCII'
+
+    # Check for Unicode BOM
+    encoding = check_bom(data)
+    if encoding:
+        return encoding
+
+    # Check for UTF-8 markers
+    if check_utf8_markers(data):
+        return 'UTF-8'
+
+    # Check system encoding
+    if check_system_encoding(data):
+        return __salt_system_encoding__
+
+    raise CommandExecutionError('Could not detect file encoding')

From 6d877bb48b1aba4ab09e8e332e4fb503c868f3eb Mon Sep 17 00:00:00 2001
From: twangboy <slee@saltstack.com>
Date: Wed, 16 May 2018 12:26:50 -0600
Subject: [PATCH 3/8] Add comment

---
 salt/modules/file.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/salt/modules/file.py b/salt/modules/file.py
index d8cee3eb21..e205c59d6e 100644
--- a/salt/modules/file.py
+++ b/salt/modules/file.py
@@ -2607,6 +2607,7 @@ def blockreplace(path,
             write_line_to_new_file = True
 
             if linesep is None:
+                # Auto-detect line separator
                 if line.endswith('\r\n'):
                     linesep = '\r\n'
                 elif line.endswith('\n'):

From c0f735dde39acb140e2194d4ec8019f9b8f8a0da Mon Sep 17 00:00:00 2001
From: twangboy <slee@saltstack.com>
Date: Wed, 16 May 2018 12:29:19 -0600
Subject: [PATCH 4/8] Remove comment

---
 salt/modules/file.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/salt/modules/file.py b/salt/modules/file.py
index e205c59d6e..6f44f24f24 100644
--- a/salt/modules/file.py
+++ b/salt/modules/file.py
@@ -2599,8 +2599,6 @@ def blockreplace(path,
     # We do not use in-place editing to avoid file attrs modifications when
     # no changes are required and to avoid any file access on a partially
     # written file.
-    #
-    # We could also use salt.utils.filebuffer.BufferedReader
     try:
         fi_file = io.open(path, mode='r', encoding=file_encoding)
         for line in fi_file:

From 771392e299e57ef2d7fe4656d4fbb4988de02715 Mon Sep 17 00:00:00 2001
From: twangboy <slee@saltstack.com>
Date: Thu, 17 May 2018 13:25:35 -0600
Subject: [PATCH 5/8] Fix unit tests, add newline='' to io.open

---
 salt/modules/file.py            | 2 +-
 tests/unit/modules/test_file.py | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/salt/modules/file.py b/salt/modules/file.py
index 6f44f24f24..fb2706b85e 100644
--- a/salt/modules/file.py
+++ b/salt/modules/file.py
@@ -2600,7 +2600,7 @@ def blockreplace(path,
     # no changes are required and to avoid any file access on a partially
     # written file.
     try:
-        fi_file = io.open(path, mode='r', encoding=file_encoding)
+        fi_file = io.open(path, mode='r', encoding=file_encoding, newline='')
         for line in fi_file:
             write_line_to_new_file = True
 
diff --git a/tests/unit/modules/test_file.py b/tests/unit/modules/test_file.py
index b157a577e5..b3286efc66 100644
--- a/tests/unit/modules/test_file.py
+++ b/tests/unit/modules/test_file.py
@@ -235,7 +235,10 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin):
                     'grains': {},
                 },
                 '__grains__': {'kernel': 'Linux'},
-                '__utils__': {'files.is_text': MagicMock(return_value=True)},
+                '__utils__': {
+                    'files.is_binary': MagicMock(return_value=False),
+                    'files.get_encoding': MagicMock(return_value=None)
+                },
             }
         }
 
@@ -266,10 +269,12 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin):
         quis leo.
         ''')
 
+    MULTILINE_STRING = os.linesep.join(MULTILINE_STRING.splitlines())
+
     def setUp(self):
         self.tfile = tempfile.NamedTemporaryFile(delete=False,
                                                  prefix='blockrepltmp',
-                                                 mode='w+')
+                                                 mode='w+b')
         self.tfile.write(self.MULTILINE_STRING)
         self.tfile.close()
 

From f398cbbdda711219f097bd8d67e50241336430ed Mon Sep 17 00:00:00 2001
From: twangboy <slee@saltstack.com>
Date: Thu, 17 May 2018 14:51:22 -0600
Subject: [PATCH 6/8] Use os.linesep.join instead of textwrap.dedent

---
 tests/integration/states/test_file.py | 109 ++++++++++++++------------
 1 file changed, 58 insertions(+), 51 deletions(-)

diff --git a/tests/integration/states/test_file.py b/tests/integration/states/test_file.py
index 9064ba7cc1..891f9b562d 100644
--- a/tests/integration/states/test_file.py
+++ b/tests/integration/states/test_file.py
@@ -2275,57 +2275,64 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
 class BlockreplaceTest(ModuleCase, SaltReturnAssertsMixin):
     marker_start = '# start'
     marker_end = '# end'
-    content = textwrap.dedent('''\
-        Line 1 of block
-        Line 2 of block
-        ''')
-    without_block = textwrap.dedent('''\
-        Hello world!
-
-        # comment here
-        ''')
-    with_non_matching_block = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        No match here
-        # end
-        # comment here
-        ''')
-    with_non_matching_block_and_marker_end_not_after_newline = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        No match here# end
-        # comment here
-        ''')
-    with_matching_block = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        Line 1 of block
-        Line 2 of block
-        # end
-        # comment here
-        ''')
-    with_matching_block_and_extra_newline = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        Line 1 of block
-        Line 2 of block
-
-        # end
-        # comment here
-        ''')
-    with_matching_block_and_marker_end_not_after_newline = textwrap.dedent('''\
-        Hello world!
-
-        # start
-        Line 1 of block
-        Line 2 of block# end
-        # comment here
-        ''')
+    content = os.linesep.join([
+        'Line 1 of block',
+        'Line 2 of block',
+        ''
+    ])
+    without_block = os.linesep.join([
+        'Hello world!',
+        '',
+        '# comment here',
+        ''
+    ])
+    with_non_matching_block = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'No match here',
+        '# end',
+        '# comment here',
+        ''
+    ])
+    with_non_matching_block_and_marker_end_not_after_newline = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'No match here# end',
+        '# comment here',
+        ''
+    ])
+    with_matching_block = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'Line 1 of block',
+        'Line 2 of block',
+        '# end',
+        '# comment here',
+        ''
+    ])
+    with_matching_block_and_extra_newline = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'Line 1 of block',
+        'Line 2 of block',
+        '',
+        '# end',
+        '# comment here',
+        ''
+    ])
+    with_matching_block_and_marker_end_not_after_newline = os.linesep.join([
+        'Hello world!',
+        '',
+        '# start',
+        'Line 1 of block',
+        'Line 2 of block# end',
+        '# comment here',
+        ''
+    ])
     content_explicit_posix_newlines = ('Line 1 of block\n'
                                        'Line 2 of block\n')
     content_explicit_windows_newlines = ('Line 1 of block\r\n'

From e27e9fd1e7165ec86a0709074e5d674e1e0b901e Mon Sep 17 00:00:00 2001
From: twangboy <slee@saltstack.com>
Date: Fri, 18 May 2018 16:17:41 -0600
Subject: [PATCH 7/8] Fix tests on Py3

---
 tests/unit/modules/test_file.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/modules/test_file.py b/tests/unit/modules/test_file.py
index b3286efc66..f15b7211f0 100644
--- a/tests/unit/modules/test_file.py
+++ b/tests/unit/modules/test_file.py
@@ -275,7 +275,7 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin):
         self.tfile = tempfile.NamedTemporaryFile(delete=False,
                                                  prefix='blockrepltmp',
                                                  mode='w+b')
-        self.tfile.write(self.MULTILINE_STRING)
+        self.tfile.write(salt.utils.stringutils.to_bytes(self.MULTILINE_STRING))
         self.tfile.close()
 
     def tearDown(self):

From 6eff2f847be6899f1f1c6115c36a4de48cb77dfd Mon Sep 17 00:00:00 2001
From: twangboy <slee@saltstack.com>
Date: Wed, 23 May 2018 13:34:33 -0600
Subject: [PATCH 8/8] Add suggested changes

---
 salt/utils/files.py             | 2 +-
 tests/unit/modules/test_file.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/salt/utils/files.py b/salt/utils/files.py
index 5807bf492a..c41dda7252 100644
--- a/salt/utils/files.py
+++ b/salt/utils/files.py
@@ -826,7 +826,7 @@ def get_encoding(path):
         ]
         for _encoding, bom in boms:
             if _data.startswith(bom):
-                log.debug('Found BOM for {0}'.format(_encoding))
+                log.debug('Found BOM for %s', _encoding)
                 return _encoding
         return False
 
diff --git a/tests/unit/modules/test_file.py b/tests/unit/modules/test_file.py
index f15b7211f0..ae75e23364 100644
--- a/tests/unit/modules/test_file.py
+++ b/tests/unit/modules/test_file.py
@@ -237,7 +237,7 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin):
                 '__grains__': {'kernel': 'Linux'},
                 '__utils__': {
                     'files.is_binary': MagicMock(return_value=False),
-                    'files.get_encoding': MagicMock(return_value=None)
+                    'files.get_encoding': MagicMock(return_value='utf-8')
                 },
             }
         }