Merge pull request #47581 from twangboy/fix_47274

Add get_encoding salt util
This commit is contained in:
Nicole Thomas 2018-05-25 16:40:06 -04:00 committed by GitHub
commit da9eaa1825
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 180 additions and 71 deletions

View File

@ -14,8 +14,8 @@ from __future__ import absolute_import, print_function, unicode_literals
import datetime
import difflib
import errno
import fileinput
import fnmatch
import io
import itertools
import logging
import operator
@ -2535,10 +2535,16 @@ def blockreplace(path,
if not os.path.exists(path):
raise SaltInvocationError('File not found: {0}'.format(path))
if not __utils__['files.is_text'](path):
raise SaltInvocationError(
'Cannot perform string replacements on a binary file: {0}'
.format(path)
try:
file_encoding = __utils__['files.get_encoding'](path)
except CommandExecutionError:
file_encoding = None
if __utils__['files.is_binary'](path):
if not file_encoding:
raise SaltInvocationError(
'Cannot perform string replacements on a binary file: {0}'
.format(path)
)
if append_newline is None and not content.endswith((os.linesep, '\n')):
@ -2593,18 +2599,9 @@ def blockreplace(path,
# We do not use in-place editing to avoid file attrs modifications when
# no changes are required and to avoid any file access on a partially
# written file.
#
# We could also use salt.utils.filebuffer.BufferedReader
try:
fi_file = fileinput.input(
path,
inplace=False,
backup=False,
bufsize=1,
mode='rb')
fi_file = io.open(path, mode='r', encoding=file_encoding, newline='')
for line in fi_file:
line = salt.utils.stringutils.to_unicode(line)
write_line_to_new_file = True
if linesep is None:
@ -2709,7 +2706,7 @@ def blockreplace(path,
try:
fh_ = salt.utils.atomicfile.atomic_open(path, 'wb')
for line in new_file:
fh_.write(salt.utils.stringutils.to_bytes(line))
fh_.write(salt.utils.stringutils.to_bytes(line, encoding=file_encoding))
finally:
fh_.close()

View File

@ -30,7 +30,7 @@ import shutil # do not remove, used in imported file.py functions
import re # do not remove, used in imported file.py functions
import string # do not remove, used in imported file.py functions
import sys # do not remove, used in imported file.py functions
import fileinput # do not remove, used in imported file.py functions
import io # do not remove, used in imported file.py functions
import fnmatch # do not remove, used in imported file.py functions
import mmap # do not remove, used in imported file.py functions
import glob # do not remove, used in imported file.py functions

View File

@ -6,6 +6,7 @@ Functions for working with files
from __future__ import absolute_import, unicode_literals, print_function
# Import Python libs
import codecs
import contextlib
import errno
import logging
@ -777,3 +778,102 @@ def backup_minion(path, bkroot):
if not salt.utils.platform.is_windows():
os.chown(bkpath, fstat.st_uid, fstat.st_gid)
os.chmod(bkpath, fstat.st_mode)
def get_encoding(path):
'''
Detect a file's encoding using the following:
- Check for ascii
- Check for Byte Order Marks (BOM)
- Check for UTF-8 Markers
- Check System Encoding
Args:
path (str): The path to the file to check
Returns:
str: The encoding of the file
Raises:
CommandExecutionError: If the encoding cannot be detected
'''
def check_ascii(_data):
# If all characters can be decoded to ASCII, then it's ASCII
try:
_data.decode('ASCII')
log.debug('Found ASCII')
except UnicodeDecodeError:
return False
else:
return True
def check_bom(_data):
# Supported Python Codecs
# https://docs.python.org/2/library/codecs.html
# https://docs.python.org/3/library/codecs.html
boms = [
('UTF-32-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_BE)),
('UTF-32-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF32_LE)),
('UTF-16-BE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_BE)),
('UTF-16-LE', salt.utils.stringutils.to_bytes(codecs.BOM_UTF16_LE)),
('UTF-8', salt.utils.stringutils.to_bytes(codecs.BOM_UTF8)),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38\x2D')),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x38')),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x39')),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2b')),
('UTF-7', salt.utils.stringutils.to_bytes('\x2b\x2f\x76\x2f')),
]
for _encoding, bom in boms:
if _data.startswith(bom):
log.debug('Found BOM for %s', _encoding)
return _encoding
return False
def check_utf8_markers(_data):
try:
decoded = _data.decode('UTF-8')
except UnicodeDecodeError:
return False
else:
# Reject surrogate characters in Py2 (Py3 behavior)
if six.PY2:
for char in decoded:
if 0xD800 <= ord(char) <= 0xDFFF:
return False
return True
def check_system_encoding(_data):
try:
_data.decode(__salt_system_encoding__)
except UnicodeDecodeError:
return False
else:
return True
if not os.path.isfile(path):
raise CommandExecutionError('Not a file')
try:
with fopen(path, 'rb') as fp_:
data = fp_.read(2048)
except os.error:
raise CommandExecutionError('Failed to open file')
# Check for ASCII first
if check_ascii(data):
return 'ASCII'
# Check for Unicode BOM
encoding = check_bom(data)
if encoding:
return encoding
# Check for UTF-8 markers
if check_utf8_markers(data):
return 'UTF-8'
# Check system encoding
if check_system_encoding(data):
return __salt_system_encoding__
raise CommandExecutionError('Could not detect file encoding')

View File

@ -2275,57 +2275,64 @@ class FileTest(ModuleCase, SaltReturnAssertsMixin):
class BlockreplaceTest(ModuleCase, SaltReturnAssertsMixin):
marker_start = '# start'
marker_end = '# end'
content = textwrap.dedent('''\
Line 1 of block
Line 2 of block
''')
without_block = textwrap.dedent('''\
Hello world!
# comment here
''')
with_non_matching_block = textwrap.dedent('''\
Hello world!
# start
No match here
# end
# comment here
''')
with_non_matching_block_and_marker_end_not_after_newline = textwrap.dedent('''\
Hello world!
# start
No match here# end
# comment here
''')
with_matching_block = textwrap.dedent('''\
Hello world!
# start
Line 1 of block
Line 2 of block
# end
# comment here
''')
with_matching_block_and_extra_newline = textwrap.dedent('''\
Hello world!
# start
Line 1 of block
Line 2 of block
# end
# comment here
''')
with_matching_block_and_marker_end_not_after_newline = textwrap.dedent('''\
Hello world!
# start
Line 1 of block
Line 2 of block# end
# comment here
''')
content = os.linesep.join([
'Line 1 of block',
'Line 2 of block',
''
])
without_block = os.linesep.join([
'Hello world!',
'',
'# comment here',
''
])
with_non_matching_block = os.linesep.join([
'Hello world!',
'',
'# start',
'No match here',
'# end',
'# comment here',
''
])
with_non_matching_block_and_marker_end_not_after_newline = os.linesep.join([
'Hello world!',
'',
'# start',
'No match here# end',
'# comment here',
''
])
with_matching_block = os.linesep.join([
'Hello world!',
'',
'# start',
'Line 1 of block',
'Line 2 of block',
'# end',
'# comment here',
''
])
with_matching_block_and_extra_newline = os.linesep.join([
'Hello world!',
'',
'# start',
'Line 1 of block',
'Line 2 of block',
'',
'# end',
'# comment here',
''
])
with_matching_block_and_marker_end_not_after_newline = os.linesep.join([
'Hello world!',
'',
'# start',
'Line 1 of block',
'Line 2 of block# end',
'# comment here',
''
])
content_explicit_posix_newlines = ('Line 1 of block\n'
'Line 2 of block\n')
content_explicit_windows_newlines = ('Line 1 of block\r\n'

View File

@ -235,7 +235,10 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin):
'grains': {},
},
'__grains__': {'kernel': 'Linux'},
'__utils__': {'files.is_text': MagicMock(return_value=True)},
'__utils__': {
'files.is_binary': MagicMock(return_value=False),
'files.get_encoding': MagicMock(return_value='utf-8')
},
}
}
@ -266,11 +269,13 @@ class FileBlockReplaceTestCase(TestCase, LoaderModuleMockMixin):
quis leo.
''')
MULTILINE_STRING = os.linesep.join(MULTILINE_STRING.splitlines())
def setUp(self):
self.tfile = tempfile.NamedTemporaryFile(delete=False,
prefix='blockrepltmp',
mode='w+')
self.tfile.write(self.MULTILINE_STRING)
mode='w+b')
self.tfile.write(salt.utils.stringutils.to_bytes(self.MULTILINE_STRING))
self.tfile.close()
def tearDown(self):