From 7691dee4ed9d4801c856d3e3a7990f0eebeac8f3 Mon Sep 17 00:00:00 2001 From: Erik Johnson Date: Wed, 11 Apr 2018 14:41:54 -0500 Subject: [PATCH 1/2] Add to_str option to decode funcs This allows for the string to be forced to a str type instead of unicode on PY2. --- salt/utils/data.py | 73 +++++++++++++++----------- salt/utils/stringutils.py | 22 +++++--- tests/unit/utils/test_data.py | 98 ++++++++++++++++++++++++++++++++++- 3 files changed, 153 insertions(+), 40 deletions(-) diff --git a/salt/utils/data.py b/salt/utils/data.py index e2ea9f6623..5fe5b12523 100644 --- a/salt/utils/data.py +++ b/salt/utils/data.py @@ -68,9 +68,12 @@ def compare_lists(old=None, new=None): def decode(data, encoding=None, errors='strict', keep=False, - normalize=False, preserve_dict_class=False, preserve_tuples=False): + normalize=False, preserve_dict_class=False, preserve_tuples=False, + to_str=False): ''' - Generic function which will decode whichever type is passed, if necessary + Generic function which will decode whichever type is passed, if necessary. + Optionally use to_str=True to ensure strings are str types and not unicode + on Python 2. If `strict` is True, and `keep` is False, and we fail to decode, a UnicodeDecodeError will be raised. Passing `keep` as True allows for the @@ -94,22 +97,24 @@ def decode(data, encoding=None, errors='strict', keep=False, for the base character, and one for the breve mark). Normalizing allows for a more reliable test case. ''' + _decode_func = salt.utils.stringutils.to_unicode \ + if not to_str \ + else salt.utils.stringutils.to_str if isinstance(data, collections.Mapping): return decode_dict(data, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) elif isinstance(data, list): return decode_list(data, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) elif isinstance(data, tuple): return decode_tuple(data, encoding, errors, keep, normalize, - preserve_dict_class) \ + preserve_dict_class, to_str) \ if preserve_tuples \ else decode_list(data, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) else: try: - data = salt.utils.stringutils.to_unicode( - data, encoding, errors, normalize) + data = _decode_func(data, encoding, errors, normalize) except TypeError: # to_unicode raises a TypeError when input is not a # string/bytestring/bytearray. This is expected and simply means we @@ -123,23 +128,26 @@ def decode(data, encoding=None, errors='strict', keep=False, def decode_dict(data, encoding=None, errors='strict', keep=False, normalize=False, preserve_dict_class=False, - preserve_tuples=False): + preserve_tuples=False, to_str=False): ''' - Decode all string values to Unicode + Decode all string values to Unicode. Optionally use to_str=True to ensure + strings are str types and not unicode on Python 2. ''' + _decode_func = salt.utils.stringutils.to_unicode \ + if not to_str \ + else salt.utils.stringutils.to_str # Make sure we preserve OrderedDicts rv = data.__class__() if preserve_dict_class else {} for key, value in six.iteritems(data): if isinstance(key, tuple): key = decode_tuple(key, encoding, errors, keep, normalize, - preserve_dict_class) \ + preserve_dict_class, to_str) \ if preserve_tuples \ else decode_list(key, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) else: try: - key = salt.utils.stringutils.to_unicode( - key, encoding, errors, normalize) + key = _decode_func(key, encoding, errors, normalize) except TypeError: # to_unicode raises a TypeError when input is not a # string/bytestring/bytearray. This is expected and simply @@ -151,20 +159,19 @@ def decode_dict(data, encoding=None, errors='strict', keep=False, if isinstance(value, list): value = decode_list(value, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) elif isinstance(value, tuple): value = decode_tuple(value, encoding, errors, keep, normalize, - preserve_dict_class) \ + preserve_dict_class, to_str) \ if preserve_tuples \ else decode_list(value, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) elif isinstance(value, collections.Mapping): value = decode_dict(value, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) else: try: - value = salt.utils.stringutils.to_unicode( - value, encoding, errors, normalize) + value = _decode_func(value, encoding, errors, normalize) except TypeError: # to_unicode raises a TypeError when input is not a # string/bytestring/bytearray. This is expected and simply @@ -180,28 +187,31 @@ def decode_dict(data, encoding=None, errors='strict', keep=False, def decode_list(data, encoding=None, errors='strict', keep=False, normalize=False, preserve_dict_class=False, - preserve_tuples=False): + preserve_tuples=False, to_str=False): ''' - Decode all string values to Unicode + Decode all string values to Unicode. Optionally use to_str=True to ensure + strings are str types and not unicode on Python 2. ''' + _decode_func = salt.utils.stringutils.to_unicode \ + if not to_str \ + else salt.utils.stringutils.to_str rv = [] for item in data: if isinstance(item, list): item = decode_list(item, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) elif isinstance(item, tuple): item = decode_tuple(item, encoding, errors, keep, normalize, - preserve_dict_class) \ + preserve_dict_class, to_str) \ if preserve_tuples \ else decode_list(item, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) elif isinstance(item, collections.Mapping): item = decode_dict(item, encoding, errors, keep, normalize, - preserve_dict_class, preserve_tuples) + preserve_dict_class, preserve_tuples, to_str) else: try: - item = salt.utils.stringutils.to_unicode( - item, encoding, errors, normalize) + item = _decode_func(item, encoding, errors, normalize) except TypeError: # to_unicode raises a TypeError when input is not a # string/bytestring/bytearray. This is expected and simply @@ -216,13 +226,14 @@ def decode_list(data, encoding=None, errors='strict', keep=False, def decode_tuple(data, encoding=None, errors='strict', keep=False, - normalize=False, preserve_dict_class=False): + normalize=False, preserve_dict_class=False, to_str=False): ''' - Decode all string values to Unicode + Decode all string values to Unicode. Optionally use to_str=True to ensure + strings are str types and not unicode on Python 2. ''' return tuple( decode_list(data, encoding, errors, keep, normalize, - preserve_dict_class, True) + preserve_dict_class, True, to_str) ) diff --git a/salt/utils/stringutils.py b/salt/utils/stringutils.py index 0673032851..3c23d6b1b7 100644 --- a/salt/utils/stringutils.py +++ b/salt/utils/stringutils.py @@ -51,39 +51,45 @@ def to_bytes(s, encoding=None, errors='strict'): return to_str(s, encoding, errors) -def to_str(s, encoding=None, errors='strict'): +def to_str(s, encoding=None, errors='strict', normalize=False): ''' Given str, bytes, bytearray, or unicode (py2), return str ''' + def _normalize(s): + try: + return unicodedata.normalize('NFC', s) if normalize else s + except TypeError: + return s + # This shouldn't be six.string_types because if we're on PY2 and we already # have a string, we should just return it. if isinstance(s, str): - return s + return _normalize(s) if six.PY3: if isinstance(s, (bytes, bytearray)): if encoding: - return s.decode(encoding, errors) + return _normalize(s.decode(encoding, errors)) else: try: # Try UTF-8 first - return s.decode('utf-8', errors) + return _normalize(s.decode('utf-8', errors)) except UnicodeDecodeError: # Fall back to detected encoding - return s.decode(__salt_system_encoding__, errors) + return _normalize(s.decode(__salt_system_encoding__, errors)) raise TypeError('expected str, bytes, or bytearray not {}'.format(type(s))) else: if isinstance(s, bytearray): return str(s) # future lint: disable=blacklisted-function if isinstance(s, unicode): # pylint: disable=incompatible-py3-code,undefined-variable if encoding: - return s.encode(encoding, errors) + return _normalize(s).encode(encoding, errors) else: try: # Try UTF-8 first - return s.encode('utf-8', errors) + return _normalize(s).encode('utf-8', errors) except UnicodeEncodeError: # Fall back to detected encoding - return s.encode(__salt_system_encoding__, errors) + return _normalize(s).encode(__salt_system_encoding__, errors) raise TypeError('expected str, bytearray, or unicode') diff --git a/tests/unit/utils/test_data.py b/tests/unit/utils/test_data.py index e0d319eb25..328b229c47 100644 --- a/tests/unit/utils/test_data.py +++ b/tests/unit/utils/test_data.py @@ -9,14 +9,16 @@ import logging # Import Salt libs import salt.utils.data -import salt.utils.data +import salt.utils.stringutils from salt.utils.odict import OrderedDict from tests.support.unit import TestCase, skipIf, LOREM_IPSUM from tests.support.mock import patch, NO_MOCK, NO_MOCK_REASON from salt.ext.six.moves import builtins # pylint: disable=import-error,redefined-builtin +from salt.ext import six log = logging.getLogger(__name__) _b = lambda x: x.encode('utf-8') +_s = lambda x: salt.utils.stringutils.to_str(x, normalize=True) # Some randomized data that will not decode BYTES = b'\x9c\xb1\xf7\xa3' # This is an example of a unicode string with й constructed using two separate @@ -213,6 +215,9 @@ class DataTestCase(TestCase): def test_decode(self): ''' + Companion to test_decode_to_str, they should both be kept up-to-date + with one another. + NOTE: This uses the lambda "_b" defined above in the global scope, which encodes a string to a bytestring, assuming utf-8. ''' @@ -291,6 +296,97 @@ class DataTestCase(TestCase): BYTES, keep=False) + def test_decode_to_str(self): + ''' + Companion to test_decode, they should both be kept up-to-date with one + another. + + NOTE: This uses the lambda "_s" defined above in the global scope, + which converts the string/bytestring to a str type. + ''' + expected = [ + _s('unicode_str'), + _s('питон'), + 123, + 456.789, + True, + False, + None, + _s('яйца'), + BYTES, + [123, 456.789, _s('спам'), True, False, None, _s('яйца'), BYTES], + (987, 654.321, _s('яйца'), _s('яйца'), None, (True, _s('яйца'), BYTES)), + {_s('str_key'): _s('str_val'), + None: True, + 123: 456.789, + _s('яйца'): BYTES, + _s('subdict'): { + _s('unicode_key'): _s('яйца'), + _s('tuple'): (123, _s('hello'), _s('world'), True, _s('яйца'), BYTES), + _s('list'): [456, _s('спам'), False, _s('яйца'), BYTES]}}, + OrderedDict([(_s('foo'), _s('bar')), (123, 456), (_s('яйца'), BYTES)]) + ] + + ret = salt.utils.data.decode( + self.test_data, + keep=True, + normalize=True, + preserve_dict_class=True, + preserve_tuples=True, + to_str=True) + self.assertEqual(ret, expected) + + if six.PY3: + # The binary data in the data structure should fail to decode, even + # using the fallback, and raise an exception. + self.assertRaises( + UnicodeDecodeError, + salt.utils.data.decode, + self.test_data, + keep=False, + normalize=True, + preserve_dict_class=True, + preserve_tuples=True, + to_str=True) + + # Now munge the expected data so that we get what we would expect if we + # disable preservation of dict class and tuples + expected[10] = [987, 654.321, _s('яйца'), _s('яйца'), None, [True, _s('яйца'), BYTES]] + expected[11][_s('subdict')][_s('tuple')] = [123, _s('hello'), _s('world'), True, _s('яйца'), BYTES] + expected[12] = {_s('foo'): _s('bar'), 123: 456, _s('яйца'): BYTES} + + ret = salt.utils.data.decode( + self.test_data, + keep=True, + normalize=True, + preserve_dict_class=False, + preserve_tuples=False, + to_str=True) + self.assertEqual(ret, expected) + + # Now test single non-string, non-data-structure items, these should + # return the same value when passed to this function + for item in (123, 4.56, True, False, None): + log.debug('Testing decode of %s', item) + self.assertEqual(salt.utils.data.decode(item, to_str=True), item) + + # Test single strings (not in a data structure) + self.assertEqual(salt.utils.data.decode('foo', to_str=True), _s('foo')) + self.assertEqual(salt.utils.data.decode(_b('bar'), to_str=True), _s('bar')) + + # Test binary blob + self.assertEqual( + salt.utils.data.decode(BYTES, keep=True, to_str=True), + BYTES + ) + if six.PY3: + self.assertRaises( + UnicodeDecodeError, + salt.utils.data.decode, + BYTES, + keep=False, + to_str=True) + @skipIf(NO_MOCK, NO_MOCK_REASON) def test_decode_fallback(self): ''' From ab6314247b7df98ffd3879ab8c0f081884e06bfd Mon Sep 17 00:00:00 2001 From: Erik Johnson Date: Wed, 11 Apr 2018 14:45:42 -0500 Subject: [PATCH 2/2] ldapmod.py/ldap3.py: Force modlist for search/modify/etc. to be str types --- salt/modules/ldap3.py | 17 +++++++++++++---- salt/modules/ldapmod.py | 3 ++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/salt/modules/ldap3.py b/salt/modules/ldap3.py index 0e44135152..8c937f766f 100644 --- a/salt/modules/ldap3.py +++ b/salt/modules/ldap3.py @@ -12,6 +12,8 @@ This is an alternative to the ``ldap`` interface provided by the ''' from __future__ import absolute_import, print_function, unicode_literals +import logging +import sys available_backends = set() try: @@ -22,9 +24,9 @@ try: available_backends.add('ldap') except ImportError: pass -import logging + +import salt.utils.data from salt.ext import six -import sys log = logging.getLogger(__name__) @@ -407,7 +409,10 @@ def add(connect_spec, dn, attributes): if 'unicodePwd' in attributes: attributes['unicodePwd'] = [_format_unicode_password(x) for x in attributes['unicodePwd']] - modlist = ldap.modlist.addModlist(attributes) + modlist = salt.utils.data.decode( + ldap.modlist.addModlist(attributes), + to_str=True + ) try: l.c.add_s(dn, modlist) except ldap.LDAPError as e: @@ -507,6 +512,7 @@ def modify(connect_spec, dn, directives): modlist[idx] = (mod[0], mod[1], [_format_unicode_password(x) for x in mod[2]]) + modlist = salt.utils.data.decode(modlist, to_str=True) try: l.c.modify_s(dn, modlist) except ldap.LDAPError as e: @@ -573,7 +579,10 @@ def change(connect_spec, dn, before, after): if 'unicodePwd' in after: after['unicodePwd'] = [_format_unicode_password(x) for x in after['unicodePwd']] - modlist = ldap.modlist.modifyModlist(before, after) + modlist = salt.utils.data.decode( + ldap.modlist.modifyModlist(before, after), + to_str=True + ) try: l.c.modify_s(dn, modlist) except ldap.LDAPError as e: diff --git a/salt/modules/ldapmod.py b/salt/modules/ldapmod.py index a167fe4feb..abc1460f15 100644 --- a/salt/modules/ldapmod.py +++ b/salt/modules/ldapmod.py @@ -46,6 +46,7 @@ import logging import time # Import Salt libs +import salt.utils.data from salt.ext import six from salt.exceptions import CommandExecutionError @@ -140,7 +141,7 @@ def search(filter, # pylint: disable=C0103 if attrs == '': # Allow command line 'return all' attr override attrs = None elif attrs is None: - attrs = _config('attrs') + attrs = salt.utils.data.decode(_config('attrs'), to_str=True) _ldap = _connect(**kwargs) start = time.time() log.debug(