From 7691dee4ed9d4801c856d3e3a7990f0eebeac8f3 Mon Sep 17 00:00:00 2001
From: Erik Johnson <palehose@gmail.com>
Date: Wed, 11 Apr 2018 14:41:54 -0500
Subject: [PATCH 1/2] Add to_str option to decode funcs

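This allows decoded strings to be forced to the native str type instead
of unicode on PY2. To support this, to_str gains a normalize argument so
that it can be called with the same arguments as to_unicode.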
---
 salt/utils/data.py            | 73 +++++++++++++++-----------
 salt/utils/stringutils.py     | 22 +++++---
 tests/unit/utils/test_data.py | 98 ++++++++++++++++++++++++++++++++++-
 3 files changed, 153 insertions(+), 40 deletions(-)

diff --git a/salt/utils/data.py b/salt/utils/data.py
index e2ea9f6623..5fe5b12523 100644
--- a/salt/utils/data.py
+++ b/salt/utils/data.py
@@ -68,9 +68,12 @@ def compare_lists(old=None, new=None):
 
 
 def decode(data, encoding=None, errors='strict', keep=False,
-           normalize=False, preserve_dict_class=False, preserve_tuples=False):
+           normalize=False, preserve_dict_class=False, preserve_tuples=False,
+           to_str=False):
     '''
-    Generic function which will decode whichever type is passed, if necessary
+    Generic function which will decode whichever type is passed, if necessary.
+    Optionally use to_str=True to ensure strings are str types and not unicode
+    on Python 2.
 
     If `strict` is True, and `keep` is False, and we fail to decode, a
     UnicodeDecodeError will be raised. Passing `keep` as True allows for the
@@ -94,22 +97,24 @@ def decode(data, encoding=None, errors='strict', keep=False,
     for the base character, and one for the breve mark). Normalizing allows for
     a more reliable test case.
     '''
+    _decode_func = salt.utils.stringutils.to_unicode \
+        if not to_str \
+        else salt.utils.stringutils.to_str
     if isinstance(data, collections.Mapping):
         return decode_dict(data, encoding, errors, keep, normalize,
-                           preserve_dict_class, preserve_tuples)
+                           preserve_dict_class, preserve_tuples, to_str)
     elif isinstance(data, list):
         return decode_list(data, encoding, errors, keep, normalize,
-                           preserve_dict_class, preserve_tuples)
+                           preserve_dict_class, preserve_tuples, to_str)
     elif isinstance(data, tuple):
         return decode_tuple(data, encoding, errors, keep, normalize,
-                            preserve_dict_class) \
+                            preserve_dict_class, to_str) \
             if preserve_tuples \
             else decode_list(data, encoding, errors, keep, normalize,
-                             preserve_dict_class, preserve_tuples)
+                             preserve_dict_class, preserve_tuples, to_str)
     else:
         try:
-            data = salt.utils.stringutils.to_unicode(
-                data, encoding, errors, normalize)
+            data = _decode_func(data, encoding, errors, normalize)
         except TypeError:
             # to_unicode raises a TypeError when input is not a
             # string/bytestring/bytearray. This is expected and simply means we
@@ -123,23 +128,26 @@ def decode(data, encoding=None, errors='strict', keep=False,
 
 def decode_dict(data, encoding=None, errors='strict', keep=False,
                 normalize=False, preserve_dict_class=False,
-                preserve_tuples=False):
+                preserve_tuples=False, to_str=False):
     '''
-    Decode all string values to Unicode
+    Decode all string values to Unicode. Optionally use to_str=True to ensure
+    strings are str types and not unicode on Python 2.
     '''
+    _decode_func = salt.utils.stringutils.to_unicode \
+        if not to_str \
+        else salt.utils.stringutils.to_str
     # Make sure we preserve OrderedDicts
     rv = data.__class__() if preserve_dict_class else {}
     for key, value in six.iteritems(data):
         if isinstance(key, tuple):
             key = decode_tuple(key, encoding, errors, keep, normalize,
-                               preserve_dict_class) \
+                               preserve_dict_class, to_str) \
                 if preserve_tuples \
                 else decode_list(key, encoding, errors, keep, normalize,
-                                 preserve_dict_class, preserve_tuples)
+                                 preserve_dict_class, preserve_tuples, to_str)
         else:
             try:
-                key = salt.utils.stringutils.to_unicode(
-                    key, encoding, errors, normalize)
+                key = _decode_func(key, encoding, errors, normalize)
             except TypeError:
                 # to_unicode raises a TypeError when input is not a
                 # string/bytestring/bytearray. This is expected and simply
@@ -151,20 +159,19 @@ def decode_dict(data, encoding=None, errors='strict', keep=False,
 
         if isinstance(value, list):
             value = decode_list(value, encoding, errors, keep, normalize,
-                                preserve_dict_class, preserve_tuples)
+                                preserve_dict_class, preserve_tuples, to_str)
         elif isinstance(value, tuple):
             value = decode_tuple(value, encoding, errors, keep, normalize,
-                                 preserve_dict_class) \
+                                 preserve_dict_class, to_str) \
                 if preserve_tuples \
                 else decode_list(value, encoding, errors, keep, normalize,
-                                 preserve_dict_class, preserve_tuples)
+                                 preserve_dict_class, preserve_tuples, to_str)
         elif isinstance(value, collections.Mapping):
             value = decode_dict(value, encoding, errors, keep, normalize,
-                                preserve_dict_class, preserve_tuples)
+                                preserve_dict_class, preserve_tuples, to_str)
         else:
             try:
-                value = salt.utils.stringutils.to_unicode(
-                    value, encoding, errors, normalize)
+                value = _decode_func(value, encoding, errors, normalize)
             except TypeError:
                 # to_unicode raises a TypeError when input is not a
                 # string/bytestring/bytearray. This is expected and simply
@@ -180,28 +187,31 @@ def decode_dict(data, encoding=None, errors='strict', keep=False,
 
 def decode_list(data, encoding=None, errors='strict', keep=False,
                 normalize=False, preserve_dict_class=False,
-                preserve_tuples=False):
+                preserve_tuples=False, to_str=False):
     '''
-    Decode all string values to Unicode
+    Decode all string values to Unicode. Optionally use to_str=True to ensure
+    strings are str types and not unicode on Python 2.
     '''
+    _decode_func = salt.utils.stringutils.to_unicode \
+        if not to_str \
+        else salt.utils.stringutils.to_str
     rv = []
     for item in data:
         if isinstance(item, list):
             item = decode_list(item, encoding, errors, keep, normalize,
-                               preserve_dict_class, preserve_tuples)
+                               preserve_dict_class, preserve_tuples, to_str)
         elif isinstance(item, tuple):
             item = decode_tuple(item, encoding, errors, keep, normalize,
-                                preserve_dict_class) \
+                                preserve_dict_class, to_str) \
                 if preserve_tuples \
                 else decode_list(item, encoding, errors, keep, normalize,
-                                 preserve_dict_class, preserve_tuples)
+                                 preserve_dict_class, preserve_tuples, to_str)
         elif isinstance(item, collections.Mapping):
             item = decode_dict(item, encoding, errors, keep, normalize,
-                               preserve_dict_class, preserve_tuples)
+                               preserve_dict_class, preserve_tuples, to_str)
         else:
             try:
-                item = salt.utils.stringutils.to_unicode(
-                    item, encoding, errors, normalize)
+                item = _decode_func(item, encoding, errors, normalize)
             except TypeError:
                 # to_unicode raises a TypeError when input is not a
                 # string/bytestring/bytearray. This is expected and simply
@@ -216,13 +226,14 @@ def decode_list(data, encoding=None, errors='strict', keep=False,
 
 
 def decode_tuple(data, encoding=None, errors='strict', keep=False,
-                 normalize=False, preserve_dict_class=False):
+                 normalize=False, preserve_dict_class=False, to_str=False):
     '''
-    Decode all string values to Unicode
+    Decode all string values to Unicode. Optionally use to_str=True to ensure
+    strings are str types and not unicode on Python 2.
     '''
     return tuple(
         decode_list(data, encoding, errors, keep, normalize,
-                    preserve_dict_class, True)
+                    preserve_dict_class, True, to_str)
     )
 
 
diff --git a/salt/utils/stringutils.py b/salt/utils/stringutils.py
index 0673032851..3c23d6b1b7 100644
--- a/salt/utils/stringutils.py
+++ b/salt/utils/stringutils.py
@@ -51,39 +51,45 @@ def to_bytes(s, encoding=None, errors='strict'):
         return to_str(s, encoding, errors)
 
 
-def to_str(s, encoding=None, errors='strict'):
+def to_str(s, encoding=None, errors='strict', normalize=False):
     '''
     Given str, bytes, bytearray, or unicode (py2), return str
     '''
+    def _normalize(s):
+        try:
+            return unicodedata.normalize('NFC', s) if normalize else s
+        except TypeError:
+            return s
+
     # This shouldn't be six.string_types because if we're on PY2 and we already
     # have a string, we should just return it.
     if isinstance(s, str):
-        return s
+        return _normalize(s)
     if six.PY3:
         if isinstance(s, (bytes, bytearray)):
             if encoding:
-                return s.decode(encoding, errors)
+                return _normalize(s.decode(encoding, errors))
             else:
                 try:
                     # Try UTF-8 first
-                    return s.decode('utf-8', errors)
+                    return _normalize(s.decode('utf-8', errors))
                 except UnicodeDecodeError:
                     # Fall back to detected encoding
-                    return s.decode(__salt_system_encoding__, errors)
+                    return _normalize(s.decode(__salt_system_encoding__, errors))
         raise TypeError('expected str, bytes, or bytearray not {}'.format(type(s)))
     else:
         if isinstance(s, bytearray):
             return str(s)  # future lint: disable=blacklisted-function
         if isinstance(s, unicode):  # pylint: disable=incompatible-py3-code,undefined-variable
             if encoding:
-                return s.encode(encoding, errors)
+                return _normalize(s).encode(encoding, errors)
             else:
                 try:
                     # Try UTF-8 first
-                    return s.encode('utf-8', errors)
+                    return _normalize(s).encode('utf-8', errors)
                 except UnicodeEncodeError:
                     # Fall back to detected encoding
-                    return s.encode(__salt_system_encoding__, errors)
+                    return _normalize(s).encode(__salt_system_encoding__, errors)
         raise TypeError('expected str, bytearray, or unicode')
 
 
diff --git a/tests/unit/utils/test_data.py b/tests/unit/utils/test_data.py
index e0d319eb25..328b229c47 100644
--- a/tests/unit/utils/test_data.py
+++ b/tests/unit/utils/test_data.py
@@ -9,14 +9,16 @@ import logging
 
 # Import Salt libs
 import salt.utils.data
-import salt.utils.data
+import salt.utils.stringutils
 from salt.utils.odict import OrderedDict
 from tests.support.unit import TestCase, skipIf, LOREM_IPSUM
 from tests.support.mock import patch, NO_MOCK, NO_MOCK_REASON
 from salt.ext.six.moves import builtins  # pylint: disable=import-error,redefined-builtin
+from salt.ext import six
 
 log = logging.getLogger(__name__)
 _b = lambda x: x.encode('utf-8')
+_s = lambda x: salt.utils.stringutils.to_str(x, normalize=True)
 # Some randomized data that will not decode
 BYTES = b'\x9c\xb1\xf7\xa3'
 # This is an example of a unicode string with й constructed using two separate
@@ -213,6 +215,9 @@ class DataTestCase(TestCase):
 
     def test_decode(self):
         '''
+        Companion to test_decode_to_str, they should both be kept up-to-date
+        with one another.
+
         NOTE: This uses the lambda "_b" defined above in the global scope,
         which encodes a string to a bytestring, assuming utf-8.
         '''
@@ -291,6 +296,97 @@ class DataTestCase(TestCase):
             BYTES,
             keep=False)
 
+    def test_decode_to_str(self):
+        '''
+        Companion to test_decode, they should both be kept up-to-date with one
+        another.
+
+        NOTE: This uses the lambda "_s" defined above in the global scope,
+        which converts the string/bytestring to a str type.
+        '''
+        expected = [
+            _s('unicode_str'),
+            _s('питон'),
+            123,
+            456.789,
+            True,
+            False,
+            None,
+            _s('яйца'),
+            BYTES,
+            [123, 456.789, _s('спам'), True, False, None, _s('яйца'), BYTES],
+            (987, 654.321, _s('яйца'), _s('яйца'), None, (True, _s('яйца'), BYTES)),
+            {_s('str_key'): _s('str_val'),
+             None: True,
+             123: 456.789,
+             _s('яйца'): BYTES,
+             _s('subdict'): {
+                 _s('unicode_key'): _s('яйца'),
+                 _s('tuple'): (123, _s('hello'), _s('world'), True, _s('яйца'), BYTES),
+                 _s('list'): [456, _s('спам'), False, _s('яйца'), BYTES]}},
+            OrderedDict([(_s('foo'), _s('bar')), (123, 456), (_s('яйца'), BYTES)])
+        ]
+
+        ret = salt.utils.data.decode(
+            self.test_data,
+            keep=True,
+            normalize=True,
+            preserve_dict_class=True,
+            preserve_tuples=True,
+            to_str=True)
+        self.assertEqual(ret, expected)
+
+        if six.PY3:
+            # The binary data in the data structure should fail to decode, even
+            # using the fallback, and raise an exception.
+            self.assertRaises(
+                UnicodeDecodeError,
+                salt.utils.data.decode,
+                self.test_data,
+                keep=False,
+                normalize=True,
+                preserve_dict_class=True,
+                preserve_tuples=True,
+                to_str=True)
+
+        # Now munge the expected data so that we get what we would expect if we
+        # disable preservation of dict class and tuples
+        expected[10] = [987, 654.321, _s('яйца'), _s('яйца'), None, [True, _s('яйца'), BYTES]]
+        expected[11][_s('subdict')][_s('tuple')] = [123, _s('hello'), _s('world'), True, _s('яйца'), BYTES]
+        expected[12] = {_s('foo'): _s('bar'), 123: 456, _s('яйца'): BYTES}
+
+        ret = salt.utils.data.decode(
+            self.test_data,
+            keep=True,
+            normalize=True,
+            preserve_dict_class=False,
+            preserve_tuples=False,
+            to_str=True)
+        self.assertEqual(ret, expected)
+
+        # Now test single non-string, non-data-structure items, these should
+        # return the same value when passed to this function
+        for item in (123, 4.56, True, False, None):
+            log.debug('Testing decode of %s', item)
+            self.assertEqual(salt.utils.data.decode(item, to_str=True), item)
+
+        # Test single strings (not in a data structure)
+        self.assertEqual(salt.utils.data.decode('foo', to_str=True), _s('foo'))
+        self.assertEqual(salt.utils.data.decode(_b('bar'), to_str=True), _s('bar'))
+
+        # Test binary blob
+        self.assertEqual(
+            salt.utils.data.decode(BYTES, keep=True, to_str=True),
+            BYTES
+        )
+        if six.PY3:
+            self.assertRaises(
+                UnicodeDecodeError,
+                salt.utils.data.decode,
+                BYTES,
+                keep=False,
+                to_str=True)
+
     @skipIf(NO_MOCK, NO_MOCK_REASON)
     def test_decode_fallback(self):
         '''

From ab6314247b7df98ffd3879ab8c0f081884e06bfd Mon Sep 17 00:00:00 2001
From: Erik Johnson <palehose@gmail.com>
Date: Wed, 11 Apr 2018 14:45:42 -0500
Subject: [PATCH 2/2] ldapmod.py/ldap3.py: Force modlist/attrs for
 search/modify/etc. to be str types

---
 salt/modules/ldap3.py   | 17 +++++++++++++----
 salt/modules/ldapmod.py |  3 ++-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/salt/modules/ldap3.py b/salt/modules/ldap3.py
index 0e44135152..8c937f766f 100644
--- a/salt/modules/ldap3.py
+++ b/salt/modules/ldap3.py
@@ -12,6 +12,8 @@ This is an alternative to the ``ldap`` interface provided by the
 '''
 
 from __future__ import absolute_import, print_function, unicode_literals
+import logging
+import sys
 
 available_backends = set()
 try:
@@ -22,9 +24,9 @@ try:
     available_backends.add('ldap')
 except ImportError:
     pass
-import logging
+
+import salt.utils.data
 from salt.ext import six
-import sys
 
 log = logging.getLogger(__name__)
 
@@ -407,7 +409,10 @@ def add(connect_spec, dn, attributes):
     if 'unicodePwd' in attributes:
         attributes['unicodePwd'] = [_format_unicode_password(x) for x in attributes['unicodePwd']]
 
-    modlist = ldap.modlist.addModlist(attributes)
+    modlist = salt.utils.data.decode(
+        ldap.modlist.addModlist(attributes),
+        to_str=True
+    )
     try:
         l.c.add_s(dn, modlist)
     except ldap.LDAPError as e:
@@ -507,6 +512,7 @@ def modify(connect_spec, dn, directives):
             modlist[idx] = (mod[0], mod[1],
                 [_format_unicode_password(x) for x in mod[2]])
 
+    modlist = salt.utils.data.decode(modlist, to_str=True)
     try:
         l.c.modify_s(dn, modlist)
     except ldap.LDAPError as e:
@@ -573,7 +579,10 @@ def change(connect_spec, dn, before, after):
     if 'unicodePwd' in after:
         after['unicodePwd'] = [_format_unicode_password(x) for x in after['unicodePwd']]
 
-    modlist = ldap.modlist.modifyModlist(before, after)
+    modlist = salt.utils.data.decode(
+        ldap.modlist.modifyModlist(before, after),
+        to_str=True
+    )
     try:
         l.c.modify_s(dn, modlist)
     except ldap.LDAPError as e:
diff --git a/salt/modules/ldapmod.py b/salt/modules/ldapmod.py
index a167fe4feb..abc1460f15 100644
--- a/salt/modules/ldapmod.py
+++ b/salt/modules/ldapmod.py
@@ -46,6 +46,7 @@ import logging
 import time
 
 # Import Salt libs
+import salt.utils.data
 from salt.ext import six
 from salt.exceptions import CommandExecutionError
 
@@ -140,7 +141,7 @@ def search(filter,      # pylint: disable=C0103
     if attrs == '':  # Allow command line 'return all' attr override
         attrs = None
     elif attrs is None:
-        attrs = _config('attrs')
+        attrs = salt.utils.data.decode(_config('attrs'), to_str=True)
     _ldap = _connect(**kwargs)
     start = time.time()
     log.debug(