Properly download files when following 30x redirect

This fixes cases where we did not re-parse the status line after a redirect, causing the cached file to be zero-size as we ignored the chunks being downloaded.
2024-11-07 17:09:03 +00:00 · 2017-06-01 13:24:32 -05:00 · 2017-06-01 13:24:32 -05:00 · 7cff272adf
commit 7cff272adf
parent d95c23a1a3
1 changed files with 41 additions and 19 deletions
--- a/salt/fileclient.py
+++ b/salt/fileclient.py
@ -592,20 +592,37 @@ class Client(object):
        destfp = None
        try:
            # Tornado calls streaming_callback on redirect response bodies.
-            # But we need streaming to support fetching large files (> RAM avail).
-            # Here we working this around by disabling recording the body for redirections.
-            # The issue is fixed in Tornado 4.3.0 so on_header callback could be removed
-            # when we'll deprecate Tornado<4.3.0.
-            # See #27093 and #30431 for details.
+            # But we need streaming to support fetching large files (> RAM
+            # avail).  Here we working this around by disabling recording the
+            # body for redirections.  The issue is fixed in Tornado 4.3.0 so
+            # on_header callback could be removed when we'll deprecate
+            # Tornado<4.3.0. See #27093 and #30431 for details.

-            # Use list here to make it writable inside the on_header callback. Simple bool doesn't
-            # work here: on_header creates a new local variable instead. This could be avoided in
-            # Py3 with 'nonlocal' statement. There is no Py2 alternative for this.
+            # Use list here to make it writable inside the on_header callback.
+            # Simple bool doesn't work here: on_header creates a new local
+            # variable instead. This could be avoided in Py3 with 'nonlocal'
+            # statement. There is no Py2 alternative for this.
+            #
+            # write_body[0] is used by the on_chunk callback to tell it whether
+            #   or not we need to write the body of the request to disk. For
+            #   30x redirects we set this to False because we don't want to
+            #   write the contents to disk, as we will need to wait until we
+            #   get to the redirected URL.
+            #
+            # write_body[1] will contain a tornado.httputil.HTTPHeaders
+            #   instance that we will use to parse each header line. We
+            #   initialize this to False, and after we parse the status line we
+            #   will replace it with the HTTPHeaders instance. If/when we have
+            #   found the encoding used in the request, we set this value to
+            #   False to signify that we are done parsing.
+            #
+            # write_body[2] is where the encoding will be stored
            write_body = [None, False, None]

            def on_header(hdr):
                if write_body[1] is not False and write_body[2] is None:
-                    # Try to find out what content type encoding is used if this is a text file
+                    # Try to find out what content type encoding is used if
+                    # this is a text file
                    write_body[1].parse_line(hdr)  # pylint: disable=no-member
                    if 'Content-Type' in write_body[1]:
                        content_type = write_body[1].get('Content-Type')  # pylint: disable=no-member
@ -621,17 +638,22 @@ class Client(object):
                            # We have found our encoding. Stop processing headers.
                            write_body[1] = False

-                if write_body[0] is not None:
-                    # We already parsed the first line. No need to run the code below again
-                    return
+                        # If write_body[0] is False, this means that this
+                        # header is a 30x redirect, so we need to reset
+                        # write_body[0] to None so that we parse the HTTP
+                        # status code from the redirect target.
+                        if write_body[0] is write_body[1] is False:
+                            write_body[0] = None

-                try:
-                    hdr = parse_response_start_line(hdr)
-                except HTTPInputError:
-                    # Not the first line, do nothing
-                    return
-                write_body[0] = hdr.code not in [301, 302, 303, 307]
-                write_body[1] = HTTPHeaders()
+                # Check the status line of the HTTP request
+                if write_body[0] is None:
+                    try:
+                        hdr = parse_response_start_line(hdr)
+                    except HTTPInputError:
+                        # Not the first line, do nothing
+                        return
+                    write_body[0] = hdr.code not in [301, 302, 303, 307]
+                    write_body[1] = HTTPHeaders()

            if no_cache:
                result = []