Properly download files when following 30x redirect

This fixes cases where we did not re-parse the status line after a
redirect, causing the cached file to be zero-size as we ignored the
chunks being downloaded.
This commit is contained in:
Erik Johnson 2017-06-01 13:24:32 -05:00
parent d95c23a1a3
commit 7cff272adf

View File

@ -592,20 +592,37 @@ class Client(object):
destfp = None
try:
# Tornado calls streaming_callback on redirect response bodies.
# But we need streaming to support fetching large files (> RAM avail).
# Here we working this around by disabling recording the body for redirections.
# The issue is fixed in Tornado 4.3.0 so on_header callback could be removed
# when we'll deprecate Tornado<4.3.0.
# See #27093 and #30431 for details.
# But we need streaming to support fetching large files (> RAM
# avail). Here we working this around by disabling recording the
# body for redirections. The issue is fixed in Tornado 4.3.0 so
# on_header callback could be removed when we'll deprecate
# Tornado<4.3.0. See #27093 and #30431 for details.
# Use list here to make it writable inside the on_header callback. Simple bool doesn't
# work here: on_header creates a new local variable instead. This could be avoided in
# Py3 with 'nonlocal' statement. There is no Py2 alternative for this.
# Use list here to make it writable inside the on_header callback.
# Simple bool doesn't work here: on_header creates a new local
# variable instead. This could be avoided in Py3 with 'nonlocal'
# statement. There is no Py2 alternative for this.
#
# write_body[0] is used by the on_chunk callback to tell it whether
# or not we need to write the body of the request to disk. For
# 30x redirects we set this to False because we don't want to
# write the contents to disk, as we will need to wait until we
# get to the redirected URL.
#
# write_body[1] will contain a tornado.httputil.HTTPHeaders
# instance that we will use to parse each header line. We
# initialize this to False, and after we parse the status line we
# will replace it with the HTTPHeaders instance. If/when we have
# found the encoding used in the request, we set this value to
# False to signify that we are done parsing.
#
# write_body[2] is where the encoding will be stored
write_body = [None, False, None]
def on_header(hdr):
if write_body[1] is not False and write_body[2] is None:
# Try to find out what content type encoding is used if this is a text file
# Try to find out what content type encoding is used if
# this is a text file
write_body[1].parse_line(hdr) # pylint: disable=no-member
if 'Content-Type' in write_body[1]:
content_type = write_body[1].get('Content-Type') # pylint: disable=no-member
@ -621,17 +638,22 @@ class Client(object):
# We have found our encoding. Stop processing headers.
write_body[1] = False
if write_body[0] is not None:
# We already parsed the first line. No need to run the code below again
return
# If write_body[0] is False, this means that this
# header is a 30x redirect, so we need to reset
# write_body[0] to None so that we parse the HTTP
# status code from the redirect target.
if write_body[0] is write_body[1] is False:
write_body[0] = None
try:
hdr = parse_response_start_line(hdr)
except HTTPInputError:
# Not the first line, do nothing
return
write_body[0] = hdr.code not in [301, 302, 303, 307]
write_body[1] = HTTPHeaders()
# Check the status line of the HTTP request
if write_body[0] is None:
try:
hdr = parse_response_start_line(hdr)
except HTTPInputError:
# Not the first line, do nothing
return
write_body[0] = hdr.code not in [301, 302, 303, 307]
write_body[1] = HTTPHeaders()
if no_cache:
result = []