我正在尝试使用 urllib2 HTTP 客户端在 Python 中实现下载进度条。我查阅了 API 文档(也在 Google 上搜索过),urllib2 似乎不支持注册进度回调(钩子)。不过,已不推荐使用的旧版 urllib 确实具有此功能。
有谁知道如何用 urllib2 实现进度条或进度报告钩子?或者有没有其他技巧可以实现类似的功能?
我正在尝试使用 urllib2 HTTP 客户端在 Python 中实现下载进度条。我查阅了 API 文档(也在 Google 上搜索过),urllib2 似乎不支持注册进度回调(钩子)。不过,已不推荐使用的旧版 urllib 确实具有此功能。
有谁知道如何用 urllib2 实现进度条或进度报告钩子?或者有没有其他技巧可以实现类似的功能?
下面是一个完整示例,基于 Anurag 提出的分块读取响应的思路。我的版本允许您设置块大小,并挂接任意的进度报告函数:
import urllib2, sys
def chunk_report(bytes_so_far, chunk_size, total_size):
    """Write a one-line download progress report to stdout.

    Args:
        bytes_so_far: Number of bytes received so far.
        chunk_size: Size of each read request. Accepted so the function
            matches the report-hook signature; not used in the report.
        total_size: Expected total number of bytes. ``None`` or a value
            <= 0 is treated as "total unknown".
    """
    if total_size is None or total_size <= 0:
        # Without a usable total a percentage would divide by zero (or be
        # meaningless), so report only the running byte count.
        sys.stdout.write("Downloaded %d bytes\r" % bytes_so_far)
        return

    percent = round(float(bytes_so_far) / total_size * 100, 2)
    sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" %
                     (bytes_so_far, total_size, percent))

    # Every report ends in "\r" so the next one overwrites it; emit a
    # newline once the download is complete to keep the final line.
    if bytes_so_far >= total_size:
        sys.stdout.write('\n')
def chunk_read(response, chunk_size=8192, report_hook=None):
    """Read *response* to exhaustion in chunks, reporting progress.

    Args:
        response: Object providing ``info().getheader(name)`` and
            ``read(size)``, such as the result of ``urllib2.urlopen``.
        chunk_size: Maximum number of bytes requested per read.
        report_hook: Optional callable invoked after every non-empty chunk
            as ``report_hook(bytes_so_far, chunk_size, total_size)``.
            ``total_size`` is -1 when the response carries no usable
            Content-Length header.

    Returns:
        The total number of bytes read.
    """
    content_length = response.info().getheader('Content-Length')
    try:
        total_size = int(content_length.strip())
    except (AttributeError, ValueError):
        # Header missing (None has no .strip) or not an integer: the total
        # is unknown, so keep downloading and report -1 instead of failing.
        total_size = -1

    bytes_so_far = 0
    while True:
        chunk = response.read(chunk_size)
        if not chunk:
            break
        bytes_so_far += len(chunk)
        if report_hook:
            report_hook(bytes_so_far, chunk_size, total_size)
    return bytes_so_far
if __name__ == '__main__':
    # Demo: download a page and print progress to stdout while reading.
    response = urllib2.urlopen('http://www.ebay.com')
    try:
        chunk_read(response, report_hook=chunk_report)
    finally:
        # Release the connection even if reading fails part-way.
        # try/finally is used instead of "with" because this snippet
        # targets Python 2's urllib2.
        response.close()
为什么不直接分块读取数据,并在每次读取之间做任何你想做的事情呢?例如在线程中运行、挂接到 UI 等。
import urllib2
# Download a page in fixed-size chunks; any per-chunk work (progress
# display, UI updates, ...) can be done inside the loop body.
urlfile = urllib2.urlopen("http://www.google.com")
data_list = []
chunk = 4096  # number of bytes requested per read
try:
    while 1:
        data = urlfile.read(chunk)
        if not data:
            # An empty read signals the end of the response.
            print("done.")
            break
        data_list.append(data)
        print("Read %s bytes" % len(data))
finally:
    # Release the connection even if a read raises.
    urlfile.close()
输出:
Read 4096 bytes
Read 3113 bytes
done.
urlgrabber 内置了对进度通知的支持。
# Download file_url into /tmp, printing a progress line for every chunk.
temp_filename = "/tmp/" + file_url.split('/')[-1]
remote_file = urllib2.urlopen(file_url)
try:
    # A response doesn't always include the "Content-Length" header, and
    # when present it may not be a usable positive integer.
    content_length = remote_file.info().getheader('Content-Length')
    try:
        declared_size = int(content_length.strip())
    except (AttributeError, ValueError):
        declared_size = 0
    header = declared_size > 0

    bytes_so_far = 0
    # The file is opened only after the request succeeded, and "with"
    # guarantees it is flushed and closed even if a read or write fails.
    with open(temp_filename, 'wb') as f:
        while True:
            block = remote_file.read(8192)
            if not block:
                sys.stdout.write('\n')
                break
            bytes_so_far += len(block)
            f.write(block)
            # Unknown size: report against the bytes received so far.
            total_size = declared_size if header else bytes_so_far
            percent = round(float(bytes_so_far) / total_size * 100, 2)
            sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, total_size, percent))
finally:
    # Release the connection whether or not the download completed.
    remote_file.close()
对 Triptych 的回答稍作修改,使其能够将文件实际写入磁盘(Python 3):
import sys
from urllib.request import urlopen
def chunk_report(bytes_so_far, chunk_size, total_size):
    """Write a one-line download progress report to stdout.

    Args:
        bytes_so_far: Number of bytes received so far.
        chunk_size: Size of each read request. Accepted so the function
            matches the report-hook signature; not used in the report.
        total_size: Expected total number of bytes. ``None`` or a value
            <= 0 is treated as "total unknown".
    """
    total_known = total_size is not None and total_size > 0
    if not total_known:
        # A percentage cannot be computed without a positive total
        # (division by zero), so print the byte count alone.
        sys.stdout.write("Downloaded %d bytes\r" % bytes_so_far)
        return

    percent = round(float(bytes_so_far) / total_size * 100, 2)
    sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" %
                     (bytes_so_far, total_size, percent))

    # Reports end in "\r" and overwrite each other; terminate the line
    # once everything has arrived so the final report stays visible.
    if bytes_so_far >= total_size:
        sys.stdout.write('\n')
def chunk_read(response, chunk_size=8192, report_hook=None):
    """Read *response* to exhaustion in chunks and return the payload.

    Args:
        response: Object providing ``info().get(name)`` and ``read(size)``,
            such as the result of ``urllib.request.urlopen``.
        chunk_size: Maximum number of bytes requested per read.
        report_hook: Optional callable invoked after every non-empty chunk
            as ``report_hook(bytes_so_far, chunk_size, total_size)``.
            ``total_size`` is -1 when the response carries no usable
            Content-Length header.

    Returns:
        All bytes read from the response, concatenated in order.
    """
    content_length = response.info().get("Content-Length")
    try:
        total_size = int(content_length.strip())
    except (AttributeError, ValueError):
        # Header missing (None has no .strip) or not an integer: the total
        # is unknown, so keep downloading and report -1 instead of failing.
        total_size = -1

    bytes_so_far = 0
    # Collect the pieces and join once at the end; repeated ``bytes +=``
    # copies the whole accumulated payload on every chunk.
    chunks = []
    while True:
        chunk = response.read(chunk_size)
        if not chunk:
            break
        bytes_so_far += len(chunk)
        if report_hook:
            report_hook(bytes_so_far, chunk_size, total_size)
        chunks.append(chunk)
    return b"".join(chunks)
用法:
# Download first, then write: a failed download no longer leaves an empty
# file at out_path, and the response is closed as soon as it has been read.
with urlopen(filepath) as response:
    data_read = chunk_read(response, report_hook=chunk_report)
with open(out_path, "wb") as f:
    f.write(data_read)