我有一个 Python 脚本,它启动一个可下载文件的 URL。有没有办法让 Python 显示下载进度而不是启动浏览器?
11 回答
我刚刚为此编写了一个超级简单(有点 hacky)的方法,用于从某个站点上抓取 PDF。请注意,它只能在基于 Unix 的系统(Linux、macOS)上正常工作,因为 PowerShell 无法正确处理 "\r":
import sys
import requests
# Example download target and the local file the payload is saved to.
link = "http://indy/abcde1245"
file_name = "download.data"

print("Downloading %s" % file_name)
# Stream the body so it can be written chunk by chunk; the context manager
# releases the connection when the block exits.
with requests.get(link, stream=True) as response:
    # Fail on 4xx/5xx instead of saving an error page as the download.
    response.raise_for_status()
    header = response.headers.get('content-length')
    total_length = int(header) if header is not None else 0
    with open(file_name, "wb") as f:
        if total_length <= 0:
            # No usable size to scale the bar against: just save the body.
            f.write(response.content)
        else:
            dl = 0
            for data in response.iter_content(chunk_size=4096):
                dl += len(data)
                f.write(data)
                # 50 cells wide; clamp so a body larger than the advertised
                # length cannot push the bar past its closing bracket.
                done = min(50, int(50 * dl / total_length))
                sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
                sys.stdout.flush()
            # Move off the bar line so later output does not overwrite it.
            sys.stdout.write("\n")
它使用requests 库,因此您需要安装它。这会在您的控制台中输出如下内容:
>Downloading download.data
>[==============]
脚本中的进度条宽 52 个字符(其中 2 个字符是方括号 [],其余 50 个字符表示进度)。每个 = 代表 2% 的下载量。
您可以使用“ clint ”包(由与“requests”相同的作者编写)为您的下载添加一个简单的进度条,如下所示:
from clint.textui import progress
# Stream the response so the body is fetched chunk by chunk.
r = requests.get(url, stream=True)
path = '/some/path/for/file.txt'
with open(path, 'wb') as f:
    chunks = r.iter_content(chunk_size=1024)
    content_length = r.headers.get('content-length')
    if content_length is not None:
        # The bar is given an expected chunk count derived from Content-Length;
        # when the header is missing the chunks are written without a bar.
        chunks = progress.bar(chunks, expected_size=(int(content_length) / 1024) + 1)
    for chunk in chunks:
        if chunk:
            f.write(chunk)
            f.flush()
这将为您提供如下所示的动态输出:
[################################] 5210/5210 - 00:00:01
它也应该适用于多个平台!您还可以使用 .dots 和 .mill 而不是 .bar 将条形图更改为点或微调器。
享受!
带有 TQDM 的 Python 3
这是TQDM 文档中建议的技术。
import urllib.request
from tqdm import tqdm
class DownloadProgressBar(tqdm):
    """tqdm bar with an `update_to` method that can be passed as a `reporthook`."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Advance the bar to the absolute position `b * bsize`.

        b     : blocks transferred so far
        bsize : size of each block
        tsize : total size; stored in `self.total` when it is not None
        """
        if tsize is not None:
            self.total = tsize
        transferred = b * bsize
        # update() takes an increment, so pass the distance from the current count.
        self.update(transferred - self.n)
def download_url(url, output_path):
    """Retrieve `url` into `output_path`, reporting progress on a DownloadProgressBar."""
    # The bar is labelled with the last '/'-separated segment of the URL.
    label = url.split('/')[-1]
    with DownloadProgressBar(unit='B', unit_scale=True, miniters=1, desc=label) as t:
        urllib.request.urlretrieve(
            url,
            filename=output_path,
            reporthook=t.update_to,
        )
import requests
from tqdm import tqdm
def download(url: str, fname: str):
    """Stream `url` into the file `fname` while updating a tqdm bar.

    The bar total comes from the Content-Length header and is 0 when the
    header is absent.
    """
    resp = requests.get(url, stream=True)
    expected_bytes = int(resp.headers.get('content-length', 0))
    bar_options = dict(
        desc=fname,
        total=expected_bytes,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    )
    with open(fname, 'wb') as out, tqdm(**bar_options) as bar:
        for block in resp.iter_content(chunk_size=1024):
            # Advance by whatever write() reports for this block.
            written = out.write(block)
            bar.update(written)
Gist:https://gist.github.com/yanqd0/c13ed29e29432e3cf3e7c38467f42f51
您也可以使用click。它有一个很好的进度条库:
import click
# One bar for the whole batch: its length is the combined size, and each
# finished file advances it by that file's size.
with click.progressbar(
    label='Downloading files',
    length=total_size,
) as bar:
    for file in files:
        download(file)
        bar.update(file.size)
抱歉回复晚了;刚刚更新了tqdm
文档:
https://github.com/tqdm/tqdm/#hooks-and-callbacks
使用urllib.urlretrieve
和 OOP:
import urllib
from tqdm.auto import tqdm
class TqdmUpTo(tqdm):
    """tqdm subclass adding `update_to(n)`, built on `tqdm.update(delta_n)`."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the absolute position `b * bsize`.

        b     : blocks transferred so far
        bsize : size of each block
        tsize : total size; stored in `self.total` when it is not None
        """
        if tsize is not None:
            self.total = tsize
        position = b * bsize
        # update() is relative, so pass the delta; this leaves self.n == position.
        self.update(position - self.n)
# On Python 3 urlretrieve lives in urllib.request; a bare `import urllib`
# does not provide `urllib.urlretrieve`.
import urllib.request

eg_link = "https://github.com/tqdm/tqdm/releases/download/v4.46.0/tqdm-4.46.0-py2.py3-none-any.whl"
# Save under (and label the bar with) the last path segment of the URL.
eg_file = eg_link.split('/')[-1]
with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, miniters=1,
              desc=eg_file) as t:  # all optional kwargs
    urllib.request.urlretrieve(
        eg_link, filename=eg_file, reporthook=t.update_to, data=None)
    # Align the total with the amount actually counted, presumably so the bar
    # finishes at 100% even if the reported size differed.
    t.total = t.n
或使用requests.get
和文件包装器:
import requests
from tqdm.auto import tqdm
eg_link = "https://github.com/tqdm/tqdm/releases/download/v4.46.0/tqdm-4.46.0-py2.py3-none-any.whl"
# The local file takes the last path segment of the URL as its name.
eg_file = eg_link.rsplit('/', 1)[-1]
response = requests.get(eg_link, stream=True)
# Wrap the file's write() so each written chunk advances the bar; the total is
# read from Content-Length and is 0 when the header is absent.
with tqdm.wrapattr(
    open(eg_file, "wb"),
    "write",
    miniters=1,
    total=int(response.headers.get('content-length', 0)),
    desc=eg_file,
) as fout:
    for chunk in response.iter_content(chunk_size=4096):
        fout.write(chunk)
你当然可以混搭技术。
另一个不错的选择是wget
:
import wget
# Download the archive at this URL using the wget package.
wget.download('http://download.geonames.org/export/zip/US.zip')
输出将如下所示:
11% [........ ] 73728 / 633847
来源:https://medium.com/@petehouston/download-files-with-progress-in-python-96f14f6417a2
tqdm 软件包现在包含一个专门处理这种情况的函数:wrapattr。您只需包装对象的 read(或 write)属性,其余的由 tqdm 处理。下面是一个简单的下载函数,它把这些与 requests 结合在一起:
def download(url, filename):
    """Download `url` to `filename` behind a tqdm progress bar.

    Parent directories of the target are created as needed. Returns the
    resolved `pathlib.Path` of the written file. Any response whose status
    code is not 200 ends in an exception: whatever `raise_for_status()`
    raises, otherwise a RuntimeError.
    """
    import functools
    import pathlib
    import shutil
    import requests
    import tqdm

    r = requests.get(url, stream=True, allow_redirects=True)
    if r.status_code != 200:
        # raise_for_status() can return normally for a non-200 status, so an
        # explicit error follows it.
        r.raise_for_status()
        raise RuntimeError(f"Request to {url} returned status code {r.status_code}")

    # 0 stands for "size not reported" when the header is missing.
    expected_size = int(r.headers.get('Content-Length', 0))
    label = "" if expected_size != 0 else "(Unknown total file size)"

    target = pathlib.Path(filename).expanduser().resolve()
    target.parent.mkdir(parents=True, exist_ok=True)

    # Decompress if needed
    r.raw.read = functools.partial(r.raw.read, decode_content=True)
    # Wrapping read() lets the copy below advance the bar as it pulls data.
    with tqdm.tqdm.wrapattr(r.raw, "read", total=expected_size, desc=label) as r_raw, \
            target.open("wb") as f:
        shutil.copyfileobj(r_raw, f)

    return target
# 定义进度条函数
def print_progressbar(total, current, barsize=60):
    """Draw a one-line text progress bar on stdout.

    The line ends with a carriage return instead of a newline, so the next
    call overwrites it in place.

    Args:
        total: value that corresponds to 100%.
        current: progress so far, on the same scale as `total`.
        barsize: width of the bar in character cells.

    Raises:
        ZeroDivisionError: if `total` is 0.
    """
    filled = int(current * barsize / total)
    percent = str(int(current * 100 / total)) + '%'
    print(
        '[', chr(9608) * filled, ' ', percent, '.' * (barsize - filled), '] ',
        # Report the `current` argument, not a global loop counter.
        str(current) + '/' + str(total),
        sep='', end='\r', flush=True,
    )
# 示例代码
# Demo: step from 1 to `total`, redrawing the bar on the first step and then
# on every `print_frequency`-th step.
total = 6000
barsize = 60
print_frequency = max(1, min(100, total // barsize))
print("Start Task..", flush=True)
for i in range(1, total + 1):
    if i == 1 or i % print_frequency == 0:
        print_progressbar(total, i, barsize)
print("\nFinished", flush=True)
# 进度条截图:
下面的行仅用于说明。在命令提示符下,您将看到显示增量进度的单个进度条。
[ 0%............................................................] 1/6000
[██████████ 16%..................................................] 1000/6000
[████████████████████ 33%........................................] 2000/6000
[██████████████████████████████ 50%..............................] 3000/6000
[████████████████████████████████████████ 66%....................] 4000/6000
[██████████████████████████████████████████████████ 83%..........] 5000/6000
[████████████████████████████████████████████████████████████ 100%] 6000/6000
只是对@rich-jones 答案的一些改进
import re
import request
from clint.textui import progress
def get_filename(cd):
    """Extract the `filename` parameter from a Content-Disposition header value.

    Only the `filename` parameter itself is returned: a quoted value ends at
    its closing quote and an unquoted one at the next ';', so parameters that
    follow it are not included. The parameter name is matched
    case-insensitively.

    Args:
        cd: the header value, or None/empty when the header is absent.

    Returns:
        The file name with surrounding quotes removed, or None when `cd` is
        empty, has no `filename` parameter, or the parameter is empty.
    """
    if not cd:
        return None
    match = re.search(r'filename\s*=\s*(?:"([^"]*)"|([^;]+))', cd, re.IGNORECASE)
    if match is None:
        return None
    quoted, bare = match.groups()
    if quoted is not None:
        name = quoted
    else:
        # Unquoted (or unterminated-quote) value: drop stray quotes and padding.
        name = bare.replace('"', '').strip()
    return name or None
def stream_download_file(url, output, chunk_size=1024, session=None, verbose=False):
    """Stream `url` into the directory `output`, drawing a clint progress bar.

    The file name comes from the Content-Disposition response header. When the
    header yields no usable name, the last path segment of `url` is used, and
    "download" if that is empty too.

    Args:
        url: address to download.
        output: directory the file is written into.
        chunk_size: number of bytes requested per chunk.
        session: optional object exposing `get(url, stream=True)`; when falsy,
            `requests.get` is used instead.
        verbose: print a line before and after the transfer.
    """
    import os
    from urllib.parse import unquote, urlsplit

    if session:
        file = session.get(url, stream=True)
    else:
        # Imported here so the function does not rely on a module-level
        # binding of `requests`.
        import requests
        file = requests.get(url, stream=True)

    header_name = get_filename(file.headers.get('content-disposition'))
    # The header is server-controlled: keep only the final path component so
    # the value cannot point outside `output`.
    file_name = os.path.basename(header_name) if header_name else ''
    if file_name in ('', '.', '..'):
        file_name = os.path.basename(unquote(urlsplit(url).path))
    if file_name in ('', '.', '..'):
        file_name = "download"
    filepath = "{}/{}".format(output, file_name)

    if verbose:
        print("Downloading {}".format(file_name))
    with open(filepath, 'wb') as f:
        chunks = file.iter_content(chunk_size=chunk_size)
        content_length = file.headers.get('content-length')
        if content_length is not None:
            # The bar gets an expected chunk count derived from Content-Length;
            # without the header the chunks are written with no bar.
            chunks = progress.bar(chunks, expected_size=(int(content_length) / chunk_size) + 1)
        for chunk in chunks:
            if chunk:
                f.write(chunk)
                f.flush()
    if verbose:
        print("Finished")
您可以在此处流式传输下载 ->流式下载。
您也可以Stream Uploads。
最重要的一点:只要不访问 response.content,请求就会以流的方式处理,而这只需要 2 行代码:
# Print every non-empty line yielded by r.iter_lines().
for line in r.iter_lines():
    if not line:
        continue
    print(line)