我有以下代码,它应该从 csv 文件中的给定 url 将图像下载到指定目录中。目录都设置好了。
with open('images.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
next(csv_reader)
for row in csv_reader:
basename = os.path.basename(urlparse(row[0]).path)
filename = '{}/{}/{}'.format(row[2], row[1], basename)
urllib.request.urlretrieve(row[0], filename)
csv 文件按以下方式组织:
http://farm2.static.flickr.com/1245/1259825348_6a2aa94e8d.jpg,cat,train
http://farm1.static.flickr.com/146/350588612_d84d71cc59.jpg,cat,test
http://farm1.static.flickr.com/32/99029168_940da3a1e5.jpg,cat,val
但是当我执行代码时,出现以下错误。我今天才弄清楚如何使用 python 从 url 下载图像,所以我非常感谢在这件事上的所有帮助!:
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-36-6e201d3625d3> in <module>
5 basename = os.path.basename(urlparse(row[0]).path)
6 filename = '{}/{}/{}'.format(row[2], row[1], basename)
----> 7 urllib.request.urlretrieve(row[0], filename)
~\Anaconda3\lib\urllib\request.py in urlretrieve(url, filename, reporthook, data)
245 url_type, path = splittype(url)
246
--> 247 with contextlib.closing(urlopen(url, data)) as fp:
248 headers = fp.info()
249
~\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
~\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
529 for processor in self.process_response.get(protocol, []):
530 meth = getattr(processor, meth_name)
--> 531 response = meth(req, response)
532
533 return response
~\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
639 if not (200 <= code < 300):
640 response = self.parent.error(
--> 641 'http', request, response, code, msg, hdrs)
642
643 return response
~\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
561 http_err = 0
562 args = (dict, proto, meth_name) + args
--> 563 result = self._call_chain(*args)
564 if result:
565 return result
~\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
~\Anaconda3\lib\urllib\request.py in http_error_302(self, req, fp, code, msg, headers)
753 fp.close()
754
--> 755 return self.parent.open(new, timeout=req.timeout)
756
757 http_error_301 = http_error_303 = http_error_307 = http_error_302
~\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
529 for processor in self.process_response.get(protocol, []):
530 meth = getattr(processor, meth_name)
--> 531 response = meth(req, response)
532
533 return response
~\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
639 if not (200 <= code < 300):
640 response = self.parent.error(
--> 641 'http', request, response, code, msg, hdrs)
642
643 return response
~\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
567 if http_err:
568 args = (dict, 'default', 'http_error_default') + orig_args
--> 569 return self._call_chain(*args)
570
571 # XXX probably also want an abstract factory that knows when it makes
~\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
~\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
647 class HTTPDefaultErrorHandler(BaseHandler):
648 def http_error_default(self, req, fp, code, msg, hdrs):
--> 649 raise HTTPError(req.full_url, code, msg, hdrs, fp)
650
651 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 404: Not Found