我正在使用图像管道从不同的网站下载所有图像。
所有图像都已成功下载到我定义的文件夹中，但在保存到硬盘之前，我无法用自己选择的名称为下载的图像命名。
这是我的代码
pipelines.py
class jellyImagesPipeline(ImagesPipeline):
    """Images pipeline that saves each download as ``full/<image_name>.jpg``.

    The chosen name travels from the item to the storage-path hook through
    ``Request.meta``, because the path is computed per download request.
    """

    def image_key(self, url, item):
        """Return the storage path ``full/<image_name>.jpg`` for ``item``.

        Kept so code that calls it directly with an item keeps working.
        NOTE(review): Scrapy's built-in pipeline is not expected to pass an
        ``item`` to this hook, which is why ``file_path`` does the actual
        naming -- confirm against the installed Scrapy version.
        """
        return 'full/%s.jpg' % item['image_name']

    def file_path(self, request, response=None, info=None, **kwargs):
        """Return the storage path for the image fetched by ``request``.

        The name is read from ``request.meta['image_name']``, which
        ``get_media_requests`` attaches to every request it yields.
        ``**kwargs`` absorbs extra keyword arguments (such as ``item``) that
        newer Scrapy releases pass to this hook.
        """
        return 'full/%s.jpg' % request.meta['image_name']

    def get_media_requests(self, item, info):
        """Yield one download request per URL in ``item['image_urls']``.

        Every request carries the item's ``image_name`` in its ``meta`` so
        that ``file_path`` can name the stored file.
        """
        # A single-argument print() call prints the same text on Python 2
        # and Python 3; the bare print statement only parses on Python 2.
        print('Entered get_media_request')
        for image_url in item['image_urls']:
            yield Request(image_url, meta={'image_name': item['image_name']})
Image_spider.py
def getImage(self, response):
    """Build a ``JellyfishItem`` describing the image served by ``response``.

    The item's ``image_urls`` holds the response URL as a one-element list,
    and ``image_name`` is copied from ``response.meta['image_name']``.
    """
    image_item = JellyfishItem()
    source_url = response.url
    image_item['image_urls'] = [source_url]
    chosen_name = response.meta['image_name']
    image_item['image_name'] = chosen_name
    return image_item
我需要在我的代码中做哪些更改?
更新 1
pipelines.py
class jellyImagesPipeline(ImagesPipeline):
    """Images pipeline that also stores each image under a custom name.

    ``get_media_requests`` puts the item's ``image_name`` into each request's
    ``meta``; ``get_images`` then yields the parent class's entries followed
    by one extra entry keyed by ``image_custom_key``.
    """

    def image_custom_key(self, response):
        """Return ``full/<image_name>.jpg`` using ``response.meta['image_name']``.

        A list or tuple contributes its first entry (the original behaviour);
        a plain string is used whole instead of being cut to its first
        character.
        """
        # Single-argument print() calls print the same text on Python 2 and 3.
        print('\n\n image_custom_key \n\n')
        name = response.meta['image_name']
        if isinstance(name, (list, tuple)):
            name = name[0]
        img_key = 'full/%s.jpg' % (name,)
        print('custom image key: %s' % img_key)
        return img_key

    def get_images(self, response, request, info):
        """Yield the parent's ``(key, image, buf)`` entries, then a custom one.

        NOTE(review): every image is therefore yielded twice -- once under
        the parent's key and once under the custom key; confirm that storing
        both copies is intended.
        """
        # Local import keeps the block self-contained; BytesIO accepts the
        # raw response body on both Python 2 and Python 3, whereas a text
        # StringIO rejects bytes on Python 3.
        from io import BytesIO

        print("\n\n get_images \n\n")
        parent_images = super(jellyImagesPipeline, self).get_images(
            response, request, info)
        for key, image, buf in parent_images:
            yield key, image, buf

        key = self.image_custom_key(response)
        orig_image = Image.open(BytesIO(response.body))
        image, buf = self.convert_image(orig_image)
        yield key, image, buf

    def get_media_requests(self, item, info):
        """Return one request per URL in ``item['image_urls']`` (if any).

        Each request carries ``item['image_name']`` in ``meta`` so that
        ``image_custom_key`` can read it back from the response.
        """
        print("\n\nget_media_requests\n")
        return [Request(x, meta={'image_name': item["image_name"]})
                for x in item.get('image_urls', [])]
更新 2
def image_key(self, image_name):
    """Return the storage path ``homeshop/<image_name>.jpg``.

    The path is also echoed to stdout as a debugging aid.
    """
    # Single-argument print() calls print the same text on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print('entered into image_key')
    name = 'homeshop/%s.jpg' % (image_name)
    print(name)
    return name
def get_images(self, request):
    """Yield the storage key derived from ``request.url`` via ``image_key``.

    NOTE(review): Scrapy's ``ImagesPipeline.get_images`` hook appears to be
    called as ``get_images(response, request, info)`` and to yield
    ``(path, image, buf)`` tuples; this one-argument version yields only the
    key, and builds it from the URL rather than the chosen image name --
    confirm against the installed Scrapy version before relying on it.
    """
    # Single-argument print() prints the same text on Python 2 and 3.
    print('\nEntered into get_images')
    key = self.image_key(request.url)
    yield key
def get_media_requests(self, item, info):
    """Yield a download request for the item's first image URL.

    The request carries ``item['image_name']`` in its ``meta``. An item whose
    ``image_urls`` is empty yields nothing instead of raising ``IndexError``.
    """
    # Single-argument print() calls print the same text on Python 2 and 3.
    print('\n\nEntered media_request')
    print(item['image_name'])
    image_urls = item['image_urls']
    if not image_urls:
        # Nothing to download for this item.
        return
    yield Request(image_urls[0], meta=dict(image_name=item['image_name']))
def item_completed(self, results, item, info):
    """Print the download results for ``item`` and return the item.

    Returning ``item`` matters: Scrapy's media-pipeline documentation says
    the value returned here is what later pipeline stages receive, so
    falling off the end (returning ``None``) loses the item.
    """
    # Single-argument print() calls print the same text on Python 2 and 3.
    print('\n\nentered into item_completed\n')
    print('Name :  %s' % (item['image_urls'],))
    print(item['image_name'])
    # Loop variable renamed so it no longer shadows the builtin ``tuple``.
    for result in results:
        print(result)
    return item