我正在使用 Scrapy 构建一个爬虫,但收到了下面的错误消息,不清楚原因。我查阅了 Scrapy 文档,但不确定是否遗漏了什么。我想从一个网站下载图片及其对应的描述:描述保存到 CSV 文件中,图片下载到名为 IMG 的文件夹中。
爬虫(Spider)代码:
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from NSIscrape.items import NsiscrapeItem
from scrapy.http import Request
from scrapy.contrib.pipeline.images import ImagesPipeline
class NsiscrapeSpider(BaseSpider):
    """Spider that scrapes the inventory table on yachtauctions.com.

    ``parse`` turns every table row that links to a detail page into an
    ``NsiscrapeItem`` and schedules a request for that detail page, passing
    the item along in the request ``meta``.  ``product_detail_page`` then
    emits the item.
    """

    name = "Nsiscrape"
    # Scrapy reads ``allowed_domains`` (plural).  The previous spelling,
    # ``allowed_domain``, is not an attribute Scrapy looks at, so the
    # domain restriction was never applied.
    allowed_domains = ["yachtauctions.com"]
    start_urls = [
        "http://www.yachtauctions.com/inventory/"
    ]

    def parse(self, response):
        """Yield one detail-page ``Request`` per inventory row.

        Every extracted field is stored as the list returned by
        ``extract()``.  Rows that have no link in their first cell are
        skipped because there is no detail page to request for them.
        """
        hxs = HtmlXPathSelector(response)
        for row in hxs.select('//tr'):
            detail_urls = row.select('td[1]/a/@href').extract()
            if not detail_urls:
                # Presumably a header/spacer row; indexing [0] on the empty
                # list would raise IndexError and abort the whole callback.
                continue

            item = NsiscrapeItem()
            item['location'] = row.select('td[2]/text()').extract()
            item['stock_number'] = row.select('td[3]/a/text()').extract()
            item['year'] = row.select('td[4]/text()').extract()
            item['manufacturer'] = row.select('td[5]/text()').extract()
            item['model'] = row.select('td[6]/text()').extract()
            item['length'] = row.select('td[7]/text()').extract()
            item['price'] = row.select('td[8]/text()').extract()
            item['status'] = row.select('td[10]/img/@src').extract()
            item['url'] = detail_urls
            item['images'] = row.select(
                'td/a[3]/img/@data-original').extract()
            item['image_urls'] = item['images']

            # Request() needs a single URL string, not the list returned by
            # extract(), hence the explicit [0].
            yield Request(detail_urls[0],
                          meta={'item': item},
                          callback=self.product_detail_page)

    def product_detail_page(self, response):
        """Emit the item that ``parse`` attached to the request ``meta``."""
        # Selector is prepared for the pending image extraction below.
        hxs = HtmlXPathSelector(response)
        item = response.request.meta['item']
        # TODO: add all image URLs found on this page to item['image_urls']
        yield item
这是我运行爬虫时遇到的错误:
raise TypeError('Request url must be str or unicode, got %s:' % type
(url).__name__)
exceptions.TypeError: Request url must be str or unicode, got list: