我正在使用 Scrapy 从网站的所有页面抓取一些信息。这是我的 dmoz_spider.py 文件。执行它时,我遇到了 IndentationError。请帮帮我。
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from scrapy.item import Item, Field
import string
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
class EypItem(Item):
    """Scraped-data container with four declared fields.

    The class body must be indented under the ``class`` line; without that
    indentation Python raises ``IndentationError`` when loading the module.
    """

    # Filled from the ``alt`` attribute of product images.
    title = Field()
    # Filled from the ``src`` attribute of product images.
    # NOTE(review): this is the image URL, not the product page URL --
    # confirm that is what "link" is meant to hold.
    link = Field()
    # Filled from text nodes under elements with class "pricing".
    price = Field()
    # Filled from text nodes under elements with class "reviewSnippet".
    review = Field()
class eypSpider(CrawlSpider):
    """Crawl Walgreens search-result pages and collect product data.

    The crawl starts at the "allergy medicine" search results, follows every
    extracted link that matches the search-results URL pattern, and passes
    each fetched page to ``parse_item``.
    """

    name = "dmoz"

    # Entries must be bare host names, not URLs: Scrapy's offsite filtering
    # compares against the request's host name, so an entry that includes
    # the "http://" scheme never matches and followed links get dropped.
    allowed_domains = ["www.walgreens.com"]

    start_urls = [
        "http://www.walgreens.com/search/results.jsp?Ntt=allergy%20medicine",
    ]

    # Raw string so that "\." reaches the regex engine unchanged instead of
    # being treated as an (invalid) string escape sequence.
    rules = (
        Rule(
            SgmlLinkExtractor(allow=(r'/search/results\.jsp',)),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Extract product data from one crawled search-results page.

        Args:
            response: The response object for the fetched page.

        Returns:
            A list containing one ``EypItem`` per ``div`` with
            ``id="productGrid"`` found in the page. Each field of an item
            holds the list of all values extracted from inside that grid.
        """
        self.log('Hi, this is an item page! %s' % response.url)
        hxs = HtmlXPathSelector(response)

        items = []
        for grid in hxs.select('//div[@id="productGrid"]'):
            item = EypItem()
            # The leading "." makes each query relative to the current grid
            # node; a bare "//" would search the entire document again.
            item["title"] = grid.select(
                './/*[@class="image-container"]/a/img/@alt').extract()
            item["link"] = grid.select(
                './/*[@class="image-container"]/a/img/@src').extract()
            item["price"] = grid.select(
                './/*[@class="pricing"]/div/p/text()').extract()
            item["review"] = grid.select(
                './/*[@class="reviewSnippet"]/div/div/span/text()').extract()
            items.append(item)

        # Returned once, after every grid has been processed.
        return items