我想使用 Scrapy 抓取整个网站，但目前它只抓取了单个页面。我的代码如下：
import scrapy
from scrapy.http import HtmlResponse
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from scrapy.contrib.exporter import JsonItemExporter
class IzodspiderSpider(CrawlSpider):
    """Crawl izod.com by following links and scrape product details per page.

    The spider derives from ``CrawlSpider`` because only ``CrawlSpider``
    processes the ``rules`` attribute; a plain ``scrapy.Spider`` ignores it
    and therefore never leaves the start page.
    """

    name = 'izodspider'
    allowed_domains = ['izod.com']
    start_urls = ['http://izod.com/']
    # An extractor without filters picks up every link on a page;
    # ``follow=True`` keeps extracting links from the pages reached that way.
    rules = [Rule(SgmlLinkExtractor(), callback='parse_item', follow=True)]

    # NOTE: do not define ``parse`` here. ``CrawlSpider`` implements its rule
    # handling inside ``parse``, so overriding it disables link following.

    def parse_start_url(self, response):
        """Scrape the start page too; rule callbacks only see followed links."""
        return self.parse_item(response)

    def parse_item(self, response):
        """Extract the meta description and product details from one page.

        Args:
            response: The downloaded page handed over by Scrapy.

        Yields:
            A dict with the keys ``url``, ``meta``, ``name`` and ``desc``.
            The last three are lists of extracted strings and are empty when
            the page has no matching nodes. Yielding a plain dict as an item
            requires Scrapy 1.0 or newer.
        """
        selector = scrapy.Selector(response)
        product = "//div[@id='product-details']"
        yield {
            'url': response.url,
            'meta': selector.xpath(
                "//meta[@name='description']/@content").extract(),
            'name': selector.xpath(product + '/h5').extract(),
            'desc': selector.xpath(product + '/p').extract(),
        }
另外，有什么方法可以使用 Portia 提取 meta 标签（元标记）？