我正在尝试子类化 ScrapyXPathSelector
并修补以支持 CSS3 选择器。
XPathSelector
定义如下:
class XPathSelector(object_ref):
__slots__ = ['doc', 'xmlNode', 'expr', '__weakref__']
def __init__(self, response=None, text=None, node=None, parent=None, expr=None):
if parent is not None:
self.doc = parent.doc
...
我继承XPathSelector
并覆盖__init__
:
class CSSSelector(XPathSelector):
def __init__(self, *args, **kwargs):
translator = kwargs.get('translator', 'html').lower()
if 'translator' in kwargs:
del kwargs['translator']
super(XPathSelector, self).__init__(*args, **kwargs)
当我尝试使用时,CSSSelector
我得到AttributeError
s和的错误。手动添加这些插槽也无济于事。doc
xmlNode
expr
CSSSelector
用 s 子类化一个类的正确方法是__slot__
什么?
我的完整代码在这里:
"""
Extends `XPathSelector` to allow CSS3 selectors via the `cssselect` library.
"""
from cssselect import HTMLTranslator, GenericTranslator
from scrapy.selector import XPathSelector, XPathSelectorList
__all__ = ['CSSSelector', 'CSSSelectorList']
class CSSSelector(XPathSelector):
__slots__ = ['doc', 'xmlNode', 'expr', 'translator']
def __init__(self, *args, **kwargs):
translator = kwargs.get('translator', 'html').lower()
if 'translator' in kwargs:
del kwargs['translator']
super(CSSSelector, self).__init__(*args, **kwargs)
if translator == 'html':
self.translator = HTMLTranslator()
elif translator == 'xhtml':
self.translator = HTMLTranslator(xhtml=True)
elif translator == 'xml':
self.translator = GenericTranslator()
else:
raise ValueError("Invalid translator: %s. Valid translators are 'html' (default), 'xhtml' and 'xml'." % translator)
def _select_xpath(self, xpath):
if hasattr(self.xmlNode, 'xpathEval'):
self.doc.xpathContext.setContextNode(self.xmlNode)
xpath = unicode_to_str(xpath, 'utf-8')
try:
xpath_result = self.doc.xpathContext.xpathEval(xpath)
except libxml2.xpathError:
raise ValueError("Invalid XPath: %s" % xpath)
if hasattr(xpath_result, '__iter__'):
return CSSSelectorList([self.__class__(node=node, parent=self, expr=xpath) for node in xpath_result])
else:
return CSSSelectorList([self.__class__(node=xpath_result, parent=self, expr=xpath)])
else:
return CSSSelectorList([])
def select(self, selector):
xpath = self.translator.css_to_xpath(selector)
return self._select_xpath(xpath)
def attribute(self, name):
return self._select_xpath('self::@' + name)
def text(self):
return self._select_xpath('self::text()')
class CSSSelectorList(XPathSelectorList):
def attribute(self, name):
return [x.attribute(name) for x in self]
def text(self, name):
return [x.text() for x in self]
我可以很好地初始化类:
>>> css_selector = CSSSelector(response)
但我AttributeError
到处都是:
>>> css_selector.select('title')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-150-d21b0f17d4cc> in <module>()
----> 1 css_selector.select('title')
<ipython-input-147-c855c7eaf9fa> in select(self, selector)
57
58
---> 59 return self._select_xpath(xpath)
60
61
<ipython-input-147-c855c7eaf9fa> in _select_xpath(self, xpath)
34
35 def _select_xpath(self, xpath):
---> 36 if hasattr(self.xmlNode, 'xpathEval'):
37 self.doc.xpathContext.setContextNode(self.xmlNode)
38 xpath = unicode_to_str(xpath, 'utf-8')
AttributeError: xmlNode