我正在尝试使用 HTMLParser 从 HTML 页面中读取标题标签。但是我收到了上述错误。我的课看起来像这样:
读标题.py
from HTMLParser import HTMLParser
import urllib
class MyHTMLParser(HTMLParser):
def __init__(self, url):
HTMLParser.__init__(self)
self.url = url
self.data = urllib.urlopen(url).read()
self.feed(self.data)
self.intitle = ""
self.mytitle = ""
def handle_starttag(self, tag, attrs):
self.intitle = tag == "title"
def handle_data(self, data):
if self.intitle:
self.mytitle = data
return self.mytitle
我使用以下命令运行代码并得到错误:
import urllib
import readtitle
parser = readtitle.MyHTMLParser("http://docs.python.org/tutorial/classes.html")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "readtitle.py", line 10, in __init__
self.feed(self.data)
File "/usr/lib/python2.6/HTMLParser.py", line 108, in feed
self.goahead(0)
File "/usr/lib/python2.6/HTMLParser.py", line 142, in goahead
if i < j: self.handle_data(rawdata[i:j])
File "readtitle.py", line 18, in handle_data
if self.intitle:
AttributeError: MyHTMLParser instance has no attribute 'intitle'