我正在尝试解析谷歌搜索结果中的链接,但最终得到了奇怪的输出。
import mechanize, re, lxml.html
from lxml.html import parse
# Fetch one Google results page and print the href of every result-title link.
br = mechanize.Browser()
# Send a desktop Firefox User-agent header with every request.
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
# Tell mechanize not to apply robots.txt handling to this request.
br.set_handle_robots(False)
url = 'https://www.google.com/search?q=test&gl=US'
response = br.open(url)
# Parse the body exactly as served. Lower-casing the whole document first
# would also lower-case every href value, corrupting case-sensitive URL
# paths and query parameters in the printed links.
html = response.read()
doc = lxml.html.document_fromstring(html)
# Select the anchor inside each <h3 class="r"> element.
for t in doc.xpath("//h3[@class='r']/a"):
    # NOTE(review): Google may serve these hrefs as "/url?q=<target>&..."
    # redirect wrappers rather than bare target URLs -- confirm against the
    # actual output and decode the "q" query parameter if only the target
    # is wanted.
    # The parenthesised single-argument form prints the same text under
    # Python 2 and is also valid Python 3.
    print(t.get('href'))
任何帮助都会很棒,谢谢