它只需要一个小补丁。
函数 GoogleSearch._extract_result(search.py 的第 237 行)调用 GoogleSearch._extract_description(第 258 行)失败,导致 _extract_result 对于大多数结果返回 None,因此显示的结果少于预期。
修复方法:
在 search.py 中,将第 259 行从:
desc_div = result.find('div', {'class': re.compile(r'\bs\b')})
改为:
desc_div = result.find('span', {'class': 'st'})
我使用以下脚本进行了测试:
#!/usr/bin/python
#
# This program does a Google search for "quick and dirty" and returns
# 200 results.
#
from xgoogle.search import GoogleSearch, SearchError
class give_me(object):
    """Iterator that yields at most ``target`` results for ``query``.

    Results are pulled from a ``GoogleSearch`` object one batch at a time
    via ``get_results()`` and handed out one by one. Iteration ends when
    ``target`` results have been returned or when a refill comes back
    empty, whichever happens first.
    """

    def __init__(self, query, target):
        """Create the search object and reset the iteration state.

        query  -- search string passed to ``GoogleSearch``.
        target -- maximum number of results to yield.
        """
        self.gs = GoogleSearch(query)
        self.gs.results_per_page = 50
        self.current = 0        # number of results handed out so far
        self.target = target
        self.buf_list = []      # results fetched but not yet handed out

    def __iter__(self):
        return self

    def next(self):
        """Return the next result, refilling the buffer when it is empty.

        Raises StopIteration once ``target`` results have been returned or
        when ``get_results()`` has nothing more to offer.
        """
        if self.current >= self.target:
            raise StopIteration
        if not self.buf_list:
            self.buf_list = self.gs.get_results()
            # An empty refill means the search is exhausted; stop cleanly
            # instead of letting pop(0) raise IndexError on an empty list.
            if not self.buf_list:
                raise StopIteration
        self.current += 1
        return self.buf_list.pop(0)

    # Alias so the object also satisfies the ``__next__`` iterator protocol.
    __next__ = next
# Collect up to 200 results for the query, keyed by URL, printing each
# record as it arrives. A SearchError aborts the run with a message.
sites = {}
try:
    for hit in give_me("quick and dirty", 200):
        record = {
            "title": hit.title.encode('utf8'),
            "desc": hit.desc.encode('utf8'),
            "url": hit.url.encode('utf8'),
        }
        # Later results with the same URL overwrite earlier ones.
        sites[record["url"]] = record
        print(record)
except SearchError as err:
    print("Search failed: %s" % err)