以下函数将从 Google 搜索结果中提取 URL。它在dev_appserver
(localhost)中运行良好,但是当我将它部署在 Google App Engine 上时,它显示了一些错误。
程序:
def googleSearch(keyword):
from re import findall
from urllib2 import build_opener
from urllib import quote, unquote
urlregex = r'<a[ ]href="/url\?q=(http://.+?)[&]'
searchURL = 'https://www.google.com/search?q=' + quote(keyword, safe = '') # https will exclude Cached results
#Google
opener = build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
pagesource = opener.open(searchURL).read()
result = findall(urlregex, pagesource)
print result
resultlist = []
for url in result:
resultlist.append(unquote(url))
resultlist = sorted(set(resultlist), key=resultlist.index)
return resultlist
GAE 错误:
Internal Server Error
The server has either erred or is incapable of performing the requested operation.
Traceback (most recent call last):
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 1535, in __call__
rv = self.handle_exception(request, response, e)
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 1529, in __call__
rv = self.router.dispatch(request, response)
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 1278, in default_dispatcher
return route.handler_adapter(request, response)
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 1102, in __call__
return handler.dispatch()
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 572, in dispatch
return self.handle_exception(e, self.app.debug)
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.2/webapp2.py", line 570, in dispatch
return method(*args, **kwargs)
File "/base/data/home/apps/s~crawlnsearch/1.370098233684025667/main.py", line 56, in get
result = googleSearch(q)
File "/base/data/home/apps/s~crawlnsearch/1.370098233684025667/goosearch.py", line 15, in googleSearch
pagesource = opener.open(searchURL).read()
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 410, in open
response = meth(req, response)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 523, in http_response
'http', request, response, code, msg, hdrs)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 442, in error
result = self._call_chain(*args)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 629, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 410, in open
response = meth(req, response)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 523, in http_response
'http', request, response, code, msg, hdrs)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 448, in error
return self._call_chain(*args)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 531, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 503: Service Unavailable
有没有人知道这个问题的解决方案。