我正在尝试缩短 urlSet 中的许多 URL。以下代码大部分时间都有效。但有时需要很长时间才能完成。例如,我在 urlSet 中有 2950。stderr 告诉我 2900 已完成,但 getUrlMapping 未完成。
def getUrlMapping(urlSet):
# get the url mapping
urlMapping = {}
#rs = (grequests.get(u) for u in urlSet)
rs = (grequests.head(u) for u in urlSet)
res = grequests.imap(rs, size = 100)
counter = 0
for x in res:
counter += 1
if counter % 50 == 0:
sys.stderr.write('Doing %d url_mapping length %d \n' %(counter, len(urlMapping)))
urlMapping[ getOriginalUrl(x) ] = getGoalUrl(x)
return urlMapping
def getGoalUrl(resp):
url=''
try:
url = resp.url
except:
url = 'NULL'
return url
def getOriginalUrl(resp):
url=''
try:
url = resp.history[0].url
except IndexError:
url = resp.url
except:
url = 'NULL'
return url