0

谁能告诉我我做错了什么?我不断收到此代码错误。

我正在尝试从小学游戏下载所有 swf 文件,只是作为一个实验,但我似乎做不到:

#!/usr/bin/env python
# encoding: utf-8

import sys, getopt
import os, urllib, urllib2, re, string, math

help_message = '''
'''
no_param = '''
'''

verbose = False
fakeMode = False
curPath = os.getcwd() + "/"

urlRegex = ''
FileRegex = ''
outputPath = ''
currentFile = ''

def removeDuplicates(seq):
# Not order preserving
keys = {}
for e in seq:
    keys[e] = 1
return keys.keys()

def go(filename):
print "Having a look at " + string.capwords(filename)

global urlRegex, FileRegex, outputPath, currentFile

url = 'http://cdn.primarygames.com' + filename

urlRegex = '/'+filename+'/.+/download'
FileRegex = '/'+filename+'/(.*?)/download'
outputPath = curPath+"Swfs"+"/"

if not os.path.exists(outputPath):
    os.makedirs(outputPath)

filelist = []

while(len(url)):
    # looping system
    newlist, url = scrapePage(url, filename)
    filelist.extend(newlist)

print 'Found %s Files.' % len(filelist)

for swf in filelist:
    swfurl = swf['url']
    name = swf['name']
    currentFile = name
    #print 'Downloading '+name,
    if not fakeMode:
        #print ''
        urllib.urlretrieve('http://cdn.primarygames.com' + swfurl, outputPath+name)
    else:
        print 'Not downloading %s.' % name
print "All done with %s!" % filename

def scrapePage(url, filename):
print 'Looking through '+url
html = urllib2.urlopen(url).read()
swflist = re.findall(urlRegex, html)
swflist = removeDuplicates(swflist)

swfs = []

for swfurl in swflist:
    r = re.compile(FileRegex)
    swfname = r.search(swfurl).group(1)
    swfname = swfname.replace('-', ' ')
    name = filename + "/" + swfname + ".swf"
    name = string.capwords(name)
    swf.append({'name':name,'url':swfurl})

r = re.compile(nextRegex)
result = r.search(html)
if result:
    nextUrl = 'http://cdn.primarygames.com' + result.group(1)
else:
    nextUrl = ''

return swfs, nextUrl


def main(argv=None):
global verbose, fakeMode

if argv is None:
    argv = sys.argv
try:
    try:
        opts, args = getopt.getopt(argv[1:], "ho:vf", ["help", "output="])
    except getopt.error, msg:
        raise Usage(msg)

    # option processing
    for option, value in opts:
        if option == "-v":
            verbose = True
        if option in ("-f", "--fake"):
            fakeMode = True
        if option in ("-h", "--help"):
            raise Usage(help_message)
        if option in ("-o", "--output"):
            output = value

    if len(args):
        swfs = args
    else:
        raise Usage(no_param)

except Usage, err:
    print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
    if err.msg != help_message:
        print >> sys.stderr, "\t for help use --help"
    return 2

for swf in swfs:
    go(swf)


if __name__ == "__main__":
sys.exit(main())

这是我不断收到的错误:

Having a look at *
Looking through http://cdn.primarygames.com/*
Traceback (most recent call last):
File "C:\PrimarySchoolGames Swf Downloader.py"
, line 129, in <module>
sys.exit(main())
File "C:\PrimarySchoolGames Swf Downloader.py"
, line 125, in main
go(swf)
File "C:\PrimarySchoolGames Swf Downloader.py"
, line 48, in go
newlist, url = scrapePage(url, filename)
File "C:\Users\Terrii\Desktop\VB Extra's\PrimarySchoolGames Swf Downloader.py"
, line 67, in scrapePage
html = urllib2.urlopen(url).read()
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 400, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 418, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 378, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 1207, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "C:\Python27\lib\urllib2.py", line 1177, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno 11004] getaddrinfo failed>
4

1 回答 1

1

失败getaddrinfo通常表明您提供的 URL 有问题。既然我能够解析地址,你确定你不在代理服务器后面吗?这可能会导致 DNS 查找失败,从而导致出现此消息。

Python 如何确定在 Windows 上使用哪个代理:

在 Windows 环境中,如果未设置代理环境变量,则从注册表的 Internet 设置部分获取代理设置。

如需更多帮助,我同意@MikeHunter。我试图修复你的代码,但由于我必须实现你的异常类才能让代码运行,我认为你应该重新缩进你的代码并提供更多信息。对不起。

于 2012-12-08T19:03:13.397 回答