0

当我在代理之外执行程序时,以下代码可以正常运行。但当我在公司代理之后(Windows 7 操作系统)运行此程序时,在命令提示符下会得到“[Errno 11004]”,在 Cygwin 下会得到“[Errno 8]”。

该程序的目标是成为一个可移植的可执行文件,执行人员可以使用它来捕获我们公司拥有的网站的 HTTP 响应和 URL 重定向。

#!/bin/python
import urllib, urllib2, sys, logging, time

# Shared script state
f = open("list.txt", "r")           # input list of assets, opened for reading
s = time.strftime("%Y%m%d%H%M%S")   # run timestamp, formatted as YYYYmmddHHMMSS

# Logging
class Logger(object):
    """File-like object that duplicates everything written to it.

    Each message goes both to the stream that was ``sys.stdout`` when the
    instance was created and to a log file named ``assets_<s>.txt``, where
    ``s`` is the module-level timestamp string.  The log file is opened in
    append mode.
    """

    def __init__(self):
        # Keep the current stdout so output still reaches the console
        # after sys.stdout is rebound to this object.
        self.terminal = sys.stdout
        self.log = open("assets_"+s+".txt", "a")

    def write(self, message):
        """Write ``message`` to the console stream and to the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both underlying streams.

        Needed because this object stands in for ``sys.stdout``: any code
        calling ``sys.stdout.flush()`` would otherwise fail with
        AttributeError, and buffered log data could not be pushed to disk.
        """
        self.terminal.flush()
        self.log.flush()

# Capture logging class
sys.stdout = Logger()

# Text file header
print "ASSET, STATUS, REDIRECT, DATE/TIME"

# Feed program 16,000 URLs
for url in f.readlines():
    try:
        http_connection = 'http://' + (url)
        connection = urllib2.urlopen(http_connection)
        print (url).rstrip("\n"), ",", connection.getcode(), ",", connection.geturl(), ",", (s)
        connection.close()
    except urllib2.URLError as e:
        print e.reason
4

2 回答 2

0

看起来像是环境或数据问题。您提到是在代理之后运行它——代理有什么限制吗?请先设法让下面这一行代码正常工作。

connection = urllib2.urlopen(http_connection)

于 2013-08-23T01:12:07.280 回答
-1

这是最终的解决方案。结果发现这些错误与代理无关。

#!/bin/python

import urllib2, sys, logging, time, errno, unittest

# Shared script state
f = open("list.txt", "r")           # input list of assets, opened for reading
s = time.strftime("%Y%m%d%H%M%S")   # run timestamp, formatted as YYYYmmddHHMMSS

# Tee console output into a text file for importing into a database
class Logger(object):
    """File-like object that duplicates everything written to it.

    Each message goes both to the stream that was ``sys.stdout`` when the
    instance was created and to a log file named ``assets_<s>.txt``, where
    ``s`` is the module-level timestamp string.  The log file is opened in
    append mode.
    """

    def __init__(self):
        # Keep the current stdout so output still reaches the console
        # after sys.stdout is rebound to this object.
        self.terminal = sys.stdout
        self.log = open("assets_"+s+".txt", "a")

    def write(self, message):
        """Write ``message`` to the console stream and to the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both underlying streams.

        Needed because this object stands in for ``sys.stdout``: any code
        calling ``sys.stdout.flush()`` would otherwise fail with
        AttributeError, and buffered log data could not be pushed to disk.
        """
        self.terminal.flush()
        self.log.flush()

# Start capture of screen output for database file
sys.stdout = Logger()
print "UID, ASSET, STATUS, REDIRECT, DATE/TIME"

# Loop to capture URL status and redirect information
for assets in f.readlines():
    url = assets.split()
    if len(url) > 1:
        try:
            http = 'http://' + url[1]           
            http_connection = urllib2.urlopen(http, timeout=5)
        except IOError, e:
            if e.errno == None:         
                try:
                    www = 'http://www.' + url[1]
                    http_connection = urllib2.urlopen(www, timeout=5)
                except IOError, e:
                    print url[0], ",", url[1].rstrip("\n"), ",", "", ",", e.errno, ",", (s)
                else:
                    print url[0], ",", url[1].rstrip("\n"), ",", http_connection.getcode(), ",", http_connection.geturl(), ",", (s)
        else:
            print url[0], ",", url[1].rstrip("\n"), ",", http_connection.getcode(), ",", http_connection.geturl(), ",", (s)
于 2013-08-26T21:12:53.380 回答