0

以下脚本应该从一个实时网站中读取特定行号的那一行并对其进行解析。它在大约 30 次循环内工作正常,但之后 enumerate(f) 似乎不再正常工作……for 循环中的 “i” 似乎停在了 130 行左右,而不是到达 200 多行。这可能是我所抓取的网站造成的,还是有其他原因?谢谢!!

import sgmllib

class MyParser(sgmllib.SGMLParser):
    """Collect the ids and text content of <div> elements that carry an id.

    The collected values accumulate across calls to parse(); nothing in this
    class resets them between documents.
    """

    def parse(self, s):
        """Feed the string 's' to the parser and flush any buffered input."""
        self.feed(s)
        self.close()

    def __init__(self, verbose=0):
        """Initialise the parser, passing 'verbose' to the superclass."""
        sgmllib.SGMLParser.__init__(self, verbose)
        # Values of every "id" attribute seen on a <div> start tag.
        self.divs = []
        # Text chunks seen while inside a <div> that had an "id" attribute.
        self.descriptions = []
        # 1 while text should be captured, 0 otherwise.
        self.inside_div_element = 0

    def start_div(self, attributes):
        """Handle a <div> start tag given its (name, value) 'attributes'.

        Only a <div> with an "id" attribute is recorded and switches text
        capture on; other <div> tags leave the state untouched.
        """
        for name, value in attributes:
            if name == "id":
                self.divs.append(value)
                self.inside_div_element = 1

    def end_div(self):
        """Handle a </div> end tag by switching text capture off.

        Any closing </div> clears the flag, including one that closes a
        nested <div> inside the element being captured.
        """
        self.inside_div_element = 0

    def handle_data(self, data):
        """Store the text 'data' when it occurs inside a captured <div>."""
        if self.inside_div_element:
            self.descriptions.append(data)

    def get_div(self):
        """Return the list of recorded <div> ids."""
        return self.divs

    def get_descriptions(self, check):
        """Return the list of captured text chunks.

        When 'check' is 1 the oldest chunk is dropped first. The list is
        returned for every value of 'check', so callers can always index
        the result. The returned list is the parser's own list, not a copy.

        Raises IndexError if 'check' is 1 and nothing has been captured.
        """
        if check == 1:
            self.descriptions.pop(0)
        return self.descriptions

    def rm_descriptions(self):
        """Remove the most recently captured text chunk.

        Raises IndexError if nothing has been captured.
        """
        self.descriptions.pop()

import urllib
import linecache
import sgmllib


# State shared across iterations of the polling loop below.
tempLine = ""
tempStr = " "
tempStr2 = ""
# One parser instance is reused for every fetch; the lists it collects are
# not reset between fetches.
myparser = MyParser()
count = 0
user = ['']
oldUser = ['none']  
oldoldUser = [' ']
# Flat list of alternating entries: a description string followed by its
# counter. It starts with a placeholder pair.
array = [" ", 0]
index = 0
found = 0    
k = 0
j = 0
posIndex = 0
a = 0
firstCheck = 0
fCheck = 0
# Fetch the page up to 1000 times.
# NOTE(review): the statements that belong to this loop are not indented in
# this listing, so the script is not valid Python exactly as shown.
while a < 1000:

print a
f = urllib.urlopen("SITE")
a = a+1

# Walk the response line by line; i is the 0-based line index.
for i, line in enumerate(f):


    # Only line index 187 is parsed. If the response has fewer lines than
    # that, this branch is never reached for that fetch.
    if i == 187:
        print i
        tempLine = line
        print line

        myparser.parse(line)
        # After the first pass, compare the first two entries of the previous
        # result. `is` tests object identity; the == check below overrides
        # the result with 1 when the two values are equal.
        # NOTE(review): oldUser[1] raises IndexError if the previous result
        # holds fewer than two entries.
        if fCheck == 1:
            result  = oldUser[0] is oldUser[1]

            u1 = oldUser[0]
            u2 = oldUser[1]
            tempStr = oldUser[1]
            if u1 == u2:
                result = 1
        else:
            # First pass: user and oldUser are two distinct list objects, so
            # this identity test is False.
            result = user is oldUser
        fCheck = 1

        # firstCheck is 0 on the first pass and 1 on every later pass.
        user = myparser.get_descriptions(firstCheck)
        tempStr = user[0]
        firstCheck = 1



        if result:

            # Adding 0 leaves the counter unchanged.
            array[index+1] = array[index+1] +0

        else:
            j = 0

            # Look for user[0] among the stored entries. k runs two
            # positions ahead of j, so the placeholder pair is skipped.
            for z in array:
                k = j+2

                tempStr2 = user[0]
                if k < len(array) and tempStr2 == array[k]: 

                    # Match: bump the counter stored right after the entry.
                    array[j+3] = array[j+3] + 1
                    index = j+2
                    found = 1
                    break
                j = j+1
            if found == 0:

                # Not seen before: append a new entry with a zero counter.
                array.append(tempStr)
                array.append(0)


        # No copy is made: oldUser refers to the same object that
        # get_descriptions returned.
        oldUser = user
        found = 0
        print array


    elif i > 200:
        # Stop reading this response once past line index 200.
        print "HERE"
        break



print array
f.close()
4

2 回答 2

0

题外话:你的代码在 while a < 1000: 这一行之后缩进就乱了。过多的空行和单字母变量名也无助于别人理解你的代码。

enumerate 并没有坏。与其这样猜测,不如检查一下你的数据。建议:替换

for i, line in enumerate(f):

为

lines = list(f)
print "=== a=%d linecount=%d === % (a, len(lines))
for i, line in enumerate(lines):
    print "   a=%d i=%d line=%r" % (a, i, line)

仔细检查输出。

于 2011-05-13T23:18:07.060 回答
0

也许该网页上的行数比您想象的要少?下面这行代码的输出是什么?

# Prints the largest 0-based line index in the response (line count minus one).
print max(i for i, _ in enumerate(urllib.urlopen("SITE")))
于 2011-05-13T22:10:33.050 回答