1

下面的代码工作正常,除了一件事:所需的输出打印几次=)

#! /usr/bin/env python2.7

from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from bs4 import BeautifulSoup as bs
from random import choice
from urllib import urlretrieve
from urllib2 import *
import sys
import os


# Settings for browser
class MyBrowser(QWebPage):
    def __init__(self):
        QWebPage.__init__(self)
        # Specifies whether images are automatically loaded in web pages.
        self.settings().setAttribute(QWebSettings.AutoLoadImages, False)

    def userAgentForUrl(self, url):
        return "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15"



class Name_Creater(QWebView):
    def __init__(self):
        QWebView.__init__(self)
        self.setPage(MyBrowser())
        self.loadFinished.connect(self.grab_first_name)
        self.frame = self.page().mainFrame()

    def grab_first_name(self):
        html = unicode(self.frame.toHtml()).encode('utf-8')
        soup = bs(html)
        for name in soup.findAll('li', text=True):
            print name

if __name__ == '__main__':
    app = QApplication(sys.argv)
    url_first_names = QUrl("http://www.genealogyroadtrip.com/Census/male_names_1.htm")
    br = Name_Creater()
    br.load(url_first_names)
    br.show()
    app.exec_()
4

1 回答 1

1

问题是您正在连接到 的loadFinished信号,该信号将为每个加载的页面QWebView发出一次。所以如果有多个帧,就会发出多个信号。loadFinished

解决方案是连接到mainFrameloadFinished的信号:

self.setPage(MyBrowser())
self.frame = self.page().mainFrame()
self.frame.loadFinished.connect(self.grab_first_name)
于 2012-12-05T01:56:35.933 回答