下面的代码可以正常运行,只有一个问题:期望的输出被重复打印了好几次。
#! /usr/bin/env python2.7
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from bs4 import BeautifulSoup as bs
from random import choice
from urllib import urlretrieve
from urllib2 import *
import sys
import os
# Settings for browser
class MyBrowser(QWebPage):
    """Web page that does not auto-load images and reports a fixed user agent."""

    def __init__(self):
        """Initialise the page and turn automatic image loading off."""
        QWebPage.__init__(self)
        page_settings = self.settings()
        # AutoLoadImages controls whether images in a page are fetched
        # automatically; it is switched off here.
        page_settings.setAttribute(QWebSettings.AutoLoadImages, False)

    def userAgentForUrl(self, url):
        """Return the same desktop Chrome user-agent string for every *url*."""
        user_agent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15"
        return user_agent
class Name_Creater(QWebView):
def __init__(self):
QWebView.__init__(self)
self.setPage(MyBrowser())
self.loadFinished.connect(self.grab_first_name)
self.frame = self.page().mainFrame()
def grab_first_name(self):
html = unicode(self.frame.toHtml()).encode('utf-8')
soup = bs(html)
for name in soup.findAll('li', text=True):
print name
if __name__ == '__main__':
    # The QApplication has to exist before any widget is constructed.
    app = QApplication(sys.argv)

    # Build the view first, then point it at the page listing the names.
    br = Name_Creater()
    url_first_names = QUrl("http://www.genealogyroadtrip.com/Census/male_names_1.htm")
    br.load(url_first_names)
    br.show()

    # Enter the Qt event loop; the page is loaded asynchronously from here.
    app.exec_()