
Is it possible to perform asynchronous (e.g. asyncio) network requests with PyQt4 (QWebPage)?

For example, how could this code be used to fetch multiple urls in parallel:

#!/usr/bin/env python3.4

import sys
import signal

from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import QWebPage

class Crawler( QWebPage ):
    def __init__(self, url):
        QWebPage.__init__( self )
        self._url = url
        self.content = ''

    def crawl( self ):
        signal.signal( signal.SIGINT, signal.SIG_DFL )
        self.connect( self, SIGNAL( 'loadFinished(bool)' ), self._finished_loading )
        self.mainFrame().load( QUrl( self._url ) )

    def _finished_loading( self, result ):
        self.content = self.mainFrame().toHtml()
        print(self.content)
        sys.exit( 0 )

def main():
    app = QApplication( sys.argv )
    crawler = Crawler( 'http://www.example.com' )
    crawler.crawl()
    sys.exit( app.exec_() )

if __name__ == '__main__':
    main()

Thanks


2 Answers


Sorry, you cannot make self.mainFrame().load(QUrl(self._url)) work through asyncio, since that method is implemented inside Qt itself.

However, you can install the quamash event loop and call the aiohttp.request coroutine asynchronously to fetch the web pages.

This approach does not work with QWebPage, though.
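
A minimal sketch of that approach might look like the following. It assumes quamash and the 0.x-era aiohttp.request coroutine API; exact names and signatures may differ between versions:

import sys
import asyncio

import aiohttp   # assumed installed; 0.x-era API with the aiohttp.request() coroutine
import quamash   # assumed installed; provides an asyncio event loop driven by Qt
from PyQt4.QtGui import QApplication

urls = ['http://www.example.com', 'http://www.example.org']

@asyncio.coroutine
def fetch(url):
    # aiohttp.request() is a coroutine that returns a response object
    response = yield from aiohttp.request('GET', url)
    body = yield from response.read()
    print(url, len(body))

def main():
    app = QApplication(sys.argv)
    loop = quamash.QEventLoop(app)
    asyncio.set_event_loop(loop)
    with loop:
        # all requests run concurrently on the Qt-integrated event loop
        loop.run_until_complete(asyncio.wait([fetch(url) for url in urls]))

if __name__ == '__main__':
    main()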

Answered 2014-12-10T11:33:44.100

The requests are already made asynchronously, so all you need to do is create multiple QWebPage instances.

Here is a simple demo based on your example script:

import sys, signal
from PyQt4 import QtCore, QtGui, QtWebKit

urls = [
    'http://qt-project.org/doc/qt-4.8/qwebelement.html',
    'http://qt-project.org/doc/qt-4.8/qwebframe.html',
    'http://qt-project.org/doc/qt-4.8/qwebinspector.html',
    'http://qt-project.org/doc/qt-4.8/qwebpage.html',
    'http://qt-project.org/doc/qt-4.8/qwebsettings.html',
    'http://qt-project.org/doc/qt-4.8/qwebview.html',
    ]

class Crawler(QtWebKit.QWebPage):
    def __init__(self, url, identifier):
        super(Crawler, self).__init__()
        self.loadFinished.connect(self._finished_loading)
        self._id = identifier
        self._url = url
        self.content = ''

    def crawl(self):
        self.mainFrame().load(QtCore.QUrl(self._url))

    def _finished_loading(self, result):
        self.content = self.mainFrame().toHtml()
        print('[%d] %s' % (self._id, self._url))
        print(self.content[:250].rstrip(), '...')
        print()
        # this page is no longer needed once its content has been printed
        self.deleteLater()

if __name__ == '__main__':

    app = QtGui.QApplication( sys.argv )
    signal.signal( signal.SIGINT, signal.SIG_DFL)
    crawlers = []
    for index, url in enumerate(urls):
        crawlers.append(Crawler(url, index))
        crawlers[-1].crawl()
    sys.exit( app.exec_() )
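
If you need the script to exit once every page has been printed, one possible extension (not part of the original demo; the remaining counter and page_finished callback below are hypothetical additions) is to quit the application after the last loadFinished signal:

# Hypothetical addition to the demo above: quit once all pages are done,
# so that app.exec_() returns. 'urls' is the list defined in the demo.
remaining = len(urls)

def page_finished(ok):
    global remaining
    remaining -= 1
    if remaining == 0:
        QtGui.QApplication.instance().quit()

# and in the startup loop, connect each crawler's signal as well:
#     crawlers[-1].loadFinished.connect(page_finished)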
Answered 2014-12-10T18:35:42.110