5

我在 Ubuntu 12.04 (amd64) 上用 PyQt 4.9.1 编写一个无头浏览器(Python 2.6 和 2.7 都试过),但程序运行时会崩溃,并报告:程序收到信号 SIGSEGV,段错误(Segmentation fault)。下面是该程序的简化版本(仍然有点长):

# -*- coding: utf-8 -*-
# A non-visible 1024x768, 24-bit virtual display is started first.
from pyvirtualdisplay import Display
display = Display(visible=False, size=(1024, 768), color_depth=24)
display.start()

# The QApplication is created only after the virtual display has been started.
from PyQt4.QtGui import QApplication
#from PySide.QtGui import QApplication
app = QApplication([])
# qt4reactor is installed here, ahead of every twisted import further down.
# Presumably it has to be in place before twisted.internet.reactor is first
# imported -- TODO confirm against the qt4reactor documentation.
import qt4reactor
qt4reactor.install()

from twisted.web import server
from twisted.web.xmlrpc import XMLRPC
from twisted.internet import defer
from PyQt4.QtWebKit import QWebSettings, QWebView, QWebPage
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
from PyQt4.QtCore import QUrl, QByteArray, QTimer


class CustomQNetworkAccessManager(QNetworkAccessManager):
    """Network access manager that ignores every SSL error and prints it."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and hook up ``sslErrors``."""
        super(CustomQNetworkAccessManager, self).__init__(*args, **kwargs)
        self.sslErrors.connect(self._ssl_errors)

    def _ssl_errors(self, reply, errors):
        """Slot for ``sslErrors``: ignore the errors on *reply*, then print each."""
        reply.ignoreSslErrors()
        template = 'Ignored SSL Error: {0} - {1}'
        for ssl_error in errors:
            # One parenthesised argument prints the same text under Python 2.
            print(template.format(ssl_error.error(), ssl_error.errorString()))


class CustomQWebPage(QWebPage):
    """Web page that reports one fixed user-agent string for every URL."""

    # Returned unchanged by userAgentForUrl(), whatever URL is passed in.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; rv:5.0) Gecko/20100101 Firefox/5.0'

    def __init__(self, *args, **kwargs):
        """Pass every argument straight through to the base class."""
        super(CustomQWebPage, self).__init__(*args, **kwargs)

    def userAgentForUrl(self, url):
        """Return the fixed user-agent string; *url* is not consulted."""
        return self._USER_AGENT


# QWebView subclass that loads a single URL per perform() call and delivers
# the page HTML through a twisted Deferred.
class WebkitWrapper(QWebView):

    def __init__(self, *args, **kwargs):
        super(WebkitWrapper, self).__init__(*args, **kwargs)
        # Created without a parent argument and kept as an attribute.
        self.network_manager = CustomQNetworkAccessManager()
        # NOTE(review): the page is also created without a parent and is only
        # bound to a local name; nothing in this class deletes it explicitly.
        # Presumably relevant to the segfault this listing reproduces --
        # confirm against the QWebView.setPage() ownership rules.
        web_page = CustomQWebPage()
        web_page.setNetworkAccessManager(self.network_manager)
        self.setPage(web_page)
        # Images load automatically; Java, JavaScript, script-opened windows
        # and plugins are all switched off.
        settings = self.settings()
        settings.setAttribute(QWebSettings.AutoLoadImages, True)
        settings.setAttribute(QWebSettings.JavaEnabled, False)
        settings.setAttribute(QWebSettings.JavascriptEnabled, False)
        settings.setAttribute(QWebSettings.JavascriptCanOpenWindows, False)
        settings.setAttribute(QWebSettings.PluginsEnabled, False)
        # _load_finished() is the slot for this view's loadFinished signal.
        self.loadFinished.connect(self._load_finished)

    # Start loading request_data['url'] (empty string when the key is absent)
    # and return a fresh Deferred that _load_finished() fires with the HTML.
    # `timeout` is accepted but never read in this method.
    def perform(self, request_data, timeout=15):
        self._deferred_request = defer.Deferred()
        url = request_data.get('url', '')
        request = QNetworkRequest() 
        request.setUrl(QUrl(url)) 
        self.load(request)
        print 'getting: {0}'.format(url)
        return self._deferred_request

    # Slot for loadFinished. `ok` is only printed: the main frame's HTML is
    # handed to the Deferred whether or not the load succeeded.
    def _load_finished(self, ok):
        print 'load finished: {0}'.format(ok)
        frame = self.page().mainFrame()
        result = frame.toHtml()
        # The Deferred only exists once perform() has been called.
        self._deferred_request.callback(result)


class HeadlessBrowser(object):
    """Pairs one ``WebkitWrapper`` with the id it is registered under."""

    def __init__(self, instance_id):
        """Create the wrapped web view and remember *instance_id*."""
        self.webkit_wrapper = WebkitWrapper()
        self.instance_id = instance_id

    def _return_intance_id(self, result):
        """Tag *result* with this browser's id as an ``(id, result)`` tuple."""
        return (self.instance_id, result)

    def _request_failed(self, failure):
        """Errback: print the failure's message and traceback, return the message.

        Returning a plain value turns the failure back into a normal result
        for the callbacks that follow.
        """
        failure.trap(Exception)
        message = failure.getErrorMessage()
        # Single parenthesised arguments print the same text under Python 2.
        print(message)
        print(failure.getTraceback())
        return message

    def shutdown(self):
        """Close the wrapped web view."""
        self.webkit_wrapper.close()

    def get_request(self, request_data):
        """Start loading *request_data* and return the Deferred for it.

        The Deferred ends up firing with ``(instance_id, result)``, where
        ``result`` is either the page content or an error message.
        """
        pending = self.webkit_wrapper.perform(request_data)
        pending.addErrback(self._request_failed)
        pending.addBoth(self._return_intance_id)
        return pending


# XML-RPC resource: every `open` call gets its own HeadlessBrowser, which is
# shut down again as soon as its result is available.
class TestXMLRPCServer(XMLRPC):

    def __init__(self):
        XMLRPC.__init__(self, allowNone=True)
        # instance id (the integer counter value) -> HeadlessBrowser in use
        self.browser_instances = dict()
        self.instance_counter = 0

    # Callback attached in xmlrpc_open(). `result` is the
    # (instance_id, browser_result) tuple produced by HeadlessBrowser.
    def _result_returned(self, result):
        instance_id, browser_result = result
        print 'killing instance: {0}'.format(instance_id)
        # pop() without a default: an unknown id raises KeyError.
        browser_instance = self.browser_instances.pop(instance_id)
        # NOTE(review): this appears to run inside the Deferred callback chain
        # that the view's loadFinished slot fires, i.e. the view is closed
        # while Qt may still be dispatching that signal. The posted gdb session
        # prints "killing instance: 1" right before the SIGSEGV -- confirm.
        browser_instance.shutdown()
        return browser_result

    # Exposed as the XML-RPC method `open`. Creates a browser under a fresh
    # counter value and returns the Deferred that yields the page content.
    def xmlrpc_open(self, request_data):
        print 'requested: {0}'.format(request_data)
        self.instance_counter += 1
        browser = HeadlessBrowser(self.instance_counter)
        self.browser_instances[self.instance_counter] = browser

        deferred_result = browser.get_request(request_data)
        deferred_result.addCallback(self._result_returned)
        return deferred_result


def start_server(port=8297):
    """Serve ``TestXMLRPCServer`` over TCP on *port* and run the reactor."""
    # Imported at call time, i.e. after the module-level qt4reactor.install().
    from twisted.internet import reactor
    xmlrpc_root = TestXMLRPCServer()
    site = server.Site(xmlrpc_root)
    reactor.listenTCP(port, site)
    reactor.run()


if __name__ == '__main__':
    start_server()

据我目前的了解,问题出在这些 HeadlessBrowser 实例的存储方式上。我用一个 dict 保存它们:在实际实现中,我通过自定义的 session_id 来重用实例,这里只是用一个 instance_counter 来模拟,以便演示其工作方式。我猜测,尽管实例已经存放在字典里,它们似乎仍然被垃圾回收了——不过我并不确定问题的真正原因。无论如何,如果我不使用 dict,而是只在 xmlrpc 类上以类属性的形式保存单个实例,问题就不会出现。

这是它的示例客户端:

# -*- coding: utf-8 -*-
import xmlrpclib

def test_server(port=8297):
    """Call the server's ``open`` method for a fixed URL and print the reply."""
    endpoint = 'http://localhost:{0}/'.format(port)
    proxy = xmlrpclib.Server(endpoint)
    page_html = proxy.open({'url': 'http://www.microsoft.com'})
    # A single parenthesised argument prints the same text under Python 2.
    print(page_html)


if __name__ == '__main__':
    test_server()

有什么建议么?

更新:添加回溯:

(gdb) run
Starting program ...
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
[New Thread 0x7fffe8f60700 (LWP 13393)]
Xlib:  extension "RANDR" missing on display ":1851".
[New Thread 0x7fffe3fff700 (LWP 13394)]
[New Thread 0x7fffe37fe700 (LWP 13395)]
requested: {'url': 'http://www.microsoft.com'}
[New Thread 0x7fffd9a2f700 (LWP 13422)]
[New Thread 0x7fffd9116700 (LWP 13423)]
[New Thread 0x7fffcfdd2700 (LWP 13425)]
getting: http://www.microsoft.com
[New Thread 0x7fffcf5d1700 (LWP 13426)]
[New Thread 0x7fffc5f28700 (LWP 13427)]
[Thread 0x7fffe37fe700 (LWP 13395) exited]
load finished: True
killing instance: 1

Program received signal SIGSEGV, Segmentation fault.
QMetaObject::activate (sender=0x0, m=<optimized out>, local_signal_index=8, argv=0x7fffffffc960) at kernel/qobject.cpp:3456
3456    kernel/qobject.cpp: No such file or directory.
(gdb) bt
#0  QMetaObject::activate (sender=0x0, m=<optimized out>, local_signal_index=8, argv=0x7fffffffc960) at kernel/qobject.cpp:3456
#1  0x00007fffdad358b2 in QWebFrame::loadFinished (this=<optimized out>, _t1=true) at ./moc_qwebframe.cpp:239
#2  0x00007fffdad74e08 in WebCore::FrameLoaderClientQt::dispatchDidFinishLoad (this=0x1428290) at WebCoreSupport/FrameLoaderClientQt.cpp:527
#3  0x00007fffdb0cfcbb in WebCore::FrameLoader::recursiveCheckLoadComplete (this=0x7fffd9141478) at loader/FrameLoader.cpp:2641
#4  0x00007fffdb100754 in WebCore::SubresourceLoader::didFinishLoading (this=0x7fffc5f3d300, finishTime=0) at loader/SubresourceLoader.cpp:202
#5  0x00007fffdb2f033b in WebCore::QNetworkReplyHandler::finish (this=0x14adcb0) at platform/network/qt/QNetworkReplyHandler.cpp:454
#6  0x00007fffdb2f01ea in flush (this=0x14adce8) at platform/network/qt/QNetworkReplyHandler.cpp:195
#7  WebCore::QNetworkReplyHandlerCallQueue::flush (this=0x14adce8) at platform/network/qt/QNetworkReplyHandler.cpp:187
#8  0x00007fffdb2f0255 in WebCore::QNetworkReplyHandlerCallQueue::push (this=0x14adce8, method=
    (void (WebCore::QNetworkReplyHandler::*)(WebCore::QNetworkReplyHandler * const)) 0x7fffdb2f0260 <WebCore::QNetworkReplyHandler::finish()>)
    at platform/network/qt/QNetworkReplyHandler.cpp:164
#9  0x00007fffdb2f0c8c in WebCore::QNetworkReplyWrapper::didReceiveFinished (this=0x14af650) at platform/network/qt/QNetworkReplyHandler.cpp:349
#10 0x00007ffff482f281 in QMetaObject::activate (sender=0x14ae120, m=<optimized out>, local_signal_index=<optimized out>, argv=0x0) at kernel/qobject.cpp:3547
#11 0x00007fffe0ea5fe6 in QNetworkReplyImplPrivate::finished (this=0x14ae210) at access/qnetworkreplyimpl.cpp:795
#12 0x00007fffe0f1c655 in QNetworkAccessHttpBackend::qt_static_metacall (_o=0x14ae5c0, _c=<optimized out>, _id=<optimized out>, _a=<optimized out>)
    at .moc/release-shared/moc_qnetworkaccesshttpbackend_p.cpp:90
#13 0x00007ffff4834446 in QObject::event (this=0x14ae5c0, e=<optimized out>) at kernel/qobject.cpp:1195
#14 0x00007ffff4d3d894 in notify_helper (e=0x7fffc8019be0, receiver=0x14ae5c0, this=0x9d0e30) at kernel/qapplication.cpp:4559
#15 QApplicationPrivate::notify_helper (this=0x9d0e30, receiver=0x14ae5c0, e=0x7fffc8019be0) at kernel/qapplication.cpp:4531
#16 0x00007ffff4d42713 in QApplication::notify (this=0x966ab0, receiver=0x14ae5c0, e=0x7fffc8019be0) at kernel/qapplication.cpp:4420
#17 0x00007ffff5d4c016 in ?? () from .../lib/python2.6/site-packages/PyQt4/QtGui.so
#18 0x00007ffff481ae9c in QCoreApplication::notifyInternal (this=0x966ab0, receiver=0x14ae5c0, event=0x7fffc8019be0) at kernel/qcoreapplication.cpp:876
#19 0x00007ffff481ec6a in sendEvent (event=0x7fffc8019be0, receiver=0x14ae5c0) at ../../include/QtCore/../../src/corelib/kernel/qcoreapplication.h:231
#20 QCoreApplicationPrivate::sendPostedEvents (receiver=0x0, event_type=0, data=0x9d1000) at kernel/qcoreapplication.cpp:1500
#21 0x00007ffff4849f93 in sendPostedEvents () at ../../include/QtCore/../../src/corelib/kernel/qcoreapplication.h:236
#22 postEventSourceDispatch (s=<optimized out>) at kernel/qeventdispatcher_glib.cpp:279
#23 0x00007ffff3a8ec9a in g_main_context_dispatch () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
---Type <return> to continue, or q <return> to quit---
#24 0x00007ffff3a8f060 in ?? () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
#25 0x00007ffff3a8f124 in g_main_context_iteration () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
#26 0x00007ffff484a3bf in QEventDispatcherGlib::processEvents (this=0xb7dfb0, flags=...) at kernel/qeventdispatcher_glib.cpp:424
#27 0x00007ffff4de5d5e in QGuiEventDispatcherGlib::processEvents (this=<optimized out>, flags=...) at kernel/qguieventdispatcher_glib.cpp:204
#28 0x00007ffff4819c82 in QEventLoop::processEvents (this=<optimized out>, flags=...) at kernel/qeventloop.cpp:149
#29 0x00007ffff4819ed7 in QEventLoop::exec (this=0xea6cd0, flags=...) at kernel/qeventloop.cpp:204
#30 0x00007ffff0d627e2 in ?? () from .../lib/python2.6/site-packages/PyQt4/QtCore.so
#31 0x000000000049a15d in PyEval_EvalFrameEx ()
#32 0x000000000049be0f in PyEval_EvalCodeEx ()
#33 0x000000000049a57a in PyEval_EvalFrameEx ()
#34 0x000000000049be0f in PyEval_EvalCodeEx ()
#35 0x000000000049a57a in PyEval_EvalFrameEx ()
#36 0x000000000049be0f in PyEval_EvalCodeEx ()
#37 0x000000000049bef2 in PyEval_EvalCode ()
#38 0x00000000004be6e0 in PyRun_FileExFlags ()
#39 0x00000000004bf3d7 in PyRun_SimpleFileExFlags ()
#40 0x0000000000418850 in Py_Main ()
#41 0x00007ffff68e576d in __libc_start_main () from /lib/x86_64-linux-gnu/libc.so.6
#42 0x0000000000417ab1 in _start ()
(gdb) 
4

1 回答 1

10

抱歉回复晚了,现在终于有时间贴出这个问题的解决方案。段错误的根本原因是:在对浏览器实例的最后一个引用被删除之前,没有先删除相关的 Qt 对象。下面是修复后的代码:

# -*- coding: utf-8 -*-
# A non-visible 1024x768, 24-bit virtual display is started first.
from pyvirtualdisplay import Display
display = Display(visible=False, size=(1024, 768), color_depth=24)
display.start()

# The QApplication is created only after the virtual display has been started.
from PyQt4.QtGui import QApplication
app = QApplication([])
# qt4reactor is installed here, ahead of every twisted import further down.
# Presumably it has to be in place before twisted.internet.reactor is first
# imported -- TODO confirm against the qt4reactor documentation.
import qt4reactor
qt4reactor.install()

from twisted.web import server
from twisted.web.xmlrpc import XMLRPC
from twisted.internet import defer
from PyQt4.QtWebKit import QWebSettings, QWebView, QWebPage
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
from PyQt4.QtCore import QUrl, Qt


class CustomQNetworkAccessManager(QNetworkAccessManager):
    """Network access manager that ignores SSL errors and frees finished replies."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and connect both slots."""
        super(CustomQNetworkAccessManager, self).__init__(*args, **kwargs)
        self.sslErrors.connect(self._ssl_errors)
        self.finished.connect(self._finished)

    def _ssl_errors(self, reply, errors):
        """Slot for ``sslErrors``: ignore the errors on *reply*, then print each."""
        reply.ignoreSslErrors()
        template = 'Ignored SSL Error: {0} - {1}'
        for ssl_error in errors:
            # One parenthesised argument prints the same text under Python 2.
            print(template.format(ssl_error.error(), ssl_error.errorString()))

    def _finished(self, reply):
        """Slot for ``finished``: schedule the completed *reply* for deletion."""
        reply.deleteLater()


class CustomQWebPage(QWebPage):
    """Web page that reports one fixed user-agent string for every URL."""

    # Returned unchanged by userAgentForUrl(), whatever URL is passed in.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; rv:5.0) Gecko/20100101 Firefox/5.0'

    def __init__(self, *args, **kwargs):
        """Pass every argument straight through to the base class."""
        super(CustomQWebPage, self).__init__(*args, **kwargs)

    def userAgentForUrl(self, url):
        """Return the fixed user-agent string; *url* is not consulted."""
        return self._USER_AGENT


class WebkitWrapper(object):
    """Owns a ``QWebView`` together with its page and network manager.

    All three Qt objects are kept as attributes so that shutdown() can
    schedule each of them for deletion explicitly.
    """

    def __init__(self, *args, **kwargs):
        """Create the view, page and network manager and wire them together.

        Extra arguments are forwarded to the base initialiser unchanged.
        """
        super(WebkitWrapper, self).__init__(*args, **kwargs)
        # No request is pending until perform() is called; _load_finished()
        # checks this so that a stray signal cannot hit a missing attribute.
        self._deferred_request = None
        self.web_view = QWebView()
        self.network_manager = CustomQNetworkAccessManager()
        self.web_page = CustomQWebPage()
        self.web_page.setNetworkAccessManager(self.network_manager)
        self.web_view.setPage(self.web_page)
        self.web_view.setAttribute(Qt.WA_DeleteOnClose, True)
        # Images, Java, JavaScript, script-opened windows and plugins are all
        # switched off.
        settings = self.web_view.settings()
        settings.setAttribute(QWebSettings.AutoLoadImages, False)
        settings.setAttribute(QWebSettings.JavaEnabled, False)
        settings.setAttribute(QWebSettings.JavascriptEnabled, False)
        settings.setAttribute(QWebSettings.JavascriptCanOpenWindows, False)
        settings.setAttribute(QWebSettings.PluginsEnabled, False)
        self.web_view.loadFinished.connect(self._load_finished)

    def perform(self, request_data, timeout=15):
        """Start loading ``request_data['url']`` and return a Deferred.

        A missing ``'url'`` key falls back to the empty string. *timeout* is
        accepted for interface compatibility but is not used. The returned
        Deferred fires with the page HTML encoded as UTF-8.
        """
        self._deferred_request = defer.Deferred()
        url = request_data.get('url', '')
        request = QNetworkRequest()
        request.setUrl(QUrl(url))
        self.web_view.load(request)
        print('getting: {0}'.format(url))
        return self._deferred_request

    def shutdown(self):
        """Close the view and schedule every owned Qt object for deletion."""
        print('webview shutdown')
        self.web_view.close()
        self.network_manager.deleteLater()
        self.web_page.deleteLater()
        self.web_view.deleteLater()
        print('deletelater scheduled')

    def _load_finished(self, ok):
        """Slot for ``loadFinished``: fire the pending Deferred with the HTML.

        *ok* is only printed; the HTML is delivered whether or not the load
        succeeded. Emissions that arrive before perform() or after the
        Deferred has already fired are ignored.
        """
        print('load finished: {0}'.format(ok))
        pending = self._deferred_request
        if pending is None or pending.called:
            # Firing a Deferred a second time raises AlreadyCalledError, so a
            # repeated loadFinished emission must not reach callback().
            return
        frame = self.web_view.page().mainFrame()
        result = unicode(frame.toHtml()).encode('utf-8')
        pending.callback(result)


class HeadlessBrowser(object):
    """Pairs one ``WebkitWrapper`` with the id it is registered under."""

    def __init__(self, instance_id):
        """Create the wrapper and remember *instance_id*."""
        self.webkit_wrapper = WebkitWrapper()
        self.instance_id = instance_id

    def _return_intance_id(self, result):
        """Tag *result* with this browser's id as an ``(id, result)`` tuple."""
        return (self.instance_id, result)

    def _request_failed(self, failure):
        """Errback: print the failure in detail and return its message.

        Returning a plain value turns the failure back into a normal result
        for the callbacks that follow.
        """
        failure.trap(Exception)
        message = failure.getErrorMessage()
        # A single parenthesised argument prints the same text under Python 2.
        print(message)
        failure.printDetailedTraceback()
        return message

    def shutdown(self):
        """Delegate to the wrapper's shutdown()."""
        self.webkit_wrapper.shutdown()

    def _run_perform(self, _r, request_data):
        """Callback-shaped perform(): the first argument is ignored."""
        return self.webkit_wrapper.perform(request_data)

    def get_request(self, request_data):
        """Start loading *request_data* and return the Deferred for it.

        The Deferred ends up firing with ``(instance_id, result)``, where
        ``result`` is either the page content or an error message.
        """
        pending = self.webkit_wrapper.perform(request_data)
        pending.addErrback(self._request_failed)
        pending.addBoth(self._return_intance_id)
        return pending


class TestXMLRPCServer(XMLRPC):
    """XML-RPC resource that creates one ``HeadlessBrowser`` per ``open`` call.

    Each browser is registered under a string id while its request is in
    flight and is shut down as soon as the result is available.
    """

    def __init__(self):
        """Initialise the XML-RPC resource with an empty browser registry."""
        XMLRPC.__init__(self, allowNone=True)
        # instance id (str) -> HeadlessBrowser whose request is still pending
        self.browser_instances = dict()
        self.instance_counter = 0

    def _request_completed(self, result):
        """Shut down the browser that produced *result* and return the payload.

        *result* is the ``(instance_id, payload)`` tuple built by
        ``HeadlessBrowser``. An id that is no longer registered is skipped
        instead of failing on ``None``.
        """
        instance_id, dest_result = result
        # Same text as a two-item print statement: label, a space, the dict.
        print('{0} {1}'.format('instances: ', self.browser_instances))
        print('killing instance: {0}'.format(instance_id))
        browser_instance = self.browser_instances.pop(instance_id, None)
        if browser_instance is not None:
            browser_instance.shutdown()
        print('{0} {1}'.format('instances: ', self.browser_instances))
        return dest_result

    def xmlrpc_open(self, request_data):
        """XML-RPC method ``open``: load the requested URL in a new browser.

        Returns a Deferred that fires with the page content, or with an
        error message if the request failed.
        """
        print('requested: {0}'.format(request_data))
        self.instance_counter += 1
        instance_id = str(self.instance_counter)
        browser = HeadlessBrowser(instance_id)
        # Register before the request starts, so the completion callback
        # always finds the instance even if the Deferred fires immediately.
        self.browser_instances[instance_id] = browser
        def_request = browser.get_request(request_data)
        def_request.addCallback(self._request_completed)
        return def_request


def start_server(port=8297):
    """Serve ``TestXMLRPCServer`` over TCP on *port* and run the reactor."""
    # Imported at call time, i.e. after the module-level qt4reactor.install().
    from twisted.internet import reactor
    xmlrpc_root = TestXMLRPCServer()
    site = server.Site(xmlrpc_root)
    reactor.listenTCP(port, site)
    reactor.run()


if __name__ == '__main__':
    start_server()

其实只需要添加下面这一行就能解决问题:

self.web_page.deleteLater()

但保险起见,最好还是像上面的代码那样,把其他 Qt 对象也一并显式删除。希望这能帮别人节省一些调试时间。

于 2012-05-21T10:41:18.293 回答