我正在编写一个网络爬虫,并使用 wxPython 来显示实时结果。假设窗口上只有一个名为 crawl 的按钮。
当我单击该按钮时,会弹出一个新窗口,新窗口上的 TextCtrl 将显示当前正在抓取的 url。
代码可以简化如下(这里只给出 UI 部分,WebCrawler 线程在 OnDisplayClick 函数中启动):
# -*- coding: utf-8 -*-
import wx
class Main(wx.Frame):
    """Main application frame containing a single "Crawl" button.

    Clicking the button opens a ``Display`` frame and starts a
    ``WebCrawler`` that is handed the frame's ``current_url`` text control.
    """

    def __init__(self, parent):
        """Create the frame, lay out the button and bind its click handler.

        Args:
            parent: Parent window forwarded to ``wx.Frame.__init__``.
        """
        wx.Frame.__init__(
            self,
            parent,
            id=wx.ID_ANY,
            title=wx.EmptyString,
            pos=wx.DefaultPosition,
            size=wx.Size(500, 300),
            style=wx.DEFAULT_FRAME_STYLE | wx.TAB_TRAVERSAL,
        )
        self.SetSizeHintsSz(wx.DefaultSize, wx.DefaultSize)

        bSizer3 = wx.BoxSizer(wx.VERTICAL)
        self.Crawl = wx.Button(self, wx.ID_ANY, u"Crawl",
                               wx.DefaultPosition, wx.DefaultSize, 0)
        self.Crawl.SetDefault()
        bSizer3.Add(self.Crawl, 0, wx.ALL, 5)

        self.SetSizer(bSizer3)
        self.Layout()
        self.Centre(wx.BOTH)

        # Connect events
        self.Crawl.Bind(wx.EVT_BUTTON, self.OnDisplayClick)

    def __del__(self):
        """No-op finalizer."""
        pass

    # Virtual event handlers, override them in your derived class
    def OnDisplayClick(self, event):
        """Open a ``Display`` frame and start a crawler bound to its URL field.

        Args:
            event: The button event delivered by wx (unused).
        """
        # Show the display window. The wx.Frame method is ``Show`` (capital
        # S); the lowercase ``show`` does not exist and raised AttributeError
        # before the window could appear.
        newDisplay = Display(self)
        newDisplay.Show()

        # Start the multi-threaded web crawler.
        # NOTE(review): startCrawl() is invoked on the GUI thread inside this
        # event handler. If it blocks until crawling finishes, the UI cannot
        # repaint in the meantime -- confirm against WebCrawler, and have any
        # worker thread update the TextCtrl via wx.CallAfter.
        web_crawler = WebCrawler(newDisplay.current_url)
        web_crawler.startCrawl()
class Display(wx.Frame):
    """Secondary frame with a "Current_URL: " label and an empty text field.

    The text field is exposed as ``current_url`` and the label as
    ``cur_url``.
    """

    def __init__(self, parent):
        """Construct the frame and stack the label above the text field.

        Args:
            parent: Parent window forwarded to ``wx.Frame.__init__``.
        """
        wx.Frame.__init__(
            self,
            parent,
            id=wx.ID_ANY,
            title=wx.EmptyString,
            pos=wx.DefaultPosition,
            size=wx.Size(500, 300),
            style=wx.DEFAULT_FRAME_STYLE | wx.TAB_TRAVERSAL,
        )
        self.SetSizeHintsSz(wx.DefaultSize, wx.DefaultSize)

        # Create both child widgets first (label, then text field).
        self.cur_url = wx.StaticText(
            self,
            id=wx.ID_ANY,
            label=u"Current_URL: ",
            pos=wx.DefaultPosition,
            size=wx.DefaultSize,
            style=0,
        )
        self.cur_url.Wrap(-1)
        self.current_url = wx.TextCtrl(
            self,
            id=wx.ID_ANY,
            value=wx.EmptyString,
            pos=wx.DefaultPosition,
            size=wx.DefaultSize,
            style=0,
        )

        # Add them to a vertical sizer in display order, 5px border all round.
        column = wx.BoxSizer(wx.VERTICAL)
        for widget in (self.cur_url, self.current_url):
            column.Add(widget, 0, wx.ALL, 5)

        self.SetSizer(column)
        self.Layout()
        self.Centre(wx.BOTH)

    def __del__(self):
        """No-op finalizer."""
        pass
用户界面:
WebCrawler 是一个多线程爬虫,我将 TextCtrl(current_url)传递给 WebCrawler,让它在显示窗口上显示当前爬取的 url。但是当我点击爬取按钮时,界面似乎卡死了,我猜是因为多线程 WebCrawler 正在运行,UI 线程没有机会显示新窗口。
我尝试使用 threading.Thread 另外编写两个线程,一个用于显示新窗口,一个用于 WebCrawler,但失败了:应用程序虽然能显示窗口并运行爬取线程几秒钟,却经常随即退出,有时还会输出类似下面的信息:
(python2.7:5231): Pango-CRITICAL **: pango_layout_get_iter: assertion `PANGO_IS_LAYOUT (layout)' failed
(python2.7:5404): GLib-GObject-CRITICAL **: g_object_ref: assertion `object->ref_count > 0' failed
这两个线程如下:
class UpdateThread(threading.Thread):
    """Thread whose ``run`` calls ``start()`` on the wrapped crawler object."""

    def __init__(self, webCrawl):
        """Store the crawler to be started when the thread runs.

        Args:
            webCrawl: Object exposing a ``start()`` method.
        """
        super(UpdateThread, self).__init__()
        self.webCrawl = webCrawl

    def run(self):
        """Thread body: invoke ``start()`` on the stored crawler."""
        crawler = self.webCrawl
        crawler.start()
class CrawlShowThread(threading.Thread):
    """Thread that requests the display window to be shown.

    wx widgets may only be manipulated from the GUI (main) thread, so
    ``run`` does not call ``Show()`` directly; it schedules the call on the
    GUI thread with ``wx.CallAfter``.
    """

    def __init__(self, crawl_display):
        """Store the window to be shown when the thread runs.

        Args:
            crawl_display: Window object exposing a ``Show()`` method.
        """
        threading.Thread.__init__(self)
        self.crawl_display = crawl_display

    def run(self):
        """Thread body: queue ``crawl_display.Show`` for the GUI thread."""
        # Calling Show() from this worker thread touches GTK objects off the
        # main thread; hand the call to the event loop instead.
        wx.CallAfter(self.crawl_display.Show)
然后在 OnCrawlClick() 函数中对这两个线程都调用 start()。但正如我上面所说,这种方法不起作用。
谁能告诉我处理这些事情的正确方法是什么?任何帮助表示赞赏!