1

我正在尝试编写一个小型 python 应用程序,使用PySide作为 GUI 和Twython作为 Twitter API 库,以捕获来自 Twitter 的流。

我遇到的问题是,当我单击“开始监控 Twitter”按钮时,UI 会冻结,直到流完成,此时代码继续执行并禁用“开始”按钮并启用“停止”按钮。这是用户界面:

在此处输入图像描述

其他一切似乎都工作正常——如果我不去管它,等流结束后 CSV 文件会如我所料地被创建——Twython 组件似乎也按预期工作。

第 151 行是当我单击开始时从 Twitter 流式传输的位置:

self.stream.statuses.filter(track=self.search_term)

如何将流式传输移动到单独的线程,然后使用 UI 上的停止按钮告诉 Twython 完成捕获流并退出?

我需要能够将MyStreamer实例发送到另一个线程,然后向它发送.disconnect()信号以使其终止捕获流。

这是完整的代码:

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import csv
import platform
import sys
import threading
from time           import sleep

import PySide
from PySide.QtGui   import QApplication, QMainWindow, QPushButton, QCheckBox, QTextEdit
from twython        import Twython
from twython        import TwythonStreamer

from ui_tweetstream import Ui_MainWindow

class MainWindow(QMainWindow, Ui_MainWindow):
    def __init__(self, parent=None):
        super(MainWindow, self).__init__(parent)
        self.setupUi(self)

        # Set up Variables
        self.tweet_fav_count           = True
        self.tweet_geocoordinates      = True
        self.tweet_id                  = True
        self.tweet_language            = True
        self.tweet_orig_tweet_id       = True
        self.tweet_orig_username       = True
        self.tweet_retweeted           = True
        self.tweet_sensitive           = True
        self.tweet_source_app          = True
        self.tweet_timestamp           = True
        self.tweet_user_name           = True
        self.search_term               = "#bigdata"
        self.tweets_to_get             = 1000

        # Bind the interface
        self.check_tweet_fav_count.clicked.connect(self.setTweetFavCount)
        self.check_tweet_geocoordinates.clicked.connect(self.setTweetGeocoordinates)
        self.check_tweet_id.clicked.connect(self.setTweetID)
        self.check_tweet_language.clicked.connect(self.setTweetLanguage)
        self.check_tweet_orig_tweet_id.clicked.connect(self.setTweetOrigTweetID)
        self.check_tweet_orig_username.clicked.connect(self.setTweetOrigUsername)
        self.check_tweet_retweeted.clicked.connect(self.setTweetRetweeted)
        self.check_tweet_sensitive.clicked.connect(self.setTweetSensitive)
        self.check_tweet_source_app.clicked.connect(self.setTweetSourceApp)
        self.check_tweet_timestamp.clicked.connect(self.setTweetTimestamp)
        self.check_tweet_user_name.clicked.connect(self.setTweetUsername)
        self.button_start.clicked.connect(self.streamStart)
        self.button_stop.clicked.connect(self.streamStop)

        # Set the initial states
        self.button_stop.setEnabled(False)

        APP_KEY            = ''
        APP_SECRET         = ''
        OAUTH_TOKEN        = ''
        OAUTH_TOKEN_SECRET = ''

        self.t = Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
        self.stream = MyStreamer(APP_KEY,APP_SECRET,OAUTH_TOKEN,OAUTH_TOKEN_SECRET)
        self.stream.init_mainWindow(self)

    def streamStop(self):
        print "Stopping stream"
        # Enable other controls here
        self.button_stop.setEnabled(False)
        self.button_start.setEnabled(True)
        self.setControlStates(True)
        self.stream.stopStream()

    def setControlStates(self, state):
        self.check_tweet_fav_count.setEnabled(state)
        self.check_tweet_geocoordinates.setEnabled(state)
        self.check_tweet_id.setEnabled(state)
        self.check_tweet_language.setEnabled(state)
        self.check_tweet_orig_tweet_id.setEnabled(state)
        self.check_tweet_orig_username.setEnabled(state)
        self.check_tweet_retweeted.setEnabled(state)
        self.check_tweet_sensitive.setEnabled(state)
        self.check_tweet_source_app.setEnabled(state)
        self.check_tweet_timestamp.setEnabled(state)
        self.check_tweet_user_name.setEnabled(state)
        self.search_box.setEnabled(state)
        self.num_tweets_box.setEnabled(state)

    # Functions for determining what to track
    def setTweetFavCount(self):
        self.tweet_fav_count = not self.tweet_fav_count
        print "tweet_fav_count:", self.tweet_fav_count

    def setTweetGeocoordinates(self):
        self.tweet_geocoordinates = not self.tweet_geocoordinates
        print "tweet_geocoordinates:", self.tweet_geocoordinates

    def setTweetID(self):
        self.tweet_id = not self.tweet_id
        print "tweet_id:", self.tweet_id

    def setTweetLanguage(self):
        self.tweet_language = not self.tweet_language
        print "tweet_language:", self.tweet_language

    def setTweetOrigTweetID(self):
        self.tweet_orig_tweet_id = not self.tweet_orig_tweet_id
        print "tweet_orig_tweet_id:", self.tweet_orig_tweet_id

    def setTweetOrigUsername(self):
        self.tweet_orig_username = not self.tweet_orig_tweet_id
        print "tweet_orig_username:", self. tweet_orig_username

    def setTweetRetweeted(self):
        self.tweet_retweeted = not self.tweet_retweeted
        print "tweet_retweeted:", self.tweet_retweeted

    def setTweetSensitive(self):
        self.tweet_sensitive = not self.tweet_sensitive
        print "tweet_sensitive:", self.tweet_sensitive

    def setTweetSourceApp(self):
        self.tweet_source_app = not self.tweet_source_app
        print "tweet_source_app:", self.tweet_source_app

    def setTweetTimestamp(self):
        self.tweet_timestamp = not self.tweet_timestamp
        print "tweet_timestamp:", self.tweet_timestamp

    def setTweetUsername(self):
        self.tweet_user_name = not self.tweet_user_name
        print "tweet_user_name:", self.tweet_user_name

    # Functions for starting and stopping the stream
    def streamStart(self):
        print "Starting stream"
        self.setControlStates(False)

        # Disable other controls here
        self.button_start.setEnabled(False)
        self.button_stop.setEnabled(True)

        # Hack to try to disable the UI
        # sleep(0.25)

        # Get the active search term
        self.search_term = self.search_box.text()

        # Get the number of tweets
        self.tweets_to_get = int(self.num_tweets_box.text())

        # Set the streamer
        self.stream.set_start_criteria(self.tweets_to_get)
        self.stream.statuses.filter(track=self.search_term)



class MyStreamer(TwythonStreamer):
    def init_mainWindow(self, the_main_window):
        self.main_window = the_main_window
        self.stop        = False
        self.header_done = False

    def set_start_criteria(self, numTweets):
        self.maxTweets   = numTweets
        self.tweetCount  = 0
        print "Number of tweets to get:", self.maxTweets

    def stopStream(self):
        self.stop = True

    def on_success(self, data):
        if 'text' in data:
            self.tweetCount += 1
            print "tweetCount:", self.tweetCount
            #tweet = data['text'].encode('utf-8')
            theTweet = data
            writer   = TweetMonkey()
            writer.assignMainWindow(self.main_window, self.header_done)
            self.header_done = True
            writer.process(theTweet)

        # Want to disconnect after the first result?
        if self.stop is True or self.tweetCount >= self.maxTweets:
            self.disconnect()

    def on_error(self, status_code, data):
        print status_code, data




class TweetMonkey:
    """Append the selected fields of a tweet to ``tweets.csv``.

    Which columns are written is decided by the ``tweet_*`` boolean
    attributes of the main window passed to ``assignMainWindow``.
    """

    def assignMainWindow(self,the_main_window, is_header_done):
        """Store the options source and whether the header already exists."""
        self.main_window = the_main_window
        self.header_done = is_header_done

    def clean(self,text):
        """Return *text* with newlines, double quotes and commas replaced.

        Newlines become ``"; "``, double quotes become single quotes and
        commas become spaces.
        """
        return text.replace("\n", "; ").replace('"', "'").replace(',', " ")

    def create_header(self):
        """Append the header row for the currently selected columns."""
        options = self.main_window
        header = []

        if options.tweet_id:
            header.append("id")
        if options.tweet_language:
            header.append("lang")
        if options.tweet_user_name:
            header.append("user_name")

        # The tweet text column is always written.
        header.append("tweet")

        if options.tweet_retweeted:
            header.append("retweeted")
        if options.tweet_fav_count:
            header.append("favorite_count")
        if options.tweet_source_app:
            header.append("source")
        if options.tweet_orig_tweet_id:
            header.append("in_reply_to_status_id")
        if options.tweet_orig_username:
            header.append("in_reply_to_screen_name")
        if options.tweet_sensitive:
            header.append("possibly_sensitive")
        if options.tweet_geocoordinates:
            header.append("geo")
        if options.tweet_timestamp:
            header.append("created_at")

        # ``with`` guarantees the handle is closed even if the write fails.
        with open("tweets.csv", 'ab+') as tweets:
            csv.writer(tweets, dialect='excel').writerow(header)

    def process(self, tweet):
        """Append one row for *tweet*, writing the header first if needed.

        *tweet* is indexed like a dict with the keys used below; a missing
        key raises ``KeyError`` before the file is opened.
        """
        if not self.header_done:
            self.create_header()
            self.header_done = True

        options = self.main_window
        row = []

        if options.tweet_id:
            row.append(tweet['id'])
        if options.tweet_language:
            row.append(tweet['lang'].encode('utf-8'))
        if options.tweet_user_name:
            row.append(tweet['user']['name'].encode('utf-8', 'replace'))

        row.append(self.clean(tweet['text']).encode('utf-8', 'replace'))

        if options.tweet_retweeted:
            row.append(tweet['retweeted'])
        if options.tweet_fav_count:
            row.append(tweet['favorite_count'])
        if options.tweet_source_app:
            row.append(self.clean(tweet['source']).encode('utf-8', 'replace'))
        if options.tweet_orig_tweet_id:
            row.append(tweet['in_reply_to_status_id'])
        if options.tweet_orig_username:
            row.append(tweet['in_reply_to_screen_name'])

        if options.tweet_sensitive:
            # Missing or falsy flag is written as the string "False".
            if tweet.get('possibly_sensitive'):
                row.append(tweet['possibly_sensitive'])
            else:
                row.append("False")

        if options.tweet_geocoordinates:
            geo = tweet['geo']
            if geo is not None and geo['type'] == 'Point':
                # Flatten a point to "<first> <second>" coordinate text.
                first = str(geo['coordinates'][0]) + " "
                second = str(geo['coordinates'][1])
                row.append(first + second)
            else:
                # None or a non-point geometry is written as-is.
                row.append(geo)

        if options.tweet_timestamp:
            row.append(tweet['created_at'])

        # Build the row first, then open: a bad tweet no longer leaves an
        # open handle behind, and the file is always closed after writing.
        with open("tweets.csv", 'ab+') as tweets:
            csv.writer(tweets, dialect='excel').writerow(row)





if __name__ == '__main__':
    # Create the Qt application, show the main window and run the event
    # loop.  The loop's return code becomes the process exit status
    # instead of being discarded.
    app = QApplication(sys.argv)
    frame = MainWindow()
    frame.show()
    sys.exit(app.exec_())
4

1 回答 1

1

我知道这是一篇旧帖子,但我在最近编写的一个简单应用程序中遇到了类似的问题,我的解决方案是使用线程。

我使用的工作线程(worker)写法来自:https://pymotw.com/2/threading/

以及这里描述的方法:http://aadrake.com/using-twitter-as-a-stream-processing-source.html

基本做法是:让 Twython 流在一个单独的线程中运行,并把收到的文本放入队列;程序的其余部分则在另一个循环中从该队列读取数据。

于 2015-11-04T02:56:19.673 回答