我有一个屏幕名称列表(几百个)和他们的推文(ID)。通过该列表,我使用 Twitter API 来获取推文状态和转发者的 ID。我正在使用我在下面显示的代码,它工作正常。
问题是,在初始列表中的几个屏幕名称(10-15)之后,python 脚本停止而不打印任何错误。最初,我正在运行脚本:
python get_rt.py > log 2>&1 &
有时不会“捕获”所有打印错误。所以我实现了一个带有日志记录的回溯,并且回溯中没有错误。脚本只是出于某种原因停止。
知道为什么它会停止或我应该如何捕捉错误?
import io,json,sys
import traceback
import logging
#Logging
LOG_FILENAME = 'log_exception.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
# Tweepy
import tweepy,time
consumer_key = 'XXX'
consumer_secret = 'XXX'
auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth,wait_on_rate_limit=True, wait_on_rate_limit_notify=True, compression=True)
#MongoDB
from pymongo import MongoClient
client = MongoClient('localhost', 27018)
db = client['Twitter']
col_tweets = db['Tweets']
col_users = db['Users']
#Usefull function to save data to DB
def save_user(user_info):
id_str = user_info['id_str']
col_users.update({'id_str':id_str},user_info,upsert = True)
def save_tweet(tweet_info):
id_str = tweet_info['id_str']
col_tweets.update({'id_str':id_str},tweet_info,upsert = True)
screen_names = [...] #list of screen names
try:
for sc_name in screen_names:
print(sc_name)
tweets_data = []
with io.open('tweets_{}.json'.format(sc_name)) as f: #open the file containing the tweets from screen names X
for line in f:
tweet = json.loads(line)
if tweet['retweets_count'] >= 1:
tweets_data.append(tweet)
if not tweets_data:
continue
for i,tweet in enumerate(tweets_data):
t_id = tweet['id']
try:
rts = api.retweets(id = t_id,count = 100,tweet_mode="extended")
if rts:
retweeted_status = rts[0]._json['retweeted_status']
rts_u_ids = []
for rt in rts:
rt = rt._json
user_rt = rt['user']
save_user(user_rt)
rts_u_ids.append(user_rt['id_str'])
retweeted_status['retweeters_ids'] = rts_u_ids
save_tweet(retweeted_status)
except tweepy.TweepError:
time.sleep(60 * 15)
print("Sleeping 60*15")
except Exception as e:
logging.error(traceback.format_exc())