1

我正在尝试使用 R 包 {rtweet} 下载推文的实时流。
似乎当我使用不受欢迎的主题标签时,我收到以下消息:

流式推文 600 秒...
流过早断开连接。重新连接...

当我使用流行的主题标签时,这似乎并不经常发生,
但是一段时间后流最终会断开连接......

我正在使用 while 循环和 if else 每 10 分钟将新推文附加到 googlesheet。我的代码还为每次运行的迭代添加了一个带有时间戳的列。

谁能帮我理解为什么它似乎适用于流行的主题标签(例如 #trump),却不适用于目前不流行的主题标签?如果在 10 分钟的流式传输期间没有找到带有指定主题标签的推文,该代码应该附加一个空行。当它工作时,它工作得很好,但也许我需要改变一些东西,以便不受欢迎或非趋势的主题标签不会导致它断开连接?

我已经阅读了上一篇似乎描述了类似问题的帖子(链接到上一个问题),但我认为我不应该每隔 10 分钟就收到此错误。我还尝试使用流行的主题标签和 3 小时的间隔运行此脚本,但在运行一夜之后,它仍然过早断开连接。

在连接断开的情况下,如何在 rstudio 中自动重新运行脚本?说,如果发生这种情况时我不在我的电脑附近?

非常感谢任何帮助。

library(rtweet)
library(googlesheets4)
library(googlesheets)
library(googledrive)

# Stream tweets in 10-minute windows and append each window's results to a
# Google Sheet until the end date is reached.
#
# Every window appends either the streamed tweets (stamped with the time the
# window finished) or a single all-NA placeholder row when nothing matched.
# Errors raised by stream_tweets() are caught so that one dropped connection
# does not terminate the whole run; the loop waits briefly and tries again.

# Authenticate and resolve the target spreadsheet once, before streaming.
googlesheets4::sheets_auth(
  email = "someemail",
  token = "somestring"
)

ss <- sheets_get("URL to googlesheet goes here")

# Column layout written to the sheet: the columns of a parsed stream followed
# by the "iteration" timestamp column added below.
# NOTE(review): assumes this matches the installed rtweet version's parsed
# output -- confirm against names(stream_tweets(...)).
tweet_columns <- c(
  "user_id", "status_id", "created_at", "screen_name", "text",
  "source", "display_text_width", "reply_to_status_id", "reply_to_user_id",
  "reply_to_screen_name", "is_quote", "is_retweet", "favorite_count",
  "retweet_count", "quote_count", "reply_count", "hashtags", "symbols",
  "urls_url", "urls_t.co", "urls_expanded_url", "media_url", "media_t.co",
  "media_expanded_url", "media_type", "ext_media_url", "ext_media_t.co",
  "ext_media_expanded_url", "ext_media_type", "mentions_user_id",
  "mentions_screen_name", "lang", "quoted_status_id", "quoted_text",
  "quoted_created_at", "quoted_source", "quoted_favorite_count",
  "quoted_retweet_count", "quoted_user_id", "quoted_screen_name",
  "quoted_name", "quoted_followers_count", "quoted_friends_count",
  "quoted_statuses_count", "quoted_location", "quoted_description",
  "quoted_verified", "retweet_status_id", "retweet_text",
  "retweet_created_at", "retweet_source", "retweet_favorite_count",
  "retweet_retweet_count", "retweet_user_id", "retweet_screen_name",
  "retweet_name", "retweet_followers_count", "retweet_friends_count",
  "retweet_statuses_count", "retweet_location", "retweet_description",
  "retweet_verified", "place_url", "place_name", "place_full_name",
  "place_type", "country", "country_code", "geo_coords", "coords_coords",
  "bbox_coords", "status_url", "name", "location", "description", "url",
  "protected", "followers_count", "friends_count", "listed_count",
  "statuses_count", "favourites_count", "account_created_at", "verified",
  "profile_url", "profile_expanded_url", "account_lang",
  "profile_banner_url", "profile_background_url", "profile_image_url",
  "iteration"
)

# Build the one-row, all-NA data frame appended when a window has no tweets.
# The width is derived from `columns` so it cannot drift from the name list.
empty_tweet_row <- function(columns) {
  placeholder <- data.frame(matrix(ncol = length(columns), nrow = 1))
  colnames(placeholder) <- columns
  placeholder
}

# Convert one column to character. List columns are collapsed to a single
# space-separated string per row; cells that are empty or entirely NA
# become NA.
stringify_column <- function(column) {
  if (!is.list(column)) {
    return(as.character(column))
  }
  vapply(
    column,
    function(cell) {
      values <- as.character(unlist(cell))
      if (all(is.na(values))) NA_character_ else paste(values, collapse = " ")
    },
    character(1),
    USE.NAMES = FALSE
  )
}

# Convert every column to character so the sheet receives plain text.
# Working column by column (rather than apply() over the whole data frame)
# avoids the matrix coercion that discards column classes.
flatten_for_sheet <- function(tweets) {
  tweets[] <- lapply(tweets, stringify_column)
  as.data.frame(tweets, stringsAsFactors = FALSE)
}

end_date <- as.Date("2020-02-15")
retry_wait_secs <- 30

while (Sys.Date() < end_date) {
  stream_result <- tryCatch(
    stream_tweets(
      q = "some hashtags go here",
      timeout = 60 * 10,
      file_name = NULL,
      parse = TRUE
    ),
    error = function(e) e
  )

  # A failed window is reported and retried after a short pause instead of
  # stopping the script.
  if (inherits(stream_result, "error")) {
    message("stream_tweets() failed: ", conditionMessage(stream_result))
    Sys.sleep(retry_wait_secs)
    next
  }

  newtweets <- stream_result

  # Treat both NULL and a zero-row result as "no tweets in this window".
  if (is.null(newtweets) || NROW(newtweets) == 0) {
    newtweets <- empty_tweet_row(tweet_columns)
  } else {
    # A single timestamp is recycled to every row of this window.
    newtweets[["iteration"]] <- as.character(Sys.time())
    newtweets <- flatten_for_sheet(newtweets)
  }

  sheets_append(newtweets, ss = ss, sheet = "mytweets")
}
4

0 回答 0