我想统计某个用户的每条推文各收到了多少条回复。Twitter 的 API 并不直接提供这一数据。为了减少需要抓取的数据量,我决定只统计该用户的关注者所发的回复,并把它当作一个不错的近似值(我相信对推文的回复中,大多数都直接来自该用户的关注者)。
我相信我已经完成了大部分工作,只是最后一步需要帮助:我正在努力让我写的函数对所有关注者都运行一遍。
我更希望解决方案使用 R 而不是 Python,尽管我知道 Python 的方案是存在的,也可以作为备选。我在示例中填的是唐纳德·特朗普的 Twitter 用户名;我并不是真的要针对他做这件事,我知道他庞大的关注者数量会让这变得很困难。我想要的是一个可以输入任意用户的通用版本。
library(rtweet)
library(plyr)
library(dplyr)
##set the screen name of the account to analyse (this can be changed)
targettwittername <- "realDonaldTrump"
##get this account's timeline (up to 3200 tweets are requested)
tmls <- get_timeline(targettwittername, n = 3200, retryonratelimit = TRUE)
##get their user id
##kept as a character string on purpose: Twitter ids are 64-bit integers and
##a double is only exact up to 2^53, so as.numeric() can silently change an id
targettwitteruserid <- as.character(lookup_users(targettwittername)$user_id[1])
##get ids of their tweets
##status_id (character) is the exact id and should be preferred for matching;
##status_id_num is retained only for code that still joins on the numeric
##form, and may be rounded for large ids
tweetids <- select(tmls, status_id)
tweetids <- transform(tweetids, status_id_num = as.numeric(status_id))
##get list of followers (who are most likely to reply)
##stringsAsFactors = FALSE keeps user_id as plain character on R < 4.0, where
##data.frame() would otherwise turn the ids into a factor
targetfollowers <- data.frame(
  get_followers(targettwittername),
  stringsAsFactors = FALSE
)
##clean up follower list to exclude those that have never tweeted and
##those with restricted (protected) access
##pass the id vector explicitly rather than the whole data frame
user_lookup <- lookup_users(targetfollowers$user_id)
##as.logical() accepts both a logical column and "TRUE"/"FALSE" strings;
##rows where either condition is NA are dropped by filter()
users_with_tweets_and_unprotected <- filter(
  user_lookup,
  statuses_count != 0,
  !as.logical(protected)
)
users_with_tweets_and_unprotected <- select(
  users_with_tweets_and_unprotected,
  user_id
)
targetfollowers <- filter(
  targetfollowers,
  user_id %in% users_with_tweets_and_unprotected$user_id
)
##custom function to search one follower's timeline for replies to the
##target account's tweets
##
##x: a follower's user id; only the first element is used. It may be a
##   character string, a number or a factor.
##
##reads two globals: `targettwitteruserid` (id of the target account,
##character or numeric) and `tweetids` (data frame with the target's tweet
##ids in a `status_id` column)
##
##returns a data frame with one row per (follower, replied-to tweet):
##  user_id                 the follower's id as found in their timeline
##  reply_to_status_id_num  replied-to tweet id as a double (kept for
##                          backward compatibility; may be rounded)
##  n                       number of replies to that tweet
##  reply_to_status_id      replied-to tweet id as an exact character string
##a follower whose timeline cannot be fetched, or is empty, yields zero rows
##(with a warning in the first case) instead of aborting the whole run
getfollowersreplies <- function(x){
  ##ids are handled as text throughout: as.numeric() on a 64-bit id loses
  ##precision, and on a factor it returns the level code, not the id
  ##NOTE(review): assumes rtweet treats an all-digit string as a user id
  ##rather than a screen name -- confirm for the installed rtweet version
  follower <- as.character(x[1])

  empty_counts <- data.frame(
    user_id = character(0),
    reply_to_status_id_num = numeric(0),
    n = integer(0),
    reply_to_status_id = character(0),
    stringsAsFactors = FALSE
  )

  followertl <- tryCatch(
    data.frame(
      get_timeline(follower, n = 3200, retryonratelimit = TRUE),
      stringsAsFactors = FALSE
    ),
    error = function(e) {
      warning(
        "Could not fetch timeline for follower ", follower, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
  if (is.null(followertl) || nrow(followertl) == 0) {
    return(empty_counts)
  }

  ##accept the target id in either numeric or character form
  targetid <- targettwitteruserid
  if (is.numeric(targetid)) {
    targetid <- format(targetid, scientific = FALSE, trim = TRUE)
  }
  targetid <- as.character(targetid)

  ##keep only tweets that reply to the target account
  replies <- filter(
    followertl,
    as.character(in_reply_to_status_user_id) == targetid
  )
  replies <- transform(
    replies,
    reply_to_status_id = as.character(in_reply_to_status_status_id)
  )

  ##match against the target's own tweets on the exact character id;
  ##unique() prevents duplicated ids from multiplying rows in the join
  known_tweets <- data.frame(
    reply_to_status_id = unique(as.character(tweetids$status_id)),
    stringsAsFactors = FALSE
  )
  matched <- inner_join(replies, known_tweets, by = "reply_to_status_id")

  ##dplyr:: is spelled out because plyr is also attached and exports a
  ##summarise() of its own
  replycounts <- matched %>%
    dplyr::group_by(user_id, reply_to_status_id) %>%
    dplyr::summarise(n = dplyr::n()) %>%
    dplyr::ungroup()
  replycounts <- data.frame(replycounts, stringsAsFactors = FALSE)
  replycounts$reply_to_status_id_num <- as.numeric(
    replycounts$reply_to_status_id
  )

  ##original three columns first, exact character id appended last
  replycounts[, c("user_id", "reply_to_status_id_num", "n",
                  "reply_to_status_id")]
}
##run the per-follower reply count for every remaining follower and stack
##the resulting data frames into one table
tweet_replies <- do.call(
  rbind,
  lapply(
    targetfollowers$user_id,
    function(follower_id) getfollowersreplies(follower_id)
  )
)