0

我有一个使用 `streamR` 的 R 脚本。它不是 100% 可重现的,因为您需要自己的 Twitter OAuth 令牌才能运行它。每次我在交互模式下运行该脚本时,它都能成功完成。但是,当我通过 cronjob 每小时运行一次时,会零星地收到各种不同的错误消息,不过有时也能成功完成。我在底部列出了错误消息。看起来是那个很长的 `data.frame` 调用出了问题。由于这些错误看起来像是随机的乱码,这似乎与一次性将大量 R 代码粘贴到终端、导致部分字符被打乱时出现的问题相同。我已经在 cronjob 中分别尝试过 `Rscript` 和 `R CMD BATCH`,但错误仍然存在。

library(ROAuth)
library(streamR)

## Restore the saved OAuth credentials from disk.
## NOTE(review): `my_oauth` is used below but never assigned in this script,
## so it is presumably the object stored in this .dat file -- confirm.
load("/home/ubuntu/Documents/Scripts/TwitterScrapes/mr_oauth.dat")

## Open the filtered stream for the tracked keywords and parse whatever it
## returns in a single expression.
## NOTE(review): `timeout = 3400` is presumably in seconds, i.e. just under
## the hourly cron interval -- confirm against the streamR documentation.
keywords <- "switch Sprint"
tweets <- parseTweets(
  filterStream(
    file = "",
    track = keywords,
    oauth = my_oauth,
    timeout = 3400
  )
)

## Normalise the free-text columns: drop newlines, then drop every character
## that is neither alphanumeric nor a space. gsub() coerces its input to
## character, so no separate as.character() step is required.
for (text_field in c("text", "description", "name", "location")) {
  without_newlines <- gsub("\n", "", tweets[[text_field]])
  tweets[[text_field]] <- gsub("[^[:alnum:] ]", "", without_newlines)
}

## Append one numeric column per name below, each filled with 0.
## (The names look like mobile-carrier identifiers; nothing in this script
## sets them to anything other than 0.)
carrier_flags <- c(
  "att", "sprint", "verizon", "tmobile", "aio", "cricket",
  "gosmart", "metropcs", "virgin", "boost", "usc"
)
for (flag_name in carrier_flags) {
  tweets[[flag_name]] <- 0
}



## Build the output table `cleaned`: one row per parsed tweet.
##
## Scalar arguments ("" and 0) are recycled by data.frame() to the number of
## rows supplied by the `tweets` columns. Column names and their order are
## the output schema of this script and are kept exactly as they were
## (including the spelling of `Corportate`).
##
## Fix: the `metro` column used to be read via `tweets$metro`, a name that is
## never created (the script creates `tweets$metropcs`). It only worked through
## `$` partial matching; when such a lookup yields NULL, data.frame() drops
## the column silently. The full column name is now spelled out.
cleaned <- data.frame(
  ## Placeholder columns, blank / zero for every row.
  To = "",
  From = "",
  Phone.Availability = 0,
  Phone.Price = 0,
  Family.Plan = 0,
  Coverage.Availability = 0,
  Coverage.Quality = 0,
  Customer.Service = 0,
  Data.Plan = 0,
  Upgrade.Plan = 0,
  Device.Promo = 0,
  Service.Promo = 0,
  Outage = 0,
  Plan.Price = 0,
  Wireline = 0,
  Wireline.Programming = 0,
  Corportate = 0,
  Na = 0,
  Switch.Phrase = "",
  ## Tweet content.
  tweet = tweets$text,
  Date = tweets$created_at,
  Location = tweets$location,
  S.reviewed = 0,
  ## Flag columns copied from `tweets`.
  att = tweets$att,
  verizon = tweets$verizon,
  sprint = tweets$sprint,
  tmobile = tweets$tmobile,
  aio = tweets$aio,
  cricket = tweets$cricket,
  gosmart = tweets$gosmart,
  metro = tweets$metropcs,
  boost = tweets$boost,
  virgin = tweets$virgin,
  usc = tweets$usc,
  ## Tweet and user metadata copied from the parsed stream.
  Idstr = tweets$id_str,
  Retweet = tweets$retweeted,
  Retweet_Count = tweets$retweet_count,
  In.reply.to.status.id = tweets$in_reply_to_status_id_str,
  In.reply.to.id = tweets$in_reply_to_user_id_str,
  Listed.count = tweets$listed_count,
  Verified = tweets$verified,
  Usr.id.str = tweets$user_id_str,
  Description = tweets$description,
  Geo.enabled = tweets$geo_enabled,
  Usr.created.at = tweets$user_created_at,
  Statuses.count = tweets$statuses_count,
  Followers.count = tweets$followers_count,
  Favorites = tweets$favourites_count,
  Name = tweets$name,
  Lang = tweets$lang,
  Utc.offset = tweets$utc_offset,
  Friends.count = tweets$friends_count,
  Screen.name = tweets$screen_name,
  ## Place information.
  Country.code = tweets$country_code,
  Country = tweets$country,
  Place.type = tweets$place_type,
  Full.name = tweets$full_name,
  Place.name = tweets$place_name,
  Place.id = tweets$place_id,
  source = tweets$source
)

错误示例:

Error in data.frame(To = "", From = "", Phone.Availability = 0, Phone.Price = 0,  : 
object 'tweetsd_str' not found
Execution halted

Error in `$<-.data.frame`(`*tmp*`, "place_lat", value = c(NaN, NaN)) : 
replacement has 2 rows, data has 0
Calls: parseTweets -> $<- -> $<-.data.frame
Execution halted
4

0 回答 0