r - 从命令行Linux执行R脚本时忽略错误并继续运行

Question

我在我的 Linux VPS 上运行了以下 R 脚本，它经常返回错误，从而中断脚本。我不确定如何围绕这些错误进行编程，想知道是否有办法强制脚本继续运行。这些错误通常是引用“results”表时发生的下标越界错误。将代码直接粘贴到 R 控制台时仍然会发生这些错误，但是当出现对“results”表的越界引用时，相应的变量会保持其先前设置的值 0，因此代码按预期工作。如果有人能帮我让脚本在从 Linux 命令行自动运行时（例如：Rscript /folder/file.R）也能这样继续执行，我将不胜感激。

library(RMySQL)
library(twitteR)
library(plyr)
library(stringr)
library(sentiment)

# Take a single timestamp so that Date and Time always describe the same
# instant (two separate Sys.time() calls can straddle a minute boundary).
run_time <- Sys.time()
Date <- format(run_time, "%Y-%m-%d %H:%M")
Time <- format(run_time, "%H:%M")

# Fetch the tweet texts to be scored.
# NOTE(review): the window runs from NOW()+49min to NOW()+60min, i.e. it is
# dated in the future relative to the DB clock -- presumably this compensates
# for a clock/time-zone offset between writer and DB server; confirm.
tweets.con <- dbConnect(MySQL(),user="xxxxxxxxxxxx",password="xxxxxxxxxxxx",dbname="xxxxxxxxxx",host="xxxxxxxxxxxxxxxxxxxx.com")
Feel <- dbGetQuery(tweets.con,"select `tweet_text` from `tweets` where `created_at` BETWEEN timestamp(DATE_ADD(NOW(), INTERVAL 49 MINUTE)) AND timestamp(DATE_ADD(NOW(), INTERVAL 60 MINUTE))")

# Number of cells in the query result, computed once; `n` is kept as an alias.
Total_Count <- length(as.matrix(Feel))
n <- Total_Count
# Echo the count to stdout, as the original bare expression did under Rscript.
print(Total_Count)

# Most recent row of previously stored proportions. Column order matters to
# the code that consumes it: three aggregate proportions first, then the
# per-score proportions from neg5 up to pos5.
results.con <- dbConnect(MySQL(),user="xxxxxxxxxxx",password="xxxxxxxxxxxxxxxxxx",dbname="xxxxxxxxxxxxxx",host="xxxxxxxxxxxxxxxxxx")
last.results.alt <- dbGetQuery(results.con,"select `Neg_Prop_Alt`,`Neu_Prop_Alt`,`Pos_Prop_Alt`,`neg5_Prop`,`neg4_Prop`,`neg3_Prop`,`neg2_Prop`,`neg1_Prop`,`zero_Prop`,`pos1_Prop`,`pos2_Prop`,`pos3_Prop`,`pos4_Prop`,`pos5_Prop` from `results_10m_alt` ORDER BY Date DESC LIMIT 1")

# function score.sentiment
#
# Scores every sentence as (number of its words found in pos.words) minus
# (number of its words found in neg.words), after stripping retweet markers,
# @mentions, punctuation, digits, control characters and links.
#
# Parameters
# sentences: vector of text to score
# pos.words: vector of words of positive sentiment
# neg.words: vector of words of negative sentiment
# .progress: passed to laply() to control the progress bar
#
# Returns a data frame with one row per sentence and two columns:
# `text` (the sentences as given) and `score`.
score.sentiment <- function(sentences, pos.words, neg.words, .progress='none')
{
   # lower-case x, yielding NA instead of an error when tolower() signals one
   tryTolower <- function(x)
   {
      tryCatch(tolower(x), error=function(e) NA_character_)
   }

   # score a single sentence against both dictionaries
   score.one <- function(sentence, pos.words, neg.words)
   {
      # remove retweet entities
      sentence <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", sentence)
      # remove at people
      sentence <- gsub("@\\w+", "", sentence)
      # remove punctuation
      sentence <- gsub("[[:punct:]]", "", sentence)
      # remove numbers
      sentence <- gsub("[[:digit:]]", "", sentence)
      # remove control characters
      sentence <- gsub("[[:cntrl:]]", "", sentence)
      # remove html links (punctuation is already gone, so a URL is one word)
      sentence <- gsub("http\\w+", "", sentence)
      # collapse runs of blanks to ONE space; replacing them with "" would
      # glue the neighbouring words together and hide dictionary matches
      sentence <- gsub("[ \t]{2,}", " ", sentence)
      # trim leading and trailing whitespace
      sentence <- gsub("^\\s+|\\s+$", "", sentence)

      sentence <- vapply(sentence, tryTolower, character(1), USE.NAMES=FALSE)

      # split sentence into words with str_split (stringr package)
      words <- unlist(str_split(sentence, "\\s+"))

      # match() gives the dictionary position or NA; only the hit count matters
      pos.hits <- sum(!is.na(match(words, pos.words)))
      neg.hits <- sum(!is.na(match(words, neg.words)))

      pos.hits - neg.hits
   }

   # create simple array of scores with laply
   scores <- laply(sentences, score.one, pos.words, neg.words, .progress=.progress)

   # data frame with scores for each sentence
   scores.df <- data.frame(text=sentences, score=scores)
   return(scores.df)
}

# import positive and negative words
pos <- readLines("/home/jgraab/R/scripts/positive_words.txt")
neg <- readLines("/home/jgraab/R/scripts/negative_words.txt")

Feel_txt <- sapply(Feel, function(x) gettext(x))

scores.df <- score.sentiment(Feel_txt, pos, neg, .progress='text')
results <- table(scores.df[,2])+.0001

# Scores that are reported individually, and the column-name prefix of each.
score_levels <- -5:5
score_labels <- c(paste0("neg", 5:1), "zero", paste0("pos", 1:5))

# Look every score up by name. A score that did not occur in this batch is
# simply absent from `results`; indexing it with [[ ]] raises "subscript out
# of bounds" and aborts the script under Rscript, so fall back to 0 instead.
counts <- unname(vapply(
  as.character(score_levels),
  function(score) if (score %in% names(results)) results[[score]] else 0,
  numeric(1)
))
props <- counts / Total_Count

# Previous proportions, in query order: Neg, Neu, Pos aggregates (1-3), then
# neg5 .. pos5 (4-14). NA when the results table has no previous row.
previous <- unname(vapply(last.results.alt,
                          function(column) as.numeric(column)[1],
                          numeric(1)))

# Percentage change relative to the previous value.
# NOTE(review): a previous value of 0 yields Inf/NaN here, as in the original
# arithmetic -- confirm how the results table should record that case.
pct_change <- function(current, last) (current - last) / last * 100

changes <- pct_change(props, previous[4:14])

#Get Negative, Neutral, and Positive Totals
Neg_Count_Alt <- sum(counts[score_levels < 0])
Neg_Prop_Alt <- Neg_Count_Alt / Total_Count
Neg_Change_Alt <- pct_change(Neg_Prop_Alt, previous[1])
Neu_Count_Alt <- sum(counts[score_levels == 0])
Neu_Prop_Alt <- Neu_Count_Alt / Total_Count
Neu_Change_Alt <- pct_change(Neu_Prop_Alt, previous[2])
Pos_Count_Alt <- sum(counts[score_levels > 0])
Pos_Prop_Alt <- Pos_Count_Alt / Total_Count
Pos_Change_Alt <- pct_change(Pos_Prop_Alt, previous[3])

# Count-weighted mean score over all tweets.
Mean <- sum(score_levels * counts) / Total_Count

# Per-score columns in table order: neg5_Count, neg5_Prop, neg5_Change,
# neg4_Count, ... pos5_Change. rbind() + as.vector() interleaves the three
# measures score by score (column-major order).
score_cols <- as.list(as.vector(rbind(counts, props, changes)))
names(score_cols) <- paste(rep(score_labels, each = 3),
                           c("Count", "Prop", "Change"), sep = "_")

Feel_alt.df <- data.frame(Date, Time, Total_Count, Mean,
                          Neg_Count_Alt, Neg_Prop_Alt, Neg_Change_Alt,
                          Neu_Count_Alt, Neu_Prop_Alt, Neu_Change_Alt,
                          Pos_Count_Alt, Pos_Prop_Alt, Pos_Change_Alt,
                          score_cols)

dbWriteTable(results.con, name="results_10m_alt", Feel_alt.df,
             append=TRUE, overwrite=FALSE, row.names=FALSE)

# Release both database connections now that the run is complete.
invisible(dbDisconnect(results.con))
invisible(dbDisconnect(tweets.con))

score 1 · Accepted Answer

Use try or tryCatch (the former is simpler and generally all you need). You're already using tryCatch later on, so use it to deal with your problematic query as well.

score 1 · Accepted Answer

这样的代码有很多错误是正常的！

避免定义变量并使用 list 或 data.frame 结构来聚合您的结果。
使用小函数将代码分成小部分。这将有助于以后使用 tryCatch。

脚本结构

你的程序应该看起来像这样：

  data <- load.tweets()                      ## read inputs
  scores <- score.sentiment(data,...)        ## clean data/extract info
  ratios <- compute.ratios(scores,data,...)  ## analysis
  save.results(ratios,data,...)              ## save results

例如，在这里我尝试分解 2 个部分：

获取表格结果

我在这里使用mapply，因为您对所有系数重复相同的语句。

# Percentage change of each observed score's proportion versus the previous run.
#
# score_table: named table of score counts (names are the scores, "-5".."5")
# total:       total number of scored tweets
# previous:    previous proportions; positions 1-3 hold the Neg/Neu/Pos
#              aggregates and positions 4-14 hold neg5 .. pos5
#
# The defaults read the script-level objects, so compute.ratios() with no
# arguments still works. Returns a numeric vector named by score; scores
# outside -5..5 are skipped because `previous` has no column for them.
compute.ratios <- function(score_table = results, total = Total_Count,
                           previous = last.results.alt){
  score_names <- intersect(names(score_table), as.character(-5:5))
  # score s lives in position s + 9 of `previous` (-5 -> 4, ..., 5 -> 14)
  prev_cols <- as.integer(score_names) + 9L
  mapply(function(score, col){
    count <- score_table[[score]]
    prop <- count/total
    val <- as.numeric(previous[[col]]) ## you should check that val !=0
    (prop-val)/val*100
  },score_names,prev_cols)
}

定义一个函数来加载数据

# Load the script's inputs from the two MySQL databases.
#
# Returns a list with two elements:
#   Feel             - result of the `tweet_text` query on `tweets`
#   last.results.alt - most recent row of proportions from `results_10m_alt`
#
# Each connection is closed when the function exits, whether it returns
# normally or a query fails.
load.tweets <- function(){
tweets.con <- dbConnect(MySQL(),user="xxxxxxxxxxxx",password="xxxxxxxxxxxx",
                                dbname="xxxxxxxxxx",host="xxxxxxxxxxxxxxxxxxxx.com")
on.exit(dbDisconnect(tweets.con), add = TRUE)
Feel <- dbGetQuery(tweets.con,"SELECT `tweet_text` 
                             FROM `tweets` 
                             WHERE `created_at` 
                             BETWEEN timestamp(DATE_ADD(NOW(), INTERVAL 49 MINUTE)) 
                             AND timestamp(DATE_ADD(NOW(), INTERVAL 60 MINUTE))")

results.con<-dbConnect(MySQL(),user="xxxxxxxxxxx",password="xxxxxxxxxxxxxxxxxx",
                               dbname="xxxxxxxxxxxxxx",host="xxxxxxxxxxxxxxxxxx")
on.exit(dbDisconnect(results.con), add = TRUE)
last.results.alt <- dbGetQuery(results.con,"SELECT `Neg_Prop_Alt`,`Neu_Prop_Alt`,
                                                   `Pos_Prop_Alt`,`neg5_Prop`,`neg4_Prop`,
                                                   `neg3_Prop`,`neg2_Prop`,`neg1_Prop`,
                                                    `zero_Prop`,`pos1_Prop`,`pos2_Prop`,`pos3_Prop`,
                                                    `pos4_Prop`,`pos5_Prop` 
                                           FROM `results_10m_alt` 
                                           ORDER BY Date DESC LIMIT 1")

list(Feel=Feel,last.results.alt =last.results.alt )
}

score 0 · Accepted Answer

您可以使用 evaluate 软件包。knitr 就是使用 evaluate 包来处理文学化编程（literate programming）文档的。

我们可以通过 knit 来使用 evaluate

一个简单的例子是一个脚本 test.r

x <- -1:5
a <- 'a'
x <- x + a
print(x)

我们可以使用 `knitr`

library(knitr)
# Read the script as text and wrap it in ONE code chunk. source() would run
# the script directly and abort at its first error, and paste() without c()
# would put a separate fence around every line.
knit(text = paste(c('```{r}', readLines('test.r'), '```'), collapse = '\n'))
 ## Error in x + a : non-numeric argument to binary operator
 # note the error has occurred, but evaluation proceeded
 # and x is defined
 x
 ## [1] -1  0  1  2  3  4  5

以最基本的方式直接使用 evaluate

 # cleaning up to show that x will be redefined
 rm(x)
evaluate(input = paste(readLines('test.r')))

## [[1]]
## $src
## [1] "x <- -1:5\n"
## 
## attr(,"class")
## [1] "source"
## 
## [[2]]
## $src
## [1] "a <- 'a'\n"
## 
## attr(,"class")
## [1] "source"
## 
## [[3]]
## $src
## [1] "x <- x + a\n"
## 
## attr(,"class")
## [1] "source"
## 
## [[4]]
## <simpleError in x + a: non-numeric argument to binary operator>
## 
## [[5]]
## $src
## [1] "print(x)"
## 
## attr(,"class")
## [1] "source"
## 
## [[6]]
## [1] "[1] -1  0  1  2  3  4  5\n"

# and x is defined!
x
## [1] -1  0  1  2  3  4  5

r - 从命令行Linux执行R脚本时忽略错误并继续运行

3 回答 3

脚本结构

获取表格结果

定义一个函数来加载数据

Related

Reference