如果我没有理解错您的需求,下面这些函数应该能满足要求。如果愿意,您可以很容易地把 removeAllNA()
函数从递归改写为循环;不过,用向量化的 removeNA() 函数来完成每一步合并,
会比完全用循环实现整个过程快得多:
## Roll counts across gaps of exactly `n` consecutive NA values.
##
## df - data frame with Count and Position variables
## n - the number of consecutive NA values to rollup
##
## Leading and trailing NA rows are dropped first. Then, wherever two non-NA
## rows are separated by exactly `n` NA rows, the earlier count is added to
## the later one and the earlier row plus the NA rows between them are
## removed. Consecutive qualifying gaps are chained, so the last row of such
## a chain receives the sum of every count in the chain.
##
## Returns the reduced data frame (original row names are kept); a zero-row
## data frame when Count holds no non-NA value at all.
removeNA <- function(df, n = 1) {
  observed <- which(!is.na(df$Count))

  # only NA values (or no rows): trimming leading/trailing NA leaves nothing
  if (length(observed) == 0L) {
    return(df[0L, , drop = FALSE])
  }

  # remove any leading and trailing NA rows in a single step
  df <- df[observed[1L]:observed[length(observed)], , drop = FALSE]
  observed <- observed - observed[1L] + 1L

  # absorbed[j] is TRUE when exactly n NA rows separate observed[j] and
  # observed[j + 1], i.e. observed[j] is rolled into observed[j + 1]
  absorbed <- diff(observed) == n + 1

  # return the data frame if no values to rollup
  if (!any(absorbed)) {
    return(df)
  }

  # observed rows linked by qualifying gaps form one chain; a running sum
  # inside each chain leaves the chain total on its last (surviving) row
  chain_id <- cumsum(c(TRUE, !absorbed))
  df$Count[observed] <- ave(df$Count[observed], chain_id, FUN = cumsum)

  # remove each absorbed row together with the n NA rows that follow it
  starts <- observed[c(absorbed, FALSE)]
  df[-as.vector(outer(starts, 0:n, "+")), , drop = FALSE]
}
## Roll up NA gaps of increasing length until no NA counts remain.
##
## df - data frame with a Count column; passed on to removeNA()
## n - the gap length (number of consecutive NA values) to start with; every
##     recursive step moves on to the next larger gap length
##
## Returns df once Count contains no NA value or n exceeds the number of
## remaining rows.
removeAllNA <- function(df, n = 1) {
  # `||` is the scalar, short-circuiting operator meant for `if` conditions
  if (!anyNA(df$Count) || n > nrow(df)) {
    return(df)
  }
  removeAllNA(removeNA(df, n), n + 1)
}
您的示例:
> tst <- data.frame(Position=c(15,22,38,49,55,61),Count=c(15,NA,NA,5,NA,17))
> removeNA(tst,1)
Position Count
1 15 15
2 22 NA
3 38 NA
6 61 22
> removeNA(removeNA(tst,1),2)
Position Count
6 61 37
> removeAllNA(tst)
Position Count
6 61 37
更大的随机示例:
> set.seed(34)
> dat <- data.frame(Position=1:100,Count=round(runif(100,5,25)))
> dat$Count[sample(100,60)] <- NA
> removeAllNA(dat)
Position Count
5 5 24
9 9 35
10 10 16
11 11 11
24 24 59
25 25 14
28 28 44
29 29 18
30 30 16
36 36 42
37 37 6
38 38 16
39 39 13
51 51 65
52 52 11
62 62 27
89 89 84
95 95 39
96 96 22
97 97 9
编辑:添加第二个函数:
## Merge runs of directly adjacent non-NA counts into their last row.
##
## df - data frame with a Count column
##
## Within every run of consecutive rows whose Count is not NA, the counts are
## summed into the final row of the run and the earlier rows of the run are
## removed. Rows with NA counts are left untouched.
##
## Returns the reduced data frame (original row names are kept), or df
## unchanged when no two non-NA rows are adjacent.
addNotNA <- function(df) {
  observed <- which(!is.na(df$Count))

  # joined[j] is TRUE when observed[j + 1] directly follows observed[j]
  joined <- diff(observed) == 1L
  if (!any(joined)) {
    return(df)
  }

  # a running sum inside each run leaves the run total on its last row, so
  # runs longer than two rows do not lose their earliest values
  run_id <- cumsum(c(TRUE, !joined))
  df$Count[observed] <- ave(df$Count[observed], run_id, FUN = cumsum)

  # drop every row that has been folded into its successor
  df[-observed[c(joined, FALSE)], , drop = FALSE]
}
> addNotNA(df)
Position Count
2 22 36
3 38 NA
4 49 5
5 55 NA
6 61 17