r - 矢量化进入和退出

Question

我想知道是否有返回以下内容的矢量化方式：

我有一个向量 =

x = c(-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,11,10,9,8,7,6,5,4,3,2,1,0,-1,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12)

我想得到一个相同长度的向量，这样当它超过 5 时，它将设置为 1（真），直到它低于 0（假）。我目前正在做一个 for 循环，如果上述系列有大量观察结果，这将永远持续下去。

答案应该返回：

results = c(0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1)

有任何想法吗？

score 6 · Accepted Answer

使用 package zoo，您可以使用：

results2 <- na.locf(c(NA,1,0)[(x>=5) + 2*(x<=0) + 1],na.rm=FALSE)

identical(results2, results)
#[1] TRUE

score 4 · Accepted Answer

这很丑陋，但它似乎适用于非常复杂的场景：

entex <- function(x,uplim,lwlim) {

  result <- vector("integer",0)
  upr <- which(x>=uplim)
  lwr <- which(x<=lwlim)

  while(length(upr) > 0) {
    if(min(upr) > max(lwr)) {
      result <- unique(c(result,upr))
      upr <- upr[upr > max(result)]
    } else
    {
      result <- unique(c(result,upr[1]:(min(lwr[lwr>upr[1]])-1)))
      lwr <- lwr[lwr > max(result)]
      upr <- upr[upr > max(result)]
    }
  }
  result
}

为了证明它有效：

plot(x,pch=19,type="o")
abline(h=c(0,5),col="lightblue")
result <- entex(x,5,0)
abline(v=result,col="red")

在此处输入图像描述

还有一个更复杂的例子x：

x <- c(-0.6, -0.3, 0.5, 0.6, 3, 4.1, 6.7, 3.7, 7.5, 4.1, 6.8, 4.8, 3.3,
       1.6, 3.1, 2, 1.3, 2.9, 2.8, 1.9, 0, -0.5, -0.6, 0.3, 1.9, 5.1, 6.4)

在此处输入图像描述

score 4 · Accepted Answer

您可以使用逻辑值识别更改点并查找该状态的更改：

findChangePoint <- function(y,cp){
  results <- 0*y
  state = 0 
  i = 1
  while (i <= length(y)){
    if((state ==0 ) & (y[i] >max(cp))){
      state = 1
    }
    if ((state == 1) && (y[i] <= min(cp))){
      state = 0
    }
    results[i] = state
    i = i+1
  }
  return(results)
}

然后我们可以创建一个函数来绘制它：

plotChangePoints <- function(y,cp){
  p.state <- ggplot(data = data.frame(x = seq(1,length(y)),
                                      y=y,
                                      state = findChangePoint(y,cp))) +
    geom_point(aes(x = x,
                   y = y)) +
    geom_point(aes(x = x,
                  y = state),
               color = "red")    
  print(p.state)
  return(p.state)
}

所以现在当你这样做时，使用建议的更复杂的数据：

y <- c(-0.6, -0.3, 0.5, 0.6, 3, 4.1, 6.7, 3.7, 7.5,
     4.1, 6.8, 4.8, 3.3, 1.6, 3.1, 2, 1.3, 2.9, 2.8, 1.9,
     0, -0.5, -0.6, 0.3, 1.9, 5.1, 6.4)
# specify the change points we will use:
cp=c(5,1)
plotChangePoints(y,cp)

你明白了，黑点是数据，红色是状态（即“切换”与否）

在此处输入图像描述

而且，如果您想要的只是结果，请使用：

results <- findChangePoint(y,cp)

score 2 · Accepted Answer

更新：添加了编辑、测试和基准。

（对不起，我昨天无法测试）

这是一个本质上是纯逻辑比较的解决方案，比zoo

identical(results, UpAndDown(x))
# [1] TRUE

## 2,000 iterations, less than 0.1 seconds. 
> system.time(for(i in 1:2000) UpAndDown(x))
   user  system elapsed 
  0.080   0.001   0.082 

UpAndDown <- function(x, lowBound=0, upBound=5, numeric=TRUE) {
  ## This gets most of it
  high <-  (x >= upBound)
  low  <-  (x <= lowBound)

  res <- high & !low

  ## This grabs the middle portions
  fvs <- which(x==upBound)  
  zrs <- which(x==lowBound) 

  # The middle spots are those where zrs > fvs
  m <- which(zrs > fvs)

  # This is only iterating over a vector of a handufl of indecies
  #  It's not iterating over x
  mids <- unlist(lapply(m, function(i) seq(fvs[i], zrs[i]-1)), use.names=FALSE)
  res[mids] <- TRUE

  if (numeric)
    res <- as.numeric(res)

  # logical
  return(res)

}

基准：

# Small x
microbenchmark(UpAndDown=UpAndDown(x), Entex=entex(x,5,0), ZOO=na.locf(c(NA,1,0)[(x==5) + 2*(x<=0) + 1],na.rm=FALSE))

Unit: microseconds
      expr    min      lq  median      uq     max neval
 UpAndDown 31.573 36.1965 42.4240 46.9765 146.599   100
     Entex 40.113 46.1030 51.9605 57.3170 114.269   100
       ZOO 60.169 68.7335 78.2480 83.0360 176.159   100

较大的 x：

# With Larger x

x <- c(seq(-10, 10), seq(11, -7), seq(-8, 15), seq(16, -28), seq(-29, 100), seq(101, -9)) 
x <- c(x, x, x)
length(x)
# [1] 1050

## CONFIRM VALUES
identical(UpAndDown(x), na.locf(c(NA,1,0)[(x==5) + 2*(x<=0) + 1]))
# [1] TRUE

## Benchmark
microbenchmark(
    UpAndDown=UpAndDown(x), 
    fcp=findChangePoint(x, c(5,1)), 
    Entex=entex(x,5,0), 
    ZOO=na.locf(c(NA,1,0)[(x==5) + 2*(x<=0) + 1],na.rm=FALSE)
  )

Unit: microseconds
      expr      min        lq    median        uq       max neval
 UpAndDown  141.149  162.9125  183.8080  206.9560   403.528   100
       fcp 5719.692 6056.1760 6379.4355 7376.7370 21456.502   100
     Entex  416.570  446.8780  469.7845  501.0985   795.853   100
       ZOO  192.449  209.1260  249.3805  281.4820   489.416   100

注意：如果期望非整数值（或者，一般情况下，缺少确切的边界数字，例如0& 5），则使用以下定义代替

  ## ----------------------------##
    fvs <- which(high)
    zrs <- which(low)

    # This is only iterating over a vector of a handufl of indecies
    #  It's not iterating over x
    mids <- unlist(sapply(fvs, function(x) {
                                Z <- x<zrs; 
                                if (any(Z)) 
                                  seq(x, zrs[min(which(Z), na.rm=TRUE)]-1)
                            }
                  ), use.names=FALSE)

score 1 · Accepted Answer

这真的是一个很长的评论......让我印象深刻的是，这就是施密特触发器（运算放大器）的作用。这让我想知道是否有一种方法可以运行while具有可重置条件的循环。

limits <- c(5,0)
flop = 1
threshold<-limits[1]
for(j in 1:length(x) {

 while(x*(-1^(1-flop) < threshold) { 
do_stuff
}
threshold<-limits[flop+1]
flop <- !flop
}

我可能有几个负面迹象，但你明白了。

score 0 · Accepted Answer

您可以完全使用rle()并避免编写 for/while 循环：

x <- c(-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,11,10,9,8,7,6,5,4,3,2,1,0,-1,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12)

result <- rep(99, length(x))
result[x >= 5] <- 1
result[x <= 0] <- 0

result
#  [1]  0  0  0  0  0 99 99 99 99  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 99
# [26] 99 99 99  0  0  0  0  0 99 99 99 99  1  1  1  1  1  1  1  1

# Run-length-encode it
result_rle <- rle(result)
# Find the 99's and replace them with the previous value
missing_idx <- which(result_rle$values == 99)
result_rle$values[missing_idx] <- result_rle$values[missing_idx - 1]
# Inverse of the RLE
result <- inverse.rle(result_rle)

# Check value
expected <- c(0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1)
identical(result, expected)
# TRUE

请注意，如果第一个值介于 0 和 5 之间，这将给出错误，但添加一个检查很简单。你还需要决定在这种情况下你想要什么行为。

r - 矢量化进入和退出

6 回答 6

更新：添加了编辑、测试和基准。

基准：

较大的 x：

Related

Reference