3

我有一个类似于以下的数据框,共有 500 列:

Probes <- data.frame(Days=seq(0.01, 4.91, 0.01), B1=5:495,B2=-100:390, B3=10:500,B4=-200:290)

我想计算一个滚动窗口线性回归,其中我的窗口大小为 12 个数据点,每个顺序回归由 6 个数据点分隔。对于每个回归,“天数”始终是模型的 x 分量,而 y 将是其他每一列(B1,然后是 B2、B3 等)。然后,我想将系数保存为具有现有列标题(B1、B2 等)的数据框。

我认为我的代码很接近,但不是很有效。我使用了动物园图书馆的 rollapply。

slopedata<-rollapply(zoo(Probes), width=12, function(Probes) { 
 coef(lm(formula=y~Probes$Days, data = Probes))[2]
 }, by = 6, by.column=TRUE, align="right")

如果可能的话,我还希望将“xmins”保存到向量中以添加到数据框中。这意味着每个回归中使用的最小 x 值(基本上是“天”列中的每 6 个数字。)感谢您的帮助。

4

3 回答 3

2

1)定义一个动物园对象z,其数据包含Probes,其索引取自Probes的第一列,即Days。请注意,lmallowsy是一个矩阵,它定义了一个coefs计算回归系数的函数。终于rollapply结束了z。请注意,返回对象的索引给出了 xmin。

library(zoo)

z <- zoo(Probes, Probes[[1]])

coefs <- function(z) c(unlist(as.data.frame(coef(lm(z[,-1] ~ z[,1])))))
rz <- rollapply(z, 12, by = 6, coefs, by.column = FALSE, align = "left")

给予:

> head(rz)
     B11 B12  B21 B22 B31 B32  B41 B42
0.01   4 100 -101 100   9 100 -201 100
0.07   4 100 -101 100   9 100 -201 100
0.13   4 100 -101 100   9 100 -201 100
0.19   4 100 -101 100   9 100 -201 100
0.25   4 100 -101 100   9 100 -201 100
0.31   4 100 -101 100   9 100 -201 100

请注意,DF <- fortify.zoo(rz)如果您需要rz.

2)另一种有点相似的方法是rollaplly超过行号:

library(zoo)
y <- as.matrix(Probes[-1])
Days <- Probes$Days
n <- nrow(Probes)
coefs <- function(ix) c(unlist(as.data.frame(coef(lm(y ~ Days, subset = ix)))), 
      xmins = Days[ix][1])
r <- rollapply(1:n, 12, by = 6, coefs)
于 2015-11-19T22:10:17.260 回答
0

试试这个:

# here are the xmin values you wanted
xmins <- Probes$Days[seq(1,nrow(Probes),6)]

# here we build a function that will run regressions across the columns
# y1 vs x, y2 vs x, y3 vs x...
# you enter the window and by (12/6) in order to limit the interval being
# regressed. this is later called in do.call
runreg <- function(Probes,m,window=12,by=6){

  # beg,end are used to specify the interval
  beg <- seq(1,nrow(Probes),by)[m]
  end <- beg+window-1

  # this is used to go through all the columns
  N <- ncol(Probes)-1
  tmp <- numeric(N)
  # go through each column and store the coefficients in tmp
  for(i in 1:N){
     y <- Probes[[i+1]][beg:end]
     x <- Probes$Days[beg:end]
     tmp[i] <- coef(lm(y~x))[2][[1]]
  }
  # put all our column regressions into a dataframe
  res <- rbind('coeff'=tmp)
  colnames(res) <- colnames(Probes)[-1]

  return(res)
}

# now that we've built the function to do the column regressions
# we just need to go through all the window-ed regressions (row regressions)
res <- do.call(rbind,lapply(1:length(xmins),function(m) runreg(Probes,m)))

# these rownames are the index of the xmin values
rownames(res) <- seq(1,nrow(Probes),6)
res <- data.frame(res,xmins)
于 2015-11-19T21:27:48.647 回答
0

您也可以rollRegres按如下方式使用该包

# setup data
Probes <- data.frame(
  # I changed the days to be intergers
  Days=seq(1L, 491L, 1L),
  B1=5:495, B2=-100:390, B3=10:500 , B4=-200:290)

# setup grp argument
grp_arg <- as.integer((Probes$Days - 1L) %/% 6)

# estimate coefs. width argument is realtive in grp units
library(rollRegres)
X <- cbind(1, Probes$Days / 100)
Ys <- as.matrix(Probes[, 2:5])
out <- lapply(1:ncol(Ys), function(i)
  roll_regres.fit(x = X, y = Ys[, i], width = 2L, grp = grp_arg)$coefs)
out <- do.call(cbind, out)

# only keep the complete.cases and the unique values
colnames(out) <- sapply(1:4, function(i) paste0("B", i, 0:1))
out <- out[c(T, grp_arg[-1] != head(grp_arg, -1)), ]
out <- out[complete.cases(out), ]
head(out)
#R      B10 B11  B20 B21 B30 B31  B40 B41
#R [1,]   4 100 -101 100   9 100 -201 100
#R [2,]   4 100 -101 100   9 100 -201 100
#R [3,]   4 100 -101 100   9 100 -201 100
#R [4,]   4 100 -101 100   9 100 -201 100
#R [5,]   4 100 -101 100   9 100 -201 100
#R [6,]   4 100 -101 100   9 100 -201 100

zoo该解决方案比例如解决方案快得多

library(zoo) coefs <- function(z) c(unlist(as.data.frame(coef(lm(z[,-1] ~ z[,1]))))) microbenchmark::microbenchmark(   rollapply = {
    z <- zoo(Probes, Probes[[1]])
    rz <- rollapply(z, 12, by = 6, coefs, by.column = FALSE, align = "left")   },   roll_regres = {
    grp_arg <- as.integer((Probes$Days - 1L) %/% 6)

    X <- cbind(1, Probes$Days / 100)
    Ys <- as.matrix(Probes[, 2:5])
    out <- lapply(1:ncol(Ys), function(i)
      roll_regres.fit(x = X, y = Ys[, i], width = 2L, grp = grp_arg)$coefs)
    out <- do.call(cbind, out)

    colnames(out) <- sapply(1:4, function(i) paste0("B", i, 0:1))
    out <- out[c(T, grp_arg[-1] != head(grp_arg, -1)), ]
    out <- out[complete.cases(out), ]
    head(out)   } )
#R Unit: microseconds
#R        expr       min        lq      mean     median        uq       max neval
#R   rollapply 53392.614 56330.492 59793.106 58363.2825 60902.938 119206.76   100
#R roll_regres   865.186   920.297  1074.161   983.9015  1047.705   5071.41   100

目前你需要从 Github 安装包,因为 version 的验证出错0.1.0。因此,运行

devtools::install_github("boennecd/rollRegres", upgrade_dependencies = FALSE,
                         build_vignettes = TRUE)
于 2018-07-02T20:04:37.087 回答