1

我有一个数据框,然后将其拆分为三个(或任意数量)的数据框。

我要做的是自动处理每个数据框中的每一列并添加现有变量的滞后版本。

例如,如果每个 data.frame 中有三个变量(V1、V2、V3),我想自动(没有硬编码)添加 V1.lag、V2.lag 和 V3.lag。

这是我到目前为止所拥有的,但我现在被困住了。

任何帮助都会受到高度赞赏。

# Simulated example data: three standard-normal columns plus two grouping
# columns (dim1 has three levels of 24 rows, dim2 has two levels of 36 rows).
dd <- data.frame(
  matrix(rnorm(216), nrow = 72, ncol = 3),
  rep(c("A", "B", "C"), each = 24),
  rep(c("J", "K"), each = 36)
)
names(dd) <- c("v1", "v2", "v3", "dim1", "dim2")
dd
# One data frame per value of dim1.
dds <- split(dd, dd$dim1)
dds
# Missing step 1: Automatically create v1.lag, v2.lag, v3.lag, etc (if required)

最后,我想将三个数据框合并为一个大数据框,其中将包含新创建的变量。

# Missing step 2: Merge data frames into single data frame

任何帮助将不胜感激。

编辑:在评论中,我又问了如何计算移动平均(而不是滞后)。解决方案如下:

#' Trailing moving average of a numeric vector.
#'
#' @param x Numeric vector to smooth.
#' @param f Filter weights. The filtered value is divided by `length(f)`, so
#'   the default `c(1, 1, 1)` yields a plain three-point mean.
#' @return A numeric vector the same length as `x`. Positions where the
#'   one-sided window is incomplete are `NA`.
ma <- function(x, f = c(1, 1, 1)) {
  # stats::filter() raises an error when the window is longer than the series
  # (for example a group with fewer rows than length(f)); no position has a
  # complete window there, so the answer is all-NA.
  if (length(x) == 0L || length(f) > length(x)) {
    return(rep(NA_real_, length(x)))
  }
  # Namespace-qualified on purpose: packages such as dplyr mask filter().
  as.numeric(stats::filter(x, f, sides = 1) / length(f))
}
#' Append a moving-average column for every numeric column of a data frame.
#'
#' New columns are named `<column>.ma.<length(f)>` and are computed with `ma()`.
#'
#' @param df A data frame.
#' @param f Filter weights forwarded to `ma()`.
#' @return `df` with the moving-average columns appended; `df` unchanged when
#'   it has no numeric columns.
foo <- function(df, f = c(1, 1, 1)) {
  # vapply() always returns a logical vector, even for a zero-column data frame
  # (sapply() would return an empty list there).
  is_num <- vapply(df, is.numeric, logical(1))
  # With no numeric columns, paste() would recycle the empty name vector to ""
  # and produce a bogus ".ma.<k>" column name, so stop early.
  if (!any(is_num)) {
    return(df)
  }
  nams <- paste(names(df)[is_num], "ma", length(f), sep = ".")
  df[, nams] <- lapply(df[is_num], function(column) ma(column, f = f))
  df
}
4

2 回答 2

3

这是一种选择:

## reuse @Andrie's clag() function as lag() is silly
#' Shift a vector forward by `n` positions, padding the front with `NA`.
#'
#' @param x Vector to lag.
#' @param n Non-negative number of positions to shift by.
#' @return A vector the same length as `x`: `NA` in the first `min(n, length(x))`
#'   positions, followed by the leading elements of `x`.
clag <- function(x, n = 1) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)
  len <- length(x)
  # Clamp the shift so the result never outgrows x (n > length(x)) and so that
  # n = 0 returns x itself; head(x, -0) would return an empty vector.
  shift <- min(n, len)
  c(rep(NA, shift), x[seq_len(len - shift)])
}
## wrapper function to do the addition of lag variables for single DF
#' Append a lagged copy of every numeric column of a data frame.
#'
#' New columns are named `<column>.lag` and are computed with `clag()`.
#'
#' @param df A data frame.
#' @param n Number of positions to lag by, forwarded to `clag()`.
#' @return `df` with the lagged columns appended; `df` unchanged when it has no
#'   numeric columns.
foo <- function(df, n = 1) {
  # vapply() always returns a logical vector, even for a zero-column data frame
  # (sapply() would return an empty list there).
  is_num <- vapply(df, is.numeric, logical(1))
  # With no numeric columns, paste() would recycle the empty name vector to ""
  # and produce a bogus ".lag" column name, so stop early.
  if (!any(is_num)) {
    return(df)
  }
  nams <- paste(names(df)[is_num], "lag", sep = ".")
  df[, nams] <- lapply(df[is_num], function(column) clag(column, n = n))
  df
}

# Apply foo() to each per-group data frame; the resulting list is printed.
lapply(X = dds, FUN = foo)

结果如下:

> lapply(dds, foo)
$A
            v1          v2           v3 dim1 dim2      v1.lag      v2.lag       v3.lag
1  -1.15107343  1.47671548 -0.146501739    A    J          NA          NA           NA
2  -1.61068272 -0.85397093 -1.240187604    A    J -1.15107343  1.47671548 -0.146501739
3  -1.23470282 -0.26194027  1.938344030    A    J -1.61068272 -0.85397093 -1.240187604
4  -0.57874043 -0.44600138  0.326069423    A    J -1.23470282 -0.26194027  1.938344030
5   0.16139066 -1.95804742 -0.744678169    A    J -0.57874043 -0.44600138  0.326069423
6  -1.01497027  0.36850034  1.532640065    A    J  0.16139066 -1.95804742 -0.744678169
7   0.72288058 -0.40115543 -0.686450596    A    J -1.01497027  0.36850034  1.532640065
8  -0.51300447  0.19686310  0.441649595    A    J  0.72288058 -0.40115543 -0.686450596
9   0.95439966 -2.03513002 -0.897784897    A    J -0.51300447  0.19686310  0.441649595
10 -1.36736081 -0.41040962 -0.459403176    A    J  0.95439966 -2.03513002 -0.897784897
11  0.59503846  0.28925760 -0.003095389    A    J -1.36736081 -0.41040962 -0.459403176
12 -0.37951869  0.49551357  0.269412108    A    J  0.59503846  0.28925760 -0.003095389
13 -0.52953401 -0.28433351  1.125505917    A    J -0.37951869  0.49551357  0.269412108
14 -1.73466020  0.25442637 -1.094139749    A    J -0.52953401 -0.28433351  1.125505917
15  0.08479137 -0.11688894 -1.034378216    A    J -1.73466020  0.25442637 -1.094139749
16 -2.45854464  0.15806266 -2.275995527    A    J  0.08479137 -0.11688894 -1.034378216
17  1.10663502  1.28587230  0.070334868    A    J -2.45854464  0.15806266 -2.275995527
18 -0.01945585  1.63659116 -0.137040232    A    J  1.10663502  1.28587230  0.070334868
19  0.59026606 -1.95724134 -0.480014930    A    J -0.01945585  1.63659116 -0.137040232
20 -0.32245933  1.35372005  1.348717525    A    J  0.59026606 -1.95724134 -0.480014930
21 -0.42560327 -1.30145328  2.020609480    A    J -0.32245933  1.35372005  1.348717525
22  1.19550777  0.18417336  0.099232994    A    J -0.42560327 -1.30145328  2.020609480
23  1.20198621  0.05926023 -0.171505810    A    J  1.19550777  0.18417336  0.099232994
24 -1.00667141  1.32441782  0.056696824    A    J  1.20198621  0.05926023 -0.171505810

$B
           v1          v2          v3 dim1 dim2     v1.lag      v2.lag      v3.lag
25  0.7878614  0.10354576 -0.69308980    B    J         NA          NA          NA
26  0.5824551  0.42319616  0.42734938    B    J  0.7878614  0.10354576 -0.69308980
27 -0.2769730  1.51559382 -0.64106570    B    J  0.5824551  0.42319616  0.42734938
28 -0.5736416 -1.58745816 -1.13274631    B    J -0.2769730  1.51559382 -0.64106570
29 -1.9082145 -0.26148604 -0.04699411    B    J -0.5736416 -1.58745816 -1.13274631
30 -1.6254549  0.39390814 -1.79993619    B    J -1.9082145 -0.26148604 -0.04699411
31  0.3963274  1.79667985  0.92873142    B    J -1.6254549  0.39390814 -1.79993619
32 -0.5889415 -0.04690351  1.43394978    B    J  0.3963274  1.79667985  0.92873142
33  0.4683819 -1.34023029  0.18749782    B    J -0.5889415 -0.04690351  1.43394978
34  0.7373052 -0.93470320 -1.14528378    B    J  0.4683819 -1.34023029  0.18749782
35 -0.7751348 -1.26533917  0.11246728    B    J  0.7373052 -0.93470320 -1.14528378
36  1.7786627 -0.19757164  0.14150980    B    J -0.7751348 -1.26533917  0.11246728
37  1.8570412 -2.15174901  1.07751105    B    K  1.7786627 -0.19757164  0.14150980
38  0.5128697  0.40112948 -0.94826274    B    K  1.8570412 -2.15174901  1.07751105
39  0.8710264 -0.59978467  0.54462858    B    K  0.5128697  0.40112948 -0.94826274
40 -0.3711512 -0.15632337  0.15832543    B    K  0.8710264 -0.59978467  0.54462858
41  1.4505624  0.20915835  2.59369653    B    K -0.3711512 -0.15632337  0.15832543
42  0.0871329  0.25440471  0.30096063    B    K  1.4505624  0.20915835  2.59369653
43 -0.7398342 -1.72678544  0.45534941    B    K  0.0871329  0.25440471  0.30096063
44  0.1953264 -0.60560630 -0.36884626    B    K -0.7398342 -1.72678544  0.45534941
45 -0.2702493  0.50747209 -0.50699830    B    K  0.1953264 -0.60560630 -0.36884626
46  0.2987449  0.46347722  1.20725190    B    K -0.2702493  0.50747209 -0.50699830
47 -0.5682779 -0.71470625 -0.07865078    B    K  0.2987449  0.46347722  1.20725190
48 -1.5291983  1.80092050 -1.73317395    B    K -0.5682779 -0.71470625 -0.07865078

$C
            v1           v2           v3 dim1 dim2      v1.lag       v2.lag       v3.lag
49  0.06095825 -0.518263220  0.510999371    C    K          NA           NA           NA
50  0.40077713  0.477989115  0.855752036    C    K  0.06095825 -0.518263220  0.510999371
51  0.06763037  0.802110426 -0.102536186    C    K  0.40077713  0.477989115  0.855752036
52 -0.90530986 -0.005452101 -0.089703589    C    K  0.06763037  0.802110426 -0.102536186
53 -0.79360209  0.299844218 -0.765164525    C    K -0.90530986 -0.005452101 -0.089703589
54  1.34050298 -1.093705314 -0.955952912    C    K -0.79360209  0.299844218 -0.765164525
55  0.45377712  0.054978470  0.382874895    C    K  1.34050298 -1.093705314 -0.955952912
56  0.95283101 -0.564193352  1.458002944    C    K  0.45377712  0.054978470  0.382874895
57  1.09157807 -1.351894599 -1.366084414    C    K  0.95283101 -0.564193352  1.458002944
58  2.71993062 -1.126272793  1.374046159    C    K  1.09157807 -1.351894599 -1.366084414
59 -0.04685281  0.423085481 -0.455903151    C    K  2.71993062 -1.126272793  1.374046159
60 -0.31055449  0.818291875  0.400386018    C    K -0.04685281  0.423085481 -0.455903151
61 -0.54904545  1.542272313  0.648135340    C    K -0.31055449  0.818291875  0.400386018
62 -0.72914142  1.495482707 -0.212135011    C    K -0.54904545  1.542272313  0.648135340
63 -0.27374611 -1.309254707 -0.005125047    C    K -0.72914142  1.495482707 -0.212135011
64  0.87439910 -2.666588138  1.043778597    C    K -0.27374611 -1.309254707 -0.005125047
65  1.07142042  0.446233778 -0.286784683    C    K  0.87439910 -2.666588138  1.043778597
66 -0.10431808  0.510820156  0.405309569    C    K  1.07142042  0.446233778 -0.286784683
67 -1.04006019 -0.041327622  1.202855549    C    K -0.10431808  0.510820156  0.405309569
68  0.41084794 -0.376796559 -1.147032471    C    K -1.04006019 -0.041327622  1.202855549
69  0.88329788 -0.344611311  1.862998306    C    K  0.41084794 -0.376796559 -1.147032471
70 -0.67916248  1.396061431  0.697517685    C    K  0.88329788 -0.344611311  1.862998306
71  3.55359528 -0.207825480 -0.949834845    C    K -0.67916248  1.396061431  0.697517685
72  0.11329113  0.294747300 -0.955891419    C    K  3.55359528 -0.207825480 -0.949834845

最后是合并步骤。先保存上面的结果:

# Replace each per-group data frame with its foo()-augmented version.
dds <- lapply(X = dds, FUN = foo)

然后使用 do.call() 对各个数据框调用 rbind(),把它们合并成一个数据框,如下所示:

# Stack the per-group data frames back into a single data frame.
df2 <- do.call(what = rbind, args = dds)

结果如下:

> head(df2)
            v1         v2         v3 dim1 dim2     v1.lag     v2.lag     v3.lag
A.1 -1.1510734  1.4767155 -0.1465017    A    J         NA         NA         NA
A.2 -1.6106827 -0.8539709 -1.2401876    A    J -1.1510734  1.4767155 -0.1465017
A.3 -1.2347028 -0.2619403  1.9383440    A    J -1.6106827 -0.8539709 -1.2401876
A.4 -0.5787404 -0.4460014  0.3260694    A    J -1.2347028 -0.2619403  1.9383440
A.5  0.1613907 -1.9580474 -0.7446782    A    J -0.5787404 -0.4460014  0.3260694
A.6 -1.0149703  0.3685003  1.5326401    A    J  0.1613907 -1.9580474 -0.7446782
于 2013-01-03T15:15:12.140 回答
2

使用 plyr 包可以一步完成所有这些操作:

library(plyr)
#' Shift a vector forward by `n` positions, padding the front with `NA`.
#'
#' @param x Vector to lag.
#' @param n Non-negative number of positions to shift by.
#' @return A vector the same length as `x`: `NA` in the first `min(n, length(x))`
#'   positions, followed by the leading elements of `x`.
clag <- function(x, n = 1) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)
  len <- length(x)
  # Clamp the shift so the result never outgrows x (n > length(x)) and so that
  # n = 0 returns x itself; head(x, -0) would return an empty vector.
  shift <- min(n, len)
  c(rep(NA, shift), x[seq_len(len - shift)])
}
# Split dd by dim1, add one lagged column per numeric variable within each
# group via transform(), and recombine the pieces into one data frame.
x <- ddply(
  .data = dd,
  .variables = "dim1",
  .fun = transform,
  v1.lag = clag(v1),
  v2.lag = clag(v2),
  v3.lag = clag(v3)
)

head(x)
          v1         v2         v3 dim1 dim2     v1.lag     v2.lag     v3.lag
1  0.4465910 -0.2564334 -0.9122640    A    J         NA         NA         NA
2 -0.3748563 -0.9461061  0.1641274    A    J  0.4465910 -0.2564334 -0.9122640
3 -0.5010834 -0.4413026 -0.7509968    A    J -0.3748563 -0.9461061  0.1641274
4 -0.5278584 -0.6377017  0.5528831    A    J -0.5010834 -0.4413026 -0.7509968
5 -0.4290586  0.4687849  0.6885102    A    J -0.5278584 -0.6377017  0.5528831
6  0.1179935 -0.2742456 -0.1945482    A    J -0.4290586  0.4687849  0.6885102
于 2013-01-03T14:59:53.857 回答