1

我很确定这是一个 bug,但想先向社区确认一下。在 splitstackshape 包中 Reshape() 函数的示例页面上有如下代码:

set.seed(1)
mydf <- data.frame(id_1 = 1:6, id_2 = c("A", "B"), varA.1 = sample(letters, 6),
                 varA.2 = sample(letters, 6), varA.3 = sample(letters, 6),
                 varB.2 = sample(10, 6), varB.3 = sample(10, 6),
                 varC.3 = rnorm(6))
mydf

  id_1 id_2 varA.1 varA.2 varA.3 varB.2 varB.3      varC.3
1    1    A      g      y      r      4      3 -0.04493361
2    2    B      j      q      j      7      4 -0.01619026
3    3    A      n      p      s      8      1  0.94383621
4    4    B      u      b      l      2     10  0.82122120
5    5    A      e      e      p     10      6  0.59390132
6    6    B      s      d      u      1      2  0.91897737

然后,

## Note that these data are unbalanced
## reshape() will not work
## Not run: 
reshape(mydf, direction = "long", idvar=1:2, varying=3:ncol(mydf))

## End(Not run)

## The Reshape() function can handle such scenarios

Reshape(mydf, id.vars = c("id_1", "id_2"),
       var.stubs = c("varA", "varB", "varC"))

    id_1 id_2 time varA varB        varC
 1:    1    A    1    g    4 -0.04493361
 2:    2    B    1    j    7 -0.01619026
 3:    3    A    1    n    8  0.94383621
 4:    4    B    1    u    2  0.82122120
 5:    5    A    1    e   10  0.59390132
 6:    6    B    1    s    1  0.91897737
 7:    1    A    2    y    3          NA
 8:    2    B    2    q    4          NA
 9:    3    A    2    p    1          NA
10:    4    B    2    b   10          NA
11:    5    A    2    e    6          NA
12:    6    B    2    d    2          NA
13:    1    A    3    r   NA          NA
14:    2    B    3    j   NA          NA
15:    3    A    3    s   NA          NA
16:    4    B    3    l   NA          NA
17:    5    A    3    p   NA          NA
18:    6    B    3    u   NA          NA

但是,根据宽格式中的变量名称(确切地说是其数字后缀),输出难道不应该是下面这样吗:

    id_1 id_2 time varA varB        varC
 1:    1    A    1    g   NA          NA
 2:    2    B    1    j   NA          NA
 3:    3    A    1    n   NA          NA
 4:    4    B    1    u   NA          NA
 5:    5    A    1    e   NA          NA
 6:    6    B    1    s   NA          NA
 7:    1    A    2    y    4          NA
 8:    2    B    2    q    7          NA
 9:    3    A    2    p    8          NA
10:    4    B    2    b    2          NA
11:    5    A    2    e   10          NA
12:    6    B    2    d    1          NA
13:    1    A    3    r    3 -0.04493361
14:    2    B    3    j    4 -0.01619026
15:    3    A    3    s    1  0.94383621
16:    4    B    3    l   10  0.82122120
17:    5    A    3    p    6  0.59390132
18:    6    B    3    u    2  0.91897737

这是因为 varA 在全部三个时间点(1,2 和 3)都有测量,varB 只在时间点 2 和 3 有测量,而 varC 只在时间点 3 有测量。还是说,我遗漏了什么明显的东西......

使用 tidyr 得到的结果看起来是正确的:

> library(tidyr)
> mydf %>% gather(key="variable", value="value", varA.1:varC.3) %>%
+   separate(variable, into=c("variable","time")) %>%
+   spread("variable", "value")
   id_1 id_2 time varA varB                varC
1     1    A    1    g <NA>                <NA>
2     1    A    2    y    4                <NA>
3     1    A    3    r    3 -0.0449336090152309
4     2    B    1    j <NA>                <NA>
5     2    B    2    q    7                <NA>
6     2    B    3    j    4 -0.0161902630989461 ...
4

1 回答 1

0

这已在版本 1.4.4 中得到修复,现在可在 CRAN 上使用。感谢您报告错误。

运行 update.packages() 之后,您应该能够得到以下结果:

packageVersion("splitstackshape")
## [1] ‘1.4.4’

Reshape(mydf, id.vars = c("id_1", "id_2"), var.stubs = c("varA", "varB", "varC"))
##     id_1 id_2 time varA varB        varC
##  1:    1    A    1    g   NA          NA
##  2:    2    B    1    j   NA          NA
##  3:    3    A    1    n   NA          NA
##  4:    4    B    1    u   NA          NA
##  5:    5    A    1    e   NA          NA
##  6:    6    B    1    s   NA          NA
##  7:    1    A    2    y    4          NA
##  8:    2    B    2    q    7          NA
##  9:    3    A    2    p    8          NA
## 10:    4    B    2    b    2          NA
## 11:    5    A    2    e   10          NA
## 12:    6    B    2    d    1          NA
## 13:    1    A    3    r    3 -0.04493361
## 14:    2    B    3    j    4 -0.01619026
## 15:    3    A    3    s    1  0.94383621
## 16:    4    B    3    l   10  0.82122120
## 17:    5    A    3    p    6  0.59390132
## 18:    6    B    3    u    2  0.91897737
于 2018-03-30T02:21:25.240 回答