0

我需要将每年对个人重复测量的“宽”数据框转换为“长”格式,以便我可以像 lm(y_year2 ~ x_year1) 和 lm(z_year2 ~ y_year2) 一样对其进行建模

我可以“手动”将它变成我想要的格式,但无法弄清楚如何melt/dcast它变成我想要的形状

下面我用一些模拟数据说明了我正在做什么

宽格式的数据框是这样的,每行对应一个个体:

ID  SITE    L_03  M_03  R_03  L_04  M_04  R_04  L_05  M_05  R_05
1   forest    X     a   YES     Y     b   YES     Z     c   NO
2   forest    ... 

我想要 LONG 格式:

ID  SITE    L_year1  L_year2  M_year1  M_year2  R_year1   R_year2   year1  year2
1   forest      X       Y       a         b       YES       YES       03    04
1   forest      Y       Z       b         c       YES       NO        04    05
2   forest      ...  
2   forest      ...

一些模拟数据: L 和 M 是数字(长度和质量),R 是是/否因子(生殖),3 年重复测量(2003-2005)

    # Ten individuals: three in the forest, three in the swamp, four in the field.
    ID <- seq_len(10)
    SITE <- rep(c("forest", "swamp", "field"), times = c(3, 3, 4))
    # 2003: length (L), mass (M, derived from L plus noise), reproduction (R).
    L_03 <- round(rnorm(10, mean = 100, sd = 1), digits = 3)
    M_03 <- round(10 + L_03 * 0.25 + rnorm(10, mean = 0, sd = 1), digits = 3)
    R_03 <- sample(c("Yes", "No"), size = 10, replace = TRUE)
    # 2004: length grows from the 2003 length; mass again follows length.
    L_04 <- round(2 + L_03 * 1.25 + rnorm(10, mean = 1, sd = 10), digits = 3)
    M_04 <- round(10 + L_04 * 0.25 + rnorm(10, mean = 0, sd = 10), digits = 3)
    R_04 <- sample(c("Yes", "No"), size = 10, replace = TRUE)
    # 2005: same recipe, except the mass noise is forced non-negative by abs().
    L_05 <- round(2 + L_04 * 1.25 + rnorm(10, mean = 1, sd = 10), digits = 3)
    M_05 <- round(10 + L_05 * 0.25 + abs(rnorm(10, mean = 0, sd = 10)), digits = 3)
    R_05 <- sample(c("Yes", "No"), size = 10, replace = TRUE)
    # Wide table: one row per individual, one L/M/R triplet per year.
    rm_data <- data.frame(
      ID, SITE,
      L_03, M_03, R_03,
      L_04, M_04, R_04,
      L_05, M_05, R_05
    )

方法 1:我先“手动”进行临时重塑:用 2003 和 2004 年的数据制作一个子集,再用 2004 和 2005 年的数据制作另一个,然后用 rbind 将两者合并

# First pair of consecutive years: the 2003 triplet next to the 2004 triplet,
# plus two constant columns recording which years the pair covers.
rm_data1 <- data.frame(
  rm_data[, 1:8],
  yr1 = rep(2003, 10),
  yr2 = rep(2004, 10)
)
# Second pair: the 2004 triplet next to the 2005 triplet.
rm_data2 <- data.frame(
  rm_data[, c(1, 2, 6:11)],
  yr1 = rep(2004, 10),
  yr2 = rep(2005, 10)
)
# Give both halves the same year-agnostic measurement names so they stack.
names(rm_data1)[3:8] <- c("L1", "M1", "R1", "L2", "M2", "R2")
names(rm_data2)[3:8] <- c("L1", "M1", "R1", "L2", "M2", "R2")
# One row per individual per pair of consecutive years.
data3 <- rbind(rm_data1, rm_data2)

方法 2?:我想用 reshape / melt / dcast 来完成这件事。我不知道是否可以直接对宽数据框使用 dcast,或者在用 melt 处理之后,如何用 dcast 将其转换为我想要的格式。

library(reshape2)
# The nine measurement columns, listed year by year (L, M, R within a year).
rm_measure_vars <- c(
  "L_03", "M_03", "R_03",
  "L_04", "M_04", "R_04",
  "L_05", "M_05", "R_05"
)
# Stack every measurement column into (variable, data) pairs, keeping ID and
# SITE as identifiers; the stacked values are stored in a column named "data".
rm_data_melt <- melt(
  rm_data,
  id.vars = c("ID", "SITE"),
  measure.vars = rm_measure_vars,
  value.name = "data"
)

我在融化的数据中添加了测量年份的代号

# Reduce each melted variable name (e.g. "L_03") to its trailing two-digit
# year code by keeping only the second capture group.
obs_year <- gsub(
  pattern = "(.*)([0-9]{2})",
  replacement = "\\2",
  x = rm_data_melt$variable
)
# Append the year code as an extra column of the melted table.
rm_data_melt <- cbind(rm_data_melt, obs_year = obs_year)

dcast看起来应该是这样的,但这还不是我需要的

# Spread the melted rows back out: one row per ID/SITE/year, one column per
# original measurement variable.
# value.var is named explicitly because the melted values live in "data"
# rather than the default "value"; without it dcast() guesses the last
# column, which is obs_year here, and fills the cells with year codes.
dcast(data = rm_data_melt, formula = ID + SITE + obs_year ~ variable,
      value.var = "data")
   ID   SITE obs_year    L_03   M_03 R_03    L_04   M_04 R_04    L_05   M_05 R_05
1   1 forest       03   99.96 35.364   No    <NA>   <NA> <NA>    <NA>   <NA> <NA>
2   1 forest       04    <NA>   <NA> <NA> 129.595 47.256  Yes    <NA>   <NA> <NA>
3   1 forest       05    <NA>   <NA> <NA>    <NA>   <NA> <NA> 177.607 58.204  Yes

任何建议将不胜感激

4

2 回答 2

2

我试了一下。reshape 这一步是最容易的部分,其余的我认为需要一些半手动处理。下面的代码应该能得到你想要的结果。

# Build one row per individual per pair of consecutive years
# (year1 -> year2), with the L/M/R measurements of both years side by side.

# Stack the three yearly (L, M, R) triplets; `time` is the 1-based position
# of the triplet within columns 3:11.
output <- reshape(rm_data, idvar = c("ID", "SITE"), varying = 3:11,
                  v.names = c("L_", "M_", "R_"), direction = "long")
output$time <- output$time + 2    # position 1..3 -> year code 3..5
names(output)[3:6] <- c("year1", "L_year1", "M_year1", "R_year1")
output$year2 <- output$year1 + 1

# reshape() stacks the rows year by year, so the row below a given row is a
# different individual, not the same individual one year later. Instead of
# shifting rows, look up each row's following year by joining the table to
# itself on (ID, SITE, year2). The inner join also drops final-year rows,
# which have no following year to pair with.
output <- merge(
  output,
  setNames(
    output[, c("ID", "SITE", "year1", "L_year1", "M_year1", "R_year1")],
    c("ID", "SITE", "year2", "L_year2", "M_year2", "R_year2")
  ),
  by = c("ID", "SITE", "year2")
)

# Sort by individual then year, and put the columns in the requested order.
output <- output[
  order(output$ID, output$year1),
  c("ID", "SITE", "L_year1", "L_year2", "M_year1", "M_year2",
    "R_year1", "R_year2", "year1", "year2")
]
rownames(output) <- NULL

希望这可以帮助!

于 2013-01-04T20:21:52.480 回答
0

安装并加载 onetree 软件包:devtools::install_github("yikeshu0611/onetree"); library(onetree)

3个步骤,使用onetree包

第一步

将数据重塑为长数据

# Wide -> long: one row per ID per year. The year suffix of the L_/M_/R_
# columns is moved into a new column named "year".
long1 <- reshape_toLong(
  data = rm_data,
  id = "ID",
  j = "year",
  value.var.prefix = c("L_", "M_", "R_")
)

第二步

舍弃第 5 年,只保留第 3 年和第 4 年;并把年份复制到新的一列 y 中

# Keep every row whose year is not 5.
long2 <- long1[long1$year != 5, ]
# Duplicate the year into `y` so it can be spread alongside the measurements.
long2[["y"]] <- long2[["year"]]

按年份将 long2 重塑为宽格式数据

# Long -> wide by year: one row per ID, with the L_/M_/R_ measurements and
# the copied year `y` each spread into one column per remaining year.
wide1 <- reshape_toWide(
  data = long2,
  id = "ID",
  j = "year",
  value.var.prefix = c("L_", "M_", "R_", "y")
)

现在,我们获得了第 3 年和第 4 年的数据,即您的目的数据中的第 1 年和第 2 年。所以我们在 colnames 中用 1 替换 3,用 2 替换 4。

# Relabel the year suffixes as positions within the pair: 3 becomes 1 (first
# year) and 4 becomes 2 (second year). gsub() rewrites every "3" and "4" that
# occurs anywhere in a column name.
colnames(wide1) <- gsub(4, 2, gsub(3, 1, colnames(wide1)))

第三步

再次执行第 2 步,这一次,我们放弃第 3 年,我们选择第 4 年和第 5 年。

# Second pair of years: keep every row whose year is not 3.
long3 <- long1[long1$year != 3, ]
# Duplicate the year into `y` so it can be spread alongside the measurements.
long3[["y"]] <- long3[["year"]]
# Long -> wide by year, one row per ID.
wide2 <- reshape_toWide(
  data = long3,
  id = "ID",
  j = "year",
  value.var.prefix = c("L_", "M_", "R_", "y")
)
# Relabel the year suffixes as positions within the pair: 4 becomes 1 and
# 5 becomes 2. gsub() rewrites every "4" and "5" in every column name.
colnames(wide2) <- gsub(5, 2, gsub(4, 1, colnames(wide2)))

最后一步

用 rbind 合并 wide1 和 wide2

# Stack the two year-pair tables (3 -> 4 and 4 -> 5) into one.
data <- rbind(wide1, wide2)
# Show the combined table ordered by ID, so each individual's rows are adjacent.
data[order(data$ID), ]


   ID   SITE     L_1    M_1 R_1 y1     L_2    M_2 R_2 y2
1   1 forest 100.181 34.279 Yes  3  131.88 50.953  No  4
11  1 forest  131.88 50.953  No  4 158.642 50.255  No  5
2   2 forest 101.645 36.667 Yes  3 123.923 43.915  No  4
12  2 forest 123.923 43.915  No  4  163.81 55.979  No  5
3   3 forest  98.961 33.901 Yes  3 125.928 41.611  No  4
13  3 forest 125.928 41.611  No  4 165.865 57.417  No  5
4   4  swamp 100.807 36.254  No  3 117.856 48.634 Yes  4
14  4  swamp 117.856 48.634 Yes  4 137.487 50.945  No  5
5   5  swamp   99.75 33.881  No  3 132.419 50.563 Yes  4
15  5  swamp 132.419 50.563 Yes  4 168.461 58.373 Yes  5
6   6  swamp 100.463 34.859 Yes  3 122.884 40.301  No  4
16  6  swamp 122.884 40.301  No  4  152.85 57.491  No  5
7   7  field 102.527 34.521  No  3 123.363 35.935  No  4
17  7  field 123.363 35.935  No  4     168 55.692  No  5
8   8  field  99.957 35.236 Yes  3 139.083 34.793  No  4
18  8  field 139.083 34.793  No  4 177.648 62.638 Yes  5
9   9  field  100.16 36.454  No  3 135.468 45.115 Yes  4
19  9  field 135.468 45.115 Yes  4 180.666 57.233  No  5
10 10  field 100.037 35.612  No  3 139.165  46.95  No  4
20 10  field 139.165  46.95  No  4 169.333 55.782 Yes  5
于 2019-07-27T08:31:16.770 回答