5

如何使用 plyr、reshape2、aggregate 函数和/或 data.table 更优雅地做到这一点?

library(plyr)

# Simulate 10 individuals (Ind1..Ind10); the Ind labels are recycled over the
# 20 rows, so each individual has one "Treat" row and one "Cont" row.
set.seed(1)
x <- data.frame(
  Ind = paste0("Ind", 1:10),
  Treatment = c(rep("Treat", 10), rep("Cont", 10)),
  value = rnorm(20, 60, 8)
)

# Treatment rows, with the measurement column renamed to "Treat"
tr <- subset(x, Treatment == "Treat")
tr <- rename(tr, c("value" = "Treat"))

# Control rows, with the measurement column renamed to "Cont"
ct <- subset(x, Treatment == "Cont")
ct <- rename(ct, c("value" = "Cont"))

# Drop the Treatment column (position 2) from both pieces and join them on Ind.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables rather than reserved words.
merge(ct[-2], tr[-2], by = "Ind", all = TRUE, sort = FALSE)

# Do not run, data.frame:
     Ind     Cont    Treat
1   Ind1 72.09425 54.98837
2   Ind2 63.11875 61.46915
3   Ind3 55.03008 53.31497
4   Ind4 42.28240 72.76225
5   Ind5 68.99945 62.63606
6   Ind6 59.64053 53.43625
7   Ind7 59.87048 63.89943
8   Ind8 67.55069 65.90660
9   Ind9 66.56977 64.60625
10 Ind10 64.75121 57.55689
4

3 回答 3

11

要添加到您的选项...

这是我们的起始数据:

# Fix the RNG seed so the simulated values are reproducible
set.seed(1)
# Ten individuals; the Ind labels are recycled across the 20 rows so that every
# individual gets one "Treat" row followed later by one "Cont" row.
x <- data.frame(
  Ind = paste0("Ind", seq_len(10)),
  Treatment = rep(c("Treat", "Cont"), each = 10),
  value = rnorm(n = 20, mean = 60, sd = 8)
)

xtabs

请注意,输出是一个 matrix,而不是 data.frame。

# Cross-tabulate value with Ind as rows and Treatment as columns
xtabs(formula = value ~ Ind + Treatment, data = x)
#        Treatment
# Ind         Cont    Treat
#   Ind1  72.09425 54.98837
#   Ind10 64.75121 57.55689
#   Ind2  63.11875 61.46915
#   Ind3  55.03008 53.31497
#   Ind4  42.28240 72.76225
#   Ind5  68.99945 62.63606
#   Ind6  59.64053 53.43625
#   Ind7  59.87048 63.89943
#   Ind8  67.55069 65.90660
#   Ind9  66.56977 64.60625

reshape

# Long -> wide: one row per Ind, one value.<level> column per Treatment level
reshape(data = x, idvar = "Ind", timevar = "Treatment", direction = "wide")
#      Ind value.Treat value.Cont
# 1   Ind1    54.98837   72.09425
# 2   Ind2    61.46915   63.11875
# 3   Ind3    53.31497   55.03008
# 4   Ind4    72.76225   42.28240
# 5   Ind5    62.63606   68.99945
# 6   Ind6    53.43625   59.64053
# 7   Ind7    63.89943   59.87048
# 8   Ind8    65.90660   67.55069
# 9   Ind9    64.60625   66.56977
# 10 Ind10    57.55689   64.75121

如果您想在使用 reshape 的同时更改列名,可以结合 setNames 使用:

# Reshape to wide form, then replace the generated column names in one step
setNames(
  reshape(data = x, idvar = "Ind", timevar = "Treatment", direction = "wide"),
  nm = c("Ind", "Treat", "Cont")
)

split+merge

同样,setNames可以在这里使用,或者您可以在之后重命名列。

# Split the Ind/value columns into one data frame per Treatment level
temp <- split(x[c("Ind", "value")], x$Treatment)
# Join the two pieces on Ind; the level names are used as column suffixes
merge(x = temp[[1]], y = temp[[2]], by = "Ind", suffixes = names(temp))
#      Ind valueCont valueTreat
# 1   Ind1  72.09425   54.98837
# 2  Ind10  64.75121   57.55689
# 3   Ind2  63.11875   61.46915
# 4   Ind3  55.03008   53.31497
# 5   Ind4  42.28240   72.76225
# 6   Ind5  68.99945   62.63606
# 7   Ind6  59.64053   53.43625
# 8   Ind7  59.87048   63.89943
# 9   Ind8  67.55069   65.90660
# 10  Ind9  66.56977   64.60625

ddply(来自 plyr)

(我并不经常使用“plyr”,所以完全不确定这是否是最好的方法)。

library(plyr)
# For each Ind, pull out the value recorded under each Treatment label
ddply(
  .data = x,
  .variables = "Ind",
  .fun = summarise,
  Treat = value[Treatment == "Treat"],
  Cont = value[Treatment == "Cont"]
)
#      Ind    Treat     Cont
# 1   Ind1 54.98837 72.09425
# 2  Ind10 57.55689 64.75121
# 3   Ind2 61.46915 63.11875
# 4   Ind3 53.31497 55.03008
# 5   Ind4 72.76225 42.28240
# 6   Ind5 62.63606 68.99945
# 7   Ind6 53.43625 59.64053
# 8   Ind7 63.89943 59.87048
# 9   Ind8 65.90660 67.55069
# 10  Ind9 64.60625 66.56977

unstack(好像选项还不够!)

# unstack() spreads value into one column per Treatment level; data.frame()
# binds that to the Ind column, and unique() removes the repeated rows that
# result because Ind appears once per treatment.
unique(data.frame(x["Ind"], unstack(x, form = value ~ Treatment)))
#      Ind     Cont    Treat
# 1   Ind1 72.09425 54.98837
# 2   Ind2 63.11875 61.46915
# 3   Ind3 55.03008 53.31497
# 4   Ind4 42.28240 72.76225
# 5   Ind5 68.99945 62.63606
# 6   Ind6 59.64053 53.43625
# 7   Ind7 59.87048 63.89943
# 8   Ind8 67.55069 65.90660
# 9   Ind9 66.56977 64.60625
# 10 Ind10 64.75121 57.55689
于 2013-04-16T09:50:52.150 回答
7

这里有一个data.table方法:

library(data.table)  # provides as.data.table(), setkey() and DT[i, j, by]

# Work on a data.table copy of x, keyed (and therefore sorted) by Ind
x.dt <- as.data.table(x)
setkey(x.dt, "Ind")
# Select each value by its Treatment label rather than by row position, so the
# columns stay correctly labelled whatever the row order within an individual.
x.dt[, list(Treat = value[Treatment == "Treat"],
            Cont = value[Treatment == "Cont"]),
     by = Ind]
#       Ind    Treat     Cont
#  1:  Ind1 57.73997 54.06263
#  2: Ind10 64.23664 65.98024
#  3:  Ind2 58.71422 58.01650
#  4:  Ind3 52.71239 62.64899
#  5:  Ind4 65.09401 75.51550
#  6:  Ind5 47.04052 61.80900
#  7:  Ind6 61.95129 55.20021
#  8:  Ind7 58.02494 55.41143
#  9:  Ind8 69.38424 57.71132
# 10:  Ind9 62.02491 57.06147
于 2013-04-16T09:33:53.763 回答
6

You can use function dcast() from library reshape2.

 library(reshape2)  # dcast() comes from reshape2
 # Name the value column explicitly; otherwise dcast() has to guess it and
 # prints a "Using value as value column" message.
 dcast(data = x, formula = Ind ~ Treatment, value.var = "value")
     Ind     Cont    Treat
1   Ind1 53.45988 53.68913
2  Ind10 54.02344 66.32866
3   Ind2 57.44591 62.32354
4   Ind3 67.53185 53.14807
5   Ind4 52.42713 55.04052
6   Ind5 63.80633 61.58893
7   Ind6 59.40308 51.66228
8   Ind7 67.79597 73.60620
9   Ind8 58.15420 65.06976
10  Ind9 61.45161 63.73947
于 2013-04-16T09:07:01.927 回答