0

您能否告诉我如何像这样转换数据框:

    tg  qr  loc a1  a2  a3  b1  b2  b3  c1  c2  c3
1   A   1   89  NA  NA  NA  1   2   3   1   2   3
2   A   1   61  1   2   3   NA  NA  NA  1   2   3
3   A   2   38  4   5   6   NA  NA  NA  NA  NA  NA
4   B   1   40  4   5   6   NA  NA  NA  NA  NA  NA
5   B   1   3   NA  NA  NA  NA  NA  NA  4   5   6

转换成这样:

    tg  qr  loc a1  a2  a3  b1  b2  b3  c1  c2  c3
1   A   1   15  1   2   3   1   2   3   1   2   3
2   A   2   95  4   5   6   NA  NA  NA  NA  NA  NA
3   B   1   42  4   5   6   NA  NA  NA  4   5   6

该函数应:

  • 将列“tg”和“qr”(即下方代码中的“target”和“query”)取值都相同的所有行合并为一行
  • 合并时,用已有的值填补所有 NA - 绝不能反过来用 NA 覆盖已有的值
  • 经常会出现这样的情况:同一个变量在被合并的两行中都有值,但这些值总是相等的(因此从哪一行取值并不重要)
  • “loc”列的值各不相同,但无关紧要,甚至可以直接删除该列

生成这些示例数据框的代码如下:

# Example input: five rows keyed by (target, query) with a random `loc`
# and three groups of value columns (a*, b*, c*), some of them missing.
# Every row is a character vector, so the bound matrix -- and therefore the
# resulting data frame columns -- hold text, not numbers.
df <- as.data.frame(do.call(rbind, list(
  c("A", "1", floor(runif(1, 1, 100)), rep(NA, 3), 1:3, 1:3),
  c("A", "1", floor(runif(1, 1, 100)), 1:3, rep(NA, 3), 1:3),
  c("A", "2", floor(runif(1, 1, 100)), 4:6, rep(NA, 3), rep(NA, 3)),
  c("B", "1", floor(runif(1, 1, 100)), 4:6, rep(NA, 3), rep(NA, 3)),
  c("B", "1", floor(runif(1, 1, 100)), rep(NA, 3), rep(NA, 3), 4:6)
)))
names(df) <- c(
  "target", "query", "loc",
  paste0(rep(c("a", "b", "c"), each = 3), 1:3)
)

# Expected result: one row per (target, query) pair, with the non-missing
# values of the merged rows filled in. `loc` is random and not meaningful.
df2 <- as.data.frame(do.call(rbind, list(
  c("A", "1", floor(runif(1, 1, 100)), 1:3, 1:3, 1:3),
  c("A", "2", floor(runif(1, 1, 100)), 4:6, rep(NA, 3), rep(NA, 3)),
  c("B", "1", floor(runif(1, 1, 100)), 4:6, rep(NA, 3), 4:6)
)))
names(df2) <- c(
  "target", "query", "loc",
  paste0(rep(c("a", "b", "c"), each = 3), 1:3)
)

谢谢您的支持。

4

2 回答 2

2

使用na.omit

library(data.table)
dt <- data.table(df)

# Within each (target, query) group, keep the first non-missing entry of every
# other column; a column that is entirely NA in the group yields NA.
dt[, lapply(.SD, function(col) col[!is.na(col)][1L]), by = .(target, query)]
#   target query loc a1 a2 a3 b1 b2 b3 c1 c2 c3
#1:      A     1  21  1  2  3  1  2  3  1  2  3
#2:      A     2  71  4  5  6 NA NA NA NA NA NA
#3:      B     1  25  4  5  6 NA NA NA  4  5  6
于 2013-11-06T19:42:51.620 回答
1

可能是这样的?

library(data.table)
dt <- data.table(df)

# Collapse rows sharing (target, query): turn the value columns into numbers,
# then average the non-missing values per group. Averaging only reproduces the
# stored value when duplicated entries within a group are equal; `loc` is
# averaged as well.
#
# Convert through as.character() first: when the columns are factors (what
# as.data.frame() yields from a character matrix before R 4.0), as.numeric()
# alone returns the internal level codes rather than the stored values -- the
# 1/1/1 and 2/2/2 rows in the output originally posted with this answer are
# such codes.
dt <- dt[, lapply(.SD, function(x) as.numeric(as.character(x))),
         by = c("target", "query")]
dt2 <- dt[, lapply(.SD, mean, na.rm = TRUE), by = c("target", "query")]
# mean(..., na.rm = TRUE) over an all-NA group gives NaN; is.na() is TRUE for
# NaN as well, so this resets those cells to NA.
dt2[is.na(dt2)] <- NA

dt2

> dt2
   target query loc a1 a2 a3 b1 b2 b3 c1 c2 c3
1:      A     1 2.0  1  1  1  1  1  1  1  1  1
2:      A     2 2.0  2  2  2 NA NA NA NA NA NA
3:      B     1 2.5  2  2  2 NA NA NA  2  2  2
于 2013-11-06T18:13:27.780 回答