2

我正在尝试合并两个 data.frame,其中 NA 值可能出现在其中任意一个 data.frame 中。

# One-row data frame for Nom_xp "A1MRJ", Rep 1 (row name 32).
# Column names come from the list element names; GB28 and GB28.1 are NA here.
sampleA <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 1L,
    GB05 = 102L, GB05.1 = 102L,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = NA_integer_, GB28.1 = NA_integer_,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 32L,
  class = "data.frame"
)


# One-row data frame for Nom_xp "A1MRJ", Rep 2 (row name 33).
# GB05 and GB05.1 are (logical) NA here, while GB28 and GB28.1 are present.
sampleB <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 2L,
    GB05 = NA, GB05.1 = NA,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = 390L, GB28.1 = 390L,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 33L,
  class = "data.frame"
)

期望的输出是一个 data.frame:每个相同的 “Nom_xp” 只保留一行;如果某个值只存在于其中一个 data.frame 中,则用该值(来自 A 或 B)替换另一个 data.frame 中对应的 NA。

Nom_xp  GB05  GB05  GB18  GB18  GB06  GB06  GB27  GB27  GB24  GB24  GB28    GB28    GB15  GB15  GB02  GB02  GB10  GB10  GB14  GB14
A1MRJ   102 102 177 177 240 240 169 169 240 242 390 390 142 144 197 197 126 134 181 193

我本来以为:

# Full outer merge keyed on "Nom_xp": all = TRUE keeps unmatched rows from both
# A and B. Non-key columns that exist in both inputs come back as separate
# ".x" / ".y" columns instead of being combined, so NAs are not filled in.
output <- merge(A, B, by = "Nom_xp", all = TRUE)

或者

# Join A and B on "Nom_xp", keeping all matches.
# NOTE(review): `join` is presumably plyr::join -- confirm which package is loaded.
output <- join(A, B, by = "Nom_xp", match = "all")

能得到我想要的结果,但到目前为止都没有成功……我遗漏了什么?实际的 data.frame 不止一行。

4

2 回答 2

1

你只有一行数据吗?如果是这样,下面这样不就够了吗?结果会保存在 sampleB 中:

# One-row shortcut: copy every column that is NA in sampleB over from sampleA.
# is.na() on a data frame returns a logical matrix; it is only a valid column
# mask when there is exactly one row, so that assumption is checked up front
# (with more rows the flattened mask would address the wrong columns).
stopifnot(
  nrow(sampleA) == 1L,
  nrow(sampleB) == 1L,
  ncol(sampleA) == ncol(sampleB)
)
na_cols <- as.vector(is.na(sampleB))
if (any(na_cols)) {
  sampleB[, na_cols] <- sampleA[, na_cols]
}

不,我认为这里不需要用到 apply、join 或 merge。下面的写法未经测试,但应该可行。

# Fill each NA cell of sampleB with the value at the same position in sampleA.
# The fill is done column by column: indexing a data frame with a logical
# matrix goes through as.matrix(), which is a character matrix here because of
# the Nom_xp column, so the filled values (and their columns) would otherwise
# turn into strings.
stopifnot(identical(dim(sampleA), dim(sampleB)))
sampleB[] <- Map(
  function(b_col, a_col) {
    gaps <- is.na(b_col)
    b_col[gaps] <- a_col[gaps]
    b_col
  },
  sampleB,
  sampleA
)
于 2013-02-13T22:05:06.920 回答
0

我不清楚您的完整数据集是什么样子,但我猜想同一个 “Nom_xp” 可能对应多个样本,而不只是 2 个?并且您可能把所有数据都放在一个大的 data.frame 或类似的结构中?

如果是这样,下面这段代码或许是一个不错的起点(也许有人能帮忙把它改写得更高效?)。无论如何:

# Example rows: sampleA/sampleB share Nom_xp "A1MRJ", sampleC/sampleD share
# "ASDF". Column names come from the list element names.
sampleA <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 1L,
    GB05 = 102L, GB05.1 = 102L,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = NA_integer_, GB28.1 = NA_integer_,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 32L,
  class = "data.frame"
)

sampleB <- structure(
  list(
    Nom_xp = "A1MRJ", Rep = 2L,
    GB05 = NA, GB05.1 = NA,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 169L,
    GB24 = 240L, GB24.1 = 242L,
    GB28 = 390L, GB28.1 = 390L,
    GB15 = 142L, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = 181L, GB14.1 = 193L
  ),
  row.names = 33L,
  class = "data.frame"
)

sampleC <- structure(
  list(
    Nom_xp = "ASDF", Rep = 2L,
    GB05 = NA, GB05.1 = NA,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 12349L, GB27.1 = 3,
    GB24 = 234112, GB24.1 = 242L,
    GB28 = 234, GB28.1 = 390L,
    GB15 = NA, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 126L, GB10.1 = 134L,
    GB14 = NA, GB14.1 = 193L
  ),
  row.names = 34L,
  class = "data.frame"
)

sampleD <- structure(
  list(
    Nom_xp = "ASDF", Rep = 2L,
    GB05 = 214, GB05.1 = 34,
    GB18 = 177L, GB18.1 = 177L,
    GB06 = 240L, GB06.1 = 240L,
    GB27 = 169L, GB27.1 = 3,
    GB24 = NA, GB24.1 = 242L,
    GB28 = 234, GB28.1 = 390L,
    GB15 = 56, GB15.1 = 144L,
    GB02 = 197L, GB02.1 = 197L,
    GB10 = 15466L, GB10.1 = 134L,
    GB14 = 34, GB14.1 = 193L
  ),
  row.names = 35L,
  class = "data.frame"
)

cdat <- rbind(sampleA, sampleB, sampleC, sampleD) # simulating your data set (?)
dcols <- ncol(cdat)

# One output row per distinct Nom_xp. Because labels are written into columns
# 1 and 2, `mat` ends up as a character matrix and the means are stored as
# strings; convert with as.numeric() before doing arithmetic on them.
ids <- unique(cdat$Nom_xp)
mat <- matrix(nrow = length(ids), ncol = dcols)
colnames(mat) <- colnames(cdat)
for (j in seq_along(ids)) {
  # Exact comparison rather than grep(): a regex/substring match would also
  # pull in rows whose Nom_xp merely contains this id.
  g <- which(cdat$Nom_xp == ids[j])
  mat[j, 1] <- cdat[g[1], 1] # the shared Nom_xp
  # NOTE(review): this stores the row positions within cdat, not the Rep
  # values of those rows -- confirm which one is wanted.
  mat[j, 2] <- paste(g, collapse = " ")
  # Per-column mean over the group's rows, ignoring NAs; a value present in
  # only one row is therefore carried over unchanged.
  mat[j, 3:dcols] <- vapply(
    cdat[g, 3:dcols, drop = FALSE],
    mean,
    numeric(1),
    na.rm = TRUE
  )
}
于 2013-02-13T23:18:11.383 回答