我正在尝试合并两个表，并按两个表中共有的分组（Name）提取对应的值。
我有 test1 表:
structure(list(Similarity = c(999L, 888L, 756L, 879L, 567L, 567L), Peak = c(797L, 833L,999L, 798L, 834L, 444L), Name = structure(c(2L, 5L, 6L, 1L, 3L, 4L), .Label = c("Benzene", "Cyclopentane", "Hexadecane", "Nafhtalene", "Tetradecene", "dodecadiene"), class = "factor"),Sample1 = c(22237L, 86032349L, NA, NA, NA, 3333L), Sample2 = c(444567L,
NA, NA, NA, 115127L, 22222L)), .Names = c("Similarity", "Peak", "Name", "Sample1", "Sample2"), class = "data.frame", row.names = c(NA, -6L))
和 test2 表:
structure(list(Similarity = c(757L, 859L, 999L, 879L, 577L),
Peak = c(798L, 797L, 999L, 834L, 833L), Name = structure(c(1L, 2L, 5L, 3L, 4L), .Label = c("Benzene", "Cyclopentane", "Hexadecane","Tetradecene", "dodecadiene"), class ="factor"),Sample3 = c(NA, 115127L, NA, NA, 86032349L), Sample4 = c(NA, 43359706L, NA,115127L, NA)),.Names = c("Similarity", "Peak", "Name", "Sample3", "Sample4"), class = "data.frame",row.names = c(NA, -5L))
当我合并两个表时:
cbind(test1,test2)
structure(list(Row.names = structure(c("1", "2", "3", "4", "5"), class ="AsIs"),Similarity.x = c(999L, 888L, 756L, 879L, 567L), Peak.x = c(797L, 833L, 999L, 798L, 834L), Name.x = structure(c(2L, 5L, 6L, 1L, 3L),.Label=c("Benzene","Cyclopentane","Hexadecane", "Nafhtalene", "Tetradecene", "dodecadiene"), class = "factor"), Sample1 = c(22237L, 86032349L, NA, NA, NA), Sample2 = c(444567L, NA, NA, NA, 115127L), Similarity.y = c(757L, 859L, 999L,879L, 577L), Peak.y = c(798L, 797L, 999L, 834L, 833L), Name.y = structure(c(1L,2L, 5L, 3L, 4L), .Label = c("Benzene", "Cyclopentane", "Hexadecane", "Tetradecene", "dodecadiene"), class = "factor"), Sample3 = c(NA, 115127L, NA, NA, 86032349L), Sample4 = c(NA, 43359706L, NA,115127L, NA)), .Names = c("Row.names", "Similarity.x", "Peak.x","Name.x", "Sample1", "Sample2", "Similarity.y", "Peak.y", "Name.y","Sample3", "Sample4"), row.names = c(NA, -5L), class = "data.frame")
我需要合并具有相同名称（Name）的行，保留各自对应的样本值（Sample 列），并提取最大的相似度（Similarity）。
期望的结果类似这样：
structure(list(Similarity = c(757L, 859L, 999L, 879L, 577L, 567L), Peak = c(798L, 797L, 999L, 834L, 833L, 444L), Name = structure(c(1L, 2L, 6L, 3L, 5L, 4L), .Label = c("Benzene", "Cyclopentane", "Hexadecane","Nafhtalene", "Tetradecene", "dodecadiene"), class = "factor"), Sample1 = c(NA, 22237L, NA, NA, 86032349L, 3333L), Sample2 = c(NA,444567L, NA, 115127L, NA, 22222L), Sample3 = c(NA, 115127L,NA, NA, 86032349L, NA), Sample4 = c(NA, 43359706L, NA, 115127L,NA, NA)), .Names = c("Similarity", "Peak", "Name", "Sample1", "Sample2", "Sample3", "Sample4"), class ="data.frame", row.names = c(NA,-6L))
有什么建议吗？