0

这个问题已经困扰我一段时间了,需要一些帮助。我正在把以下文件(实际可能多于 3 个)读入数据框。我的输入文件如下所示: 文件1:

someName    someMOD someID
A   T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P),S762(P) 1
B   S495(P) 2
C   S162(P),Q159(D) 3
D   S45(P),C47(C),S48(P),S26(P) 4
E   S18(P)  5

文件2:

someName    someMOD someID
C   S162(P),Q159(D) 3
D   S45(P),C47(C),S48(P),S26(P) 4
F   S182(P) 6
E   S18(P)  5
Z   Q100(P) 9
A   T754(P),M691(O),S694(P),S739(P),S740(P) 1

文件3:

someName    someMOD someID
A   T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P) 1
B   S495(P) 2
D   S45(P),C47(C),S48(P),S26(P) 4
E   S18(P)  5
F   S182(P) 6
L   Z182(P) 8
C   S162(P),Q159(D) 3

我的代码:

  # List the Excel workbooks in the working directory. `pattern` is a regular
  # expression, not a shell glob, so the dot is escaped and the match anchored.
  fileList <- dir(pattern = "\\.xlsx?$")

  # Keep only the "Drug_Rep" workbooks and read each one into its own data
  # frame. Filtering on the name first avoids reading files that would be
  # discarded anyway. read.xls() is not base R; the package providing it
  # (presumably gdata) must already be attached.
  drugFiles <- fileList[grepl("Drug_Rep", fileList, fixed = TRUE)]
  a <- lapply(drugFiles, read.xls)

  # Now i want to merge my dataframes
  # Full outer join of two data frames on the someName/someID key columns.
  # Non-key columns present in both inputs (e.g. someMOD) receive merge()'s
  # default ".x"/".y" suffixes.
  mymerge <- function(x, y) {
    merge(x, y, by = c("someName", "someID"), all = TRUE)
  }

  Reduce(mymerge, a) # passing my list of dataframes 'a'

我在我的“a”列表上做了 dput():

list(structure(list(someName = structure(c(1L, 2L, 4L, 5L, 6L, 
7L, 3L), .Label = c("A", "B", "C", "D", "E", "F", "L"), class = "factor"), 
    someMOD = structure(c(6L, 5L, 4L, 2L, 3L, 7L, 1L), .Label = c("S162(P),Q159(D)", 
    "S18(P)", "S182(P)", "S45(P),C47(C),S48(P),S26(P)", "S495(P)", 
    "T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P)", 
    "Z182(P)"), class = "factor"), someID = c(1L, 2L, 4L, 5L, 
    6L, 8L, 3L)), .Names = c("someName", "someMOD", "someID"), class = "data.frame", row.names = c(NA, 
-7L)), structure(list(someName = structure(1:5, .Label = c("A", 
"B", "C", "D", "E"), class = "factor"), someMOD = structure(c(5L, 
4L, 1L, 3L, 2L), .Label = c("S162(P),Q159(D)", "S18(P)", "S45(P),C47(C),S48(P),S26(P)", 
"S495(P)", "T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P),S762(P)"
), class = "factor"), someID = 1:5), .Names = c("someName", "someMOD", 
"someID"), class = "data.frame", row.names = c(NA, -5L)), structure(list(
    someName = structure(c(2L, 3L, 5L, 4L, 6L, 1L), .Label = c("A", 
    "C", "D", "E", "F", "Z"), class = "factor"), someMOD = structure(c(2L, 
    5L, 4L, 3L, 1L, 6L), .Label = c("Q100(P)", "S162(P),Q159(D)", 
    "S18(P)", "S182(P)", "S45(P),C47(C),S48(P),S26(P)", "T754(P),M691(O),S694(P),S739(P),S740(P)"
    ), class = "factor"), someID = c(3L, 4L, 6L, 5L, 9L, 1L)), .Names = c("someName", 
"someMOD", "someID"), class = "data.frame", row.names = c(NA, 
-6L)))

填充列表时我的错误是什么?非常感谢任何帮助。我只是想得到如下输出:

在此处输入图像描述

4

2 回答 2

2

我之前给你的代码的问题在于:如果存在重复的列名,并且合并的数据集超过 3 个,merge 就会出错。你必须重命名各个 someMOD 列,以免它们相互冲突。为此,用 for 循环和用其他任何方法一样有效。

# Positions of the non-key columns, taken from the first data frame; these are
# the columns whose names would collide across repeated merges.
key_cols <- c("someName", "someID")
dupvars <- which(!(names(a[[1]]) %in% key_cols))

# Append each data frame's position in the list to those column names
# (someMOD -> someMOD1, someMOD2, ...), so every merged column stays unique.
a[] <- Map(function(df, idx) {
  names(df)[dupvars] <- paste0(names(df)[dupvars], idx)
  df
}, a, seq_along(a))

# and then merge
Reduce(mymerge, a)
于 2013-06-23T10:43:56.267 回答
1

也许问题在于,你实际上并不是要做标准意义上的 merge,而是要做 reshape。在这种情况下,你可以先给每个 data.frame 添加一个“时间”变量,再用 rbind 把所有 data.frame 合并在一起,然后使用 “reshape2” 包中的 dcast 来得到你想要的结果:

添加一个“时间”变量,并用 rbind 把各个 data.frame 合并在一起

# Tag every data frame in `a` with its position in the list (column `time`),
# then stack all of them into one long data frame.
tagged <- vector("list", length(a))
for (k in seq_along(a)) {
  tagged[[k]] <- data.frame(a[[k]], time = k)
}
temp <- do.call(rbind, tagged)
head(temp)
#   someName                                                 someMOD someID time
# 1        A T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P)      1    1
# 2        B                                                 S495(P)      2    1
# 3        D                             S45(P),C47(C),S48(P),S26(P)      4    1
# 4        E                                                  S18(P)      5    1
# 5        F                                                 S182(P)      6    1
# 6        L                                                 Z182(P)      8    1

将 data.frame 从“长”格式转换为“宽”格式

library(reshape2)
# Long -> wide: one row per someName/someID pair, one column per `time` value,
# each cell holding the matching someMOD string (NA where a pair is absent).
dcast(
  data = temp,
  formula = someName + someID ~ time,
  value.var = "someMOD"
)
#   someName someID                                                       1
# 1        A      1 T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P)
# 2        B      2                                                 S495(P)
# 3        C      3                                         S162(P),Q159(D)
# 4        D      4                             S45(P),C47(C),S48(P),S26(P)
# 5        E      5                                                  S18(P)
# 6        F      6                                                 S182(P)
# 7        L      8                                                 Z182(P)
# 8        Z      9                                                    <NA>
#                                                                 2
# 1 T754(P),M691(O),S692(P),S694(P),S739(P),S740(P),S759(P),S762(P)
# 2                                                         S495(P)
# 3                                                 S162(P),Q159(D)
# 4                                     S45(P),C47(C),S48(P),S26(P)
# 5                                                          S18(P)
# 6                                                            <NA>
# 7                                                            <NA>
# 8                                                            <NA>
#                                         3
# 1 T754(P),M691(O),S694(P),S739(P),S740(P)
# 2                                    <NA>
# 3                         S162(P),Q159(D)
# 4             S45(P),C47(C),S48(P),S26(P)
# 5                                  S18(P)
# 6                                 S182(P)
# 7                                    <NA>
# 8                                 Q100(P)
于 2013-06-23T06:42:50.253 回答