1

我有一个由数据框组成的列表,其中各数据框只有部分列是重叠的,且列表中数据框的数量未知。如何仅使用基础 R(base R)高效地将这些数据框 rbind 在一起,并用零填充不重叠的列?

示例数据:

# Three example data frames whose column sets only partially overlap.
x <- data.frame(a = 1:2, b = 1:2, c = 1:2)
y <- data.frame(a = 1:2, r = 1:2, f = 1:2)
z <- data.frame(
  b = 1:3,
  c = 1:3,
  v = 1:3,
  t = c("A", "A", "D")
)

# The list of data frames that is to be row-bound.
L1 <- list(x, y, z)

期望的输出:

  a b c f r t v
1 1 1 1 0 0 0 0
2 2 2 2 0 0 0 0
3 1 0 0 1 1 0 0
4 2 0 0 2 2 0 0
5 0 1 1 0 0 A 1
6 0 2 2 0 0 A 2
7 0 3 3 0 0 D 3
4

2 回答 2

3

先为每个数据框补齐它缺少的列(用 0 填充),然后再 rbind:

# Every column name that occurs in at least one data frame of L1.
allnames <- unique(unlist(lapply(L1, names)))
# Add the columns each frame lacks as zero-filled columns, then stack the
# padded frames; rbind on data frames matches columns by name.
do.call(rbind, lapply(L1, function(frame) {
    absent <- setdiff(allnames, names(frame))
    frame[, absent] <- 0
    frame
}))
于 2013-11-06T05:22:32.263 回答
1

我有一个旧的(可能效率不高的)函数可以做到这一点。我在这里对它做了一处修改,使其可以指定填充值。

#' Row-bind a list of data frames whose columns only partially overlap.
#'
#' If every element has the same number of columns and only uses column names
#' found in the first element, the elements are passed straight to `rbind`.
#' Otherwise each element is copied into a matrix that has one column per
#' distinct column name (in order of first appearance), with `fill` in the
#' columns the element lacks, and those matrices are stacked.
#'
#' In both cases every column of the result is converted to character and
#' re-typed with `type.convert()`, so column types are re-inferred from the
#' values rather than carried over from the inputs.
#'
#' @param datalist A list of data frames.
#' @param keep.rownames If `TRUE`, the row names of the inputs are
#'   concatenated and made unique with `make.unique()`; otherwise the result
#'   keeps automatic row names.
#' @param fill Value placed in columns that an input does not have.
#' @return A data frame with one row per input row. An empty `datalist`
#'   yields an empty data frame.
RBIND <- function(datalist, keep.rownames = TRUE, fill = NA) {
  # Nothing to bind: return early instead of failing on datalist[[1]].
  if (length(datalist) == 0L) {
    return(data.frame())
  }

  col_counts <- vapply(datalist, ncol, integer(1))
  first_names <- names(datalist[[1]])
  same_layout <- all(col_counts == col_counts[1]) &&
    all(vapply(
      datalist,
      function(df) all(names(df) %in% first_names),
      logical(1)
    ))

  if (same_layout) {
    almost <- do.call("rbind", datalist)
  } else {
    all_names <- unique(unlist(lapply(datalist, names)))
    padded <- lapply(datalist, function(df) {
      block <- matrix(
        fill,
        nrow = nrow(df),
        ncol = length(all_names),
        dimnames = list(NULL, all_names)
      )
      block[, match(names(df), all_names)] <- as.matrix(df)
      block
    })
    almost <- do.call("rbind", padded)
  }

  # seq_len() keeps a zero-row result valid; 1:0 would give c(1, 0).
  Final <- as.data.frame(almost, row.names = seq_len(nrow(almost)))
  # as.is is given explicitly: leaving it out makes type.convert() warn on
  # R >= 4.0 (where it then falls back to TRUE anyway).
  Final <- data.frame(lapply(Final, function(column) {
    type.convert(as.character(column), as.is = TRUE)
  }))
  if (isTRUE(keep.rownames)) {
    row.names(Final) <- make.unique(unlist(lapply(datalist, row.names)))
  }
  Final
}

下面是该函数在您的示例数据上的运行结果。

# Bind the example list L1, using 0 instead of the default NA for columns a
# data frame does not have; the printed result is reproduced below.
RBIND(L1, fill = 0)
#     a b c r f v t
# 1   1 1 1 0 0 0 0
# 2   2 2 2 0 0 0 0
# 1.1 1 0 0 1 1 0 0
# 2.1 2 0 0 2 2 0 0
# 1.2 0 1 1 0 0 1 A
# 2.2 0 2 2 0 0 2 A
# 3   0 3 3 0 0 3 D
于 2013-11-06T05:24:05.823 回答