r - 如何将包含一列的数据框写入文件？

Question

这是我的虚拟数据集：

dataset<-data.frame(a=c(1,2,3,4),b=c('a','b','c','d'), c=c("HI","DD","gg","ff"))
g=list(c("a","b"),c(2,3,4), c(44,33,11,22),c("chr","ID","i","II"))
dataset$l<-g
dataset

 a b  c              l
1 1 a HI           a, b
2 2 b DD        2, 3, 4
3 3 c gg 44, 33, 11, 22
4 4 d ff chr, ID, i, II

> mode(dataset$l)
[1] "list"

当我尝试将数据集写入文件时：

> write.table(dataset, "dataset.txt", quote=F, sep="\t")
Error in write.table(x, file, nrow(x), p, rnames, sep, eol, na, dec, as.integer(quote),  : 
  unimplemented type 'list' in 'EncodeElement'

我怎么解决这个问题？

score 3 · Accepted Answer

保存的输出不可读。dump 或 dput 的输出是 ASCII 并且对于了解 R 对象结构的人来说是可读的，但我猜你希望它更传统地排列。

>  apply(dataset, 1, function(x) paste(x, sep=",", collapse=","))
[1] "1,a,HI,c(\"a\", \"b\")"                  
[2] "2,b,DD,c(2, 3, 4)"                       
[3] "3,c,gg,c(44, 33, 11, 22)"                
[4] "4,d,ff,c(\"chr\", \"ID\", \"i\", \"II\")"

反斜杠不出现在文本文件输出中：

 writeLines(con="test.txt", apply(dataset, 1, function(x) paste(x, sep=",", collapse=",")))
#-------output-----
1,a,HI,c("a", "b")
2,b,DD,c(2, 3, 4)
3,c,gg,c(44, 33, 11, 22)
4,d,ff,c("chr", "ID", "i", "II")

score 3 · Accepted Answer

我可以考虑几个选项，具体取决于您要实现的目标。

如果它仅用于显示，那么您可能只需要capture.output()or sink(); 这些都不是很方便读回R：

capture.output(dataset, file="myfile.txt")
### Result is a text file that looks like this:
#   a b  c              l
# 1 1 a HI           a, b
# 2 2 b DD        2, 3, 4
# 3 3 c gg 44, 33, 11, 22
# 4 4 d ff chr, ID, i, II
sink("myfile.txt")
dataset
sink()
## Same result as `capture.output()` approach

如果您希望能够将结果表读回 R 中（尽管没有保留“ l”列是列表的事实），您可以采用类似于 @DWin 建议的方法。

在下面的代码中，该dataset2[sapply...行标识了哪些变量是列表并将它们连接成一个字符串。因此，它们成为简单的字符变量，允许您使用write.table().

dataset2 <- dataset # make a copy just to be on the safe side
dataset2[sapply(dataset2, is.list)] <- apply(dataset2[sapply(dataset2, is.list)], 
                                             1, function(x) 
                                                 paste(unlist(x), 
                                                       sep=", ", collapse=", "))
str(dataset2)
# 'data.frame':    4 obs. of  4 variables:
#  $ a: num  1 2 3 4
#  $ b: Factor w/ 4 levels "a","b","c","d": 1 2 3 4
#  $ c: Factor w/ 4 levels "DD","ff","gg",..: 4 1 3 2
#  $ l: chr  "a, b" "2, 3, 4" "44, 33, 11, 22" "chr, ID, i, II"
write.table(dataset2, "myfile.txt", quote=FALSE, sep="\t")
# can be read back in with: dataset3 <- read.delim("myfile.txt")

score 2 · Accepted Answer

如果要求之一是保留 excel 等的格式，这可能会有所帮助：

  writableTable <- tableFlatten(dataset, filler="")
  #   a b  c l.01 l.02 l.03 l.04
  #   1 a HI    a    b          
  #   2 b DD    2    3    4     
  #   3 c gg   44   33   11   22
  #   4 d ff  chr   ID    i   II

  write.csv(writableTable, "myFile.csv")

tableFlatten使用一个函数listFlatten，顾名思义，它采用嵌套列表并将它们展平。但是，如果列表中的元素大小不同，它会添加填充符（可以是NAs、空格或任何其他用户定义的选项）

它的代码如下。

tableFlatten <- function(tableWithLists, filler="") {
# takes as input a table with lists and returns a flat table
#  empty spots in lists are filled with value of `filler`
#
# depends on: listFlatten(.), findGroupRanges(.), fw0(.)

  # index which columns are lists
  listCols <- sapply(tableWithLists, is.list)

  tableWithLists[listCols]
  tableWithLists[!listCols]

  # flatten lists into table
  flattened <- sapply(tableWithLists[listCols], listFlatten, filler=filler, simplify=FALSE)

  # fix names
  for (i in 1:length(flattened)) colnames(flattened[[i]]) <- fw0(ncol(flattened[[i]]), 2)

  # REASSEMBLE, IN ORDER
    # find pivot point counts
    pivots <- sapply(findGroupRanges(listCols), length)

    #index markers
    indNonList <- indList <- 1

    # nonListGrp <- (0:(length(pivots)/2)) * 2 + 1
    # ListGrp <- (1:(length(pivots)/2)) * 2
    final <- data.frame(row.names=row.names(tableWithLists))
    for (i in 1:length(pivots)) {
      if(i %% 2 == 1) {
          final <- cbind(final, 
                       tableWithLists[!listCols][indNonList:((indNonList<-indNonList+pivots[[i]])-1)]
                       )
      }  else  {
          final <- cbind(final, 
                       flattened[indList:((indList<-indList+pivots[[i]])-1)]
                       )
      }
    }

    return(final)
}


#=====================================

listFlatten <- function(obj, filler=NA) {
## Flattens obj like rbind, but if elements are of different length, plugs in value filler

  # Initialize Vars
  bind <- FALSE

  # IF ALL ELEMENTS ARE MATRIX-LIKE OR VECTORS, MAKE SURE SAME NUMBER OF COLUMNS
  matLike <- sapply(obj, function(x) !is.null(dim(x)))
  vecLike <- sapply(obj, is.vector)

  # If all matrix-like. 
  if (all(matLike))   {
    maxLng <- max(sapply(obj[matLike], ncol))
    obj[matLike] <- lapply(obj[matLike], function(x) t(apply(x, 1, c, rep(filler, maxLng - ncol(x)))))
    bind <- TRUE

  # If all vector-like
  }  else if (all(vecLike))  {
    maxLng <- max(sapply(obj[vecLike], length))
    obj[vecLike] <- lapply(obj[vecLike], function(x) c(x, rep(filler, maxLng - length(x)))) 
    bind <- TRUE

  # If all are either matrix- or vector-like 
  }  else if (all(matLike & vecLike))   {

    maxLng <- max(sapply(obj[matLike], ncol), sapply(obj[vecLike], length))

    # Add in filler's as needed
    obj[matLike] <- 
       lapply(obj[matLike], function(x) t(apply(x, 1, c, rep(filler, maxLng - ncol(x)))))
    obj[vecLike] <- 
       lapply(obj[vecLike], function(x) c(x, rep(filler, maxLng - length(x))))
    bind <- TRUE
  } 

  # If processed and ready to be returned, then just clean it up
  if(bind)  {
    ret <- (do.call(rbind, obj))
    colnames(ret) <- paste0("L", fw0(1:ncol(ret), digs=2))
    return(ret)
  }

  # Otherwise, if obj is sitll a list, continue recursively    
  if (is.list(obj)) { 
      return(lapply(obj, listFlatten))
  }

  # If none of the above, return an error. 
  stop("Unknown object type")
}
#--------------------------------------------

findGroupRanges <- function(booleanVec) {
# returns list of indexes indicating a series of identical values
  pivots <- which(sapply(2:length(booleanVec), function(i) booleanVec[[i]] != booleanVec[[i-1]])) 

  ### THIS ISNT NEEDED... 
  # if (identical(pivots, numeric(0)))
  #   pivots <- length(booleanVec)

  pivots <- c(0, pivots, length(booleanVec))
  lapply(seq(2, length(pivots)), function(i)
    seq(pivots[i-1]+1, pivots[i])
  )
}

#--------------------------------------------



fw0 <- function(num, digs=NULL, mkSeq=TRUE)  {
  ## formats digits with leading 0's. 
  ## num should be an integer or range of integers.
  ## if mkSeq=T, then an num of length 1 will be expanded to seq(1, num).   

  # TODO 1:  put more error check
  if (is.list(num))
    lapply(num, fw0)

  if (!is.vector(num)) {
    stop("num should be integer or vector")
  }

  # convert strings to numbers
  num <- as.numeric(num)

  # If num is a single number and mkSeq is T, expand to seq(1, num)
  if(mkSeq && !length(num)>1)
    num <- (1:num)

  # number of digits is that of largest number or digs, whichever is max
  digs <- max(nchar(max(abs(num))), digs)  

  # if there are a mix of neg & pos numbers, add a space for pos numbs
  posSpace <- ifelse(sign(max(num)) != sign(min(num)), " ", "")

  # return: paste appropriate 0's and preface neg/pos mark
  sapply(num, function(x) ifelse(x<0, 
    paste0("-", paste0(rep(0, max(0, digs-nchar(abs(x)))), collapse=""), abs(x)),
    paste0(posSpace, paste0(rep(0, max(0, digs-nchar(abs(x)))), collapse=""), x)
    ))
}

#-----------------------------------------------

score 1 · Accepted Answer

1

您可以为此使用 dput。

dput(dataset, "dataset.txt")

于 2012-11-24T21:05:11.230 回答

score 1 · Accepted Answer

1

你也可以使用 save()

 save(dataset, file="dataset.RData")

于 2012-11-24T23:59:08.507 回答

score 0 · Accepted Answer

@Ananda 提供的答案非常好，但是，当我有一个包含两列是列表的数据框时遇到了一个问题。

dataset<-data.frame(a=c(1,2,3,4),b=c('a','b','c','d'), c=c("HI","DD","gg","ff"))
g=list(c("a","b"),c(2,3,4), c(44,33,11,22),c("chr","ID","i","II"))
dataset$l<-g
dataset$l2<-g
dataset

  a b  c              l             l2
1 1 a HI           a, b           a, b
2 2 b DD        2, 3, 4        2, 3, 4
3 3 c gg 44, 33, 11, 22 44, 33, 11, 22
4 4 d ff chr, ID, i, II chr, ID, i, II

使用原始答案，两个列表列都包含两列的连接内容。

  a b  c                              l                             l2
1 1 a HI                     a, b, a, b                     a, b, a, b
2 2 b DD               2, 3, 4, 2, 3, 4               2, 3, 4, 2, 3, 4
3 3 c gg 44, 33, 11, 22, 44, 33, 11, 22 44, 33, 11, 22, 44, 33, 11, 22
4 4 d ff chr, ID, i, II, chr, ID, i, II chr, ID, i, II, chr, ID, i, II

相反，试试这个修改后的版本：

dataset2 <- dataset # make a copy just to be on the safe side
dataset2[sapply(dataset2, is.list)] <-
    sapply(dataset2[sapply(dataset2, is.list)], 
           function(x)sapply(x, function(y) paste(unlist(y),collapse=", ") ) )
dataset2

  a b  c              l             l2
1 1 a HI           a, b           a, b
2 2 b DD        2, 3, 4        2, 3, 4
3 3 c gg 44, 33, 11, 22 44, 33, 11, 22
4 4 d ff chr, ID, i, II chr, ID, i, II

score -1 · Accepted Answer

我偶然发现了这一点，虽然有很多很好的答案，但我最终做了其他事情。分享给后人。

library(dplyr)

flatten_list = function(x){
    if (typeof(x) != "list") {
        return(x)
    }
    sapply(x, function(y) paste(y, collapse = " | "))
}

data %>%
    mutate_each(funs(flatten_list)) ->
    write_csv("data.csv")

r - 如何将包含一列的数据框写入文件？

7 回答 7

Related

Reference