r - 将逗号分隔的单元格分隔到新行

Question

嗨，我有一个带有逗号分隔列的表，我需要将逗号分隔的值转换为新行。例如，给定的表是

Name     Start      End 
A        1,2,3    4,5,6
B          1,2      4,5
C      1,2,3,4  6,7,8,9

我需要像这样转换它

Name Start End
   A     1   4
   A     2   5  
   A     3   6
   B     1   4
   B     2   5 
   C     1   6
   C     2   7
   C     3   8   
   C     4   9

我可以用 VB 脚本做到这一点，但我需要用 R 来解决。有人能帮忙解决这个问题吗？

score 3 · Accepted Answer

您也许应该在 SO（Stack Overflow）上问这个问题，因为它并不涉及统计学 :)

无论如何，我提出了一个非常复杂且丑陋的解决方案，可能对您有用：

# Build the example data: one row per name, with the Start and End values
# stored as comma-separated character strings.
x <- data.frame(
    Name = c("A", "B", "C"),
    Start = c("1,2,3", "1,2", "1,2,3,4"),
    End = c("4,5,6", "4,5", "6,7,8,9"),
    stringsAsFactors = FALSE
)

在 R 中看起来像：

> x
  Name   Start     End
1    A   1,2,3   4,5,6
2    B     1,2     4,5
3    C 1,2,3,4 6,7,8,9

借助 strsplit 调用进行数据转换：

# Split every comma-separated cell into its individual values.
# as.character() lets this also work when the columns were read as factors.
start_parts <- strsplit(as.character(x$Start), ",", fixed = TRUE)
end_parts <- strsplit(as.character(x$End), ",", fixed = TRUE)

# Start and End values are paired position by position, so every row must
# hold the same number of values in both columns.
stopifnot(identical(lengths(start_parts), lengths(end_parts)))

# Repeat each Name once per value in its row and flatten the split values.
# Building the data frame column by column (instead of via cbind(), which
# forces a character matrix) lets Start and End be numeric; non-numeric
# entries become NA with a warning.
data <- data.frame(
    Name = rep(x$Name, lengths(start_parts)),
    Start = as.numeric(unlist(start_parts)),
    End = as.numeric(unlist(end_parts)),
    stringsAsFactors = FALSE
)

命名新数据框：

# Label the three columns of the result.
colnames(data) <- c("Name", "Start", "End")

看起来像：

> data
  Name Start End
1    A     1   4
2    A     2   5
3    A     3   6
4    B     1   4
5    B     2   5
6    C     1   6
7    C     2   7
8    C     3   8
9    C     4   9

score 2 · Accepted Answer

这是一种适合您的方法。我假设您的三个输入向量位于不同的对象中。我们将把这些输入放进一个列表，并编写一个函数来处理每个对象，再借助 plyr 以 data.frame 的形式返回它们。

这里要注意的是：先将字符向量拆分为其组成部分，然后用 as.numeric 把拆分出的数字从字符形式转换为数值。由于 R 按列填充矩阵，因此我们定义了一个 2 列矩阵并让 R 为我们填充值。然后我们取出 Name 列，并把它们一起放进一个 data.frame。plyr 能很好地处理列表，并自动为我们将结果转换为 data.frame。

library(plyr)

# One comma-separated record per name: the name first, then all Start
# values followed by all End values.
a <- paste(c("A", 1, 2, 3, 4, 5, 6), collapse = ",")
b <- paste(c("B", 1, 2, 4, 5), collapse = ",")
c <- paste(c("C", 1, 2, 3, 4, 6, 7, 8, 9), collapse = ",")

# Collect the records in a list so they can be processed one at a time.
input <- list(a, b, c)

# Convert one comma-separated record ("Name,v1,v2,...") into a data frame.
#
# The first field is the name. The remaining fields are converted with
# as.numeric() and poured column-wise into a two-column matrix, so the first
# half of the values becomes Start and the second half becomes End.
#
# x: a character string of comma-separated fields.
# Returns a data.frame with columns Name, Start and End, one row per
# Start/End pair.
# Stops with an error when the number of values after the name is zero or
# odd, because matrix() would otherwise recycle values and misalign pairs.
splitter <- function(x) {
    fields <- unlist(strsplit(x, ","))
    values <- as.numeric(fields[-1])
    if (length(values) == 0 || length(values) %% 2 != 0) {
        stop(
            "expected a non-zero, even number of values after the name, got ",
            length(values),
            call. = FALSE
        )
    }
    # stringsAsFactors = FALSE keeps Name a character column on every R version
    out <- data.frame(
        fields[1],
        matrix(values, ncol = 2),
        stringsAsFactors = FALSE
    )
    colnames(out) <- c("Name", "Start", "End")
    out
}


# Apply splitter to every element of input and combine the resulting
# data frames into a single one.
ldply(.data = input, .fun = splitter)

和输出：

> ldply(input, splitter)
 Name Start End
1    A     1   4
2    A     2   5
3    A     3   6
4    B     1   4
5    B     2   5
6    C     1   6
7    C     2   7
8    C     3   8
9    C     4   9

score 1 · Accepted Answer

tidyr 中的 separate_rows() 函数是处理含有多个分隔值的观测的利器……

# create data 
library(tidyverse)
# Example data as a tibble; Start and End hold comma-separated strings.
# tibble() is used because data_frame() is deprecated.
d <- tibble(
  Name = c("A", "B", "C"),
  Start = c("1,2,3", "1,2", "1,2,3,4"),
  End = c("4,5,6", "4,5", "6,7,8,9")
)
d
# # A tibble: 3 x 3
#    Name   Start     End
#   <chr>   <chr>   <chr>
# 1     A   1,2,3   4,5,6
# 2     B     1,2     4,5
# 3     C 1,2,3,4 6,7,8,9

# tidy data: one row per Start/End pair.
# sep is given explicitly so values are split on commas only; the default
# pattern splits on any run of non-alphanumeric characters.
separate_rows(d, Start, End, sep = ",")
# # A tibble: 9 x 3
#    Name Start   End
#   <chr> <chr> <chr>
# 1     A     1     4
# 2     A     2     5
# 3     A     3     6
# 4     B     1     4
# 5     B     2     5
# 6     C     1     6
# 7     C     2     7
# 8     C     3     8
# 9     C     4     9

# use convert = TRUE to get integer columns instead of character ones;
# sep is given explicitly so values are split on commas only
separate_rows(d, Start, End, sep = ",", convert = TRUE)
# # A tibble: 9 x 3
#    Name Start   End
#   <chr> <int> <int>
# 1     A     1     4
# 2     A     2     5
# 3     A     3     6
# 4     B     1     4
# 5     B     2     5
# 6     C     1     6
# 7     C     2     7
# 8     C     3     8
# 9     C     4     9

score 0 · Accepted Answer

这是另一种做法，只是为了好玩。令 d 为原始数据。

# Split a comma-separated column of a data frame into its individual values.
#
# x:    unquoted name of the column to split; the name is captured with
#       substitute(), so write f(Start), not f("Start").
# ul:   if TRUE (default) return one flat character vector; if FALSE return
#       a list with one character vector per row.
# data: data frame holding the column. Defaults to the variable `d` visible
#       from where f is defined, which is what this helper originally read.
# Stops with an error if `data` has no column with that name.
f <- function(x, ul = TRUE, data = d) {
    col_name <- deparse(substitute(x))
    if (!(col_name %in% names(data))) {
        stop("no column named '", col_name, "' in data", call. = FALSE)
    }
    # as.character() so factor columns can be split as well
    pieces <- strsplit(as.character(data[[col_name]]), ",", fixed = TRUE)
    if (ul) {
        unlist(pieces)
    } else {
        pieces
    }
}

# Repeat each Name once per value in its row, then place the flattened
# Start and End values next to it.
data.frame(Name = rep(d$Name, lengths(f(End, FALSE))),
           Start = f(Start), End = f(End))
#   Name Start End
# 1    A     1   4
# 2    A     2   5
# 3    A     3   6
# 4    B     1   4
# 5    B     2   5
# 6    C     1   6
# 7    C     2   7
# 8    C     3   8
# 9    C     4   9

r - 将逗号分隔的单元格分隔到新行

4 回答 4

Related

Reference