-2

我有以下数据:

>Data
#  X        Y
#  1   123 234 345 456 
#  2   222 333 444 555 666

我想要以下结果:

>Data
#  X     Y
#  1    123 
#  1    234 
#  1    345 
#  1    456 
#  2    222 
#  2    333 
#  2    444
#  2    555 
#  2    666

请帮忙!

4

3 回答 3

2

这个问题实际上并没有在您之前的问题基础上更进一步。请尝试自己先做一些功课,或者至少展示您到目前为止所做的尝试!

# Split the space-separated Y strings into one column per token, then
# reshape to long form with X repeated alongside each value.
y_text <- as.character(Data$Y)
# read.table(fill = TRUE) sizes the table from the first five lines only, so
# count the fields on every line up front and pass explicit column names;
# otherwise a longer row further down would wrap onto extra rows.
con <- textConnection(y_text)
n_fields <- max(count.fields(con), na.rm = TRUE)
close(con)
Data1 <- cbind(X = Data$X, 
               read.table(text = y_text, 
                          fill = TRUE, header = FALSE,
                          col.names = paste0("V", seq_len(n_fields))))
# X is recycled across the stacked columns; rows with fewer tokens than the
# longest row show up as NA values.
data.frame(X = Data1[, 1], stack(Data1[-1]))
#    X values ind
# 1  1    123  V1
# 2  2    222  V1
# 3  1    234  V2
# 4  2    333  V2
# 5  1    345  V3
# 6  2    444  V3
# 7  1    456  V4
# 8  2    555  V4
# 9  1     NA  V5
# 10 2    666  V5
于 2013-06-10T12:16:26.323 回答
2

如果您的列Y是字符向量,那么应该这样做:

# Split each Y on spaces, label the pieces with X, and stack to long form.
# as.character() lets this also work when Y was read in as a factor, which
# strsplit() rejects as a non-character argument.
out <- stack(setNames(strsplit(as.character(df$Y), " "), df$X))
# strsplit() yields strings, so convert the stacked values back to numbers.
out$values <- as.numeric(out$values)

在 2e4 行(20000 行)的 data.frame 上对所有答案进行基准测试:

创建数据:

# Two-row sample matching the question. Y is quoted so each space-separated
# run is read as a single field. stringsAsFactors is set explicitly because
# its default changed in R 4.0.0; without it Y would be character there and
# the class summary shown below ("factor") would not reproduce.
df <- read.table(header = TRUE, stringsAsFactors = TRUE, text = "X        Y
    1   '123 234 345 456'
    2   '222 333 444 555 666'")
# thanks to @MatthewPlourde for the suggestion to use replicate
# Repeat the two rows 10000 times and bind them into one 20000-row data frame.
df <- do.call(rbind, replicate(10000, df, simplify = FALSE))

dim(df)
# [1] 20000     2

sapply(df, class)
        X         Y 
"integer"  "factor" 

函数:

# Arun's function
# Split every Y string on single spaces, tag each piece with its X value and
# stack the pieces into a long data frame.
#
# Args:
#   df: a data frame with columns X (ids) and Y (space-separated numbers,
#     character or factor).
# Returns:
#   A data frame with a numeric `values` column and a factor `ind` column
#   holding the X value each number came from.
Arun <- function(df) {
    pieces <- strsplit(as.character(df$Y), " ")
    names(pieces) <- df$X
    long <- stack(pieces)
    # stack() keeps the tokens as strings; turn them into numbers.
    long$values <- as.numeric(long$values)
    long
}

# Ananda's function
# Reshape Data from one space-separated string per row (column Y) to long form
# by parsing Y into a wide table and stacking its columns.
#
# Args:
#   Data: a data frame with an id column X and a column Y of
#     whitespace-separated values (character or factor).
# Returns:
#   A data frame with columns X, values and ind (the source column V1, V2,
#   ...), stacked column by column; rows with fewer tokens than the longest
#   row contribute NA values.
Ananda <- function(Data) {
    y_text <- as.character(Data$Y)
    # read.table(fill = TRUE) infers the column count from the first five
    # lines only, so a longer row further down would wrap onto extra rows and
    # break the alignment with X. Count the fields on every line first and
    # pass explicit column names so the table is sized correctly.
    # NOTE: this adds one extra pass over Y compared with a bare read.table().
    con <- textConnection(y_text)
    on.exit(close(con), add = TRUE)
    n_fields <- max(count.fields(con), na.rm = TRUE)
    Data1 <- cbind(X = Data$X, 
           read.table(text = y_text, 
                      fill = TRUE, header = FALSE,
                      col.names = paste0("V", seq_len(n_fields))))
    # X is recycled across the stacked V columns.
    data.frame(X = Data1[, 1], stack(Data1[-1]))
}

# Matthew's solution
# Group the Y strings by X, tokenize each group with scan(), and stack the
# per-group numeric vectors into a long data frame.
#
# Args:
#   d: a data frame with columns X (ids) and Y (whitespace-separated numbers,
#     character or factor).
# Returns:
#   A data frame with a numeric `values` column and a factor `ind` column
#   holding the X group each number came from.
Matthew <- function(d) {
    parse_group <- function(x) {
        # scan() reads every whitespace-separated token of the group as text.
        tokens <- scan(text=as.character(x), what='', quiet=TRUE)
        as.numeric(tokens)
    }
    per_group <- by(d$Y, d$X, parse_group)
    stack(per_group)
}

基准测试:

# Load the benchmarking package. library() stops with an error if the package
# is missing, whereas require() only returns FALSE and lets the next line fail
# with a less helpful message.
library(microbenchmark)
# Time each approach 5 times on the same data frame; the results are kept in
# a1/a2/a3 so they can be inspected afterwards.
microbenchmark(a1 <- Arun(df), a2 <- Ananda(df), a3 <- Matthew(df), times = 5)
Unit: milliseconds
              expr       min        lq    median        uq       max neval
    a1 <- Arun(df)  235.6945  258.8485  264.4166  329.2974  392.9559     5
  a2 <- Ananda(df) 6661.8461 6972.2823 7825.3701 8210.9970 9454.5762     5
 a3 <- Matthew(df) 3589.1784 3691.3826 3787.4163 4020.4895 5034.6580     5
于 2013-06-10T12:17:19.470 回答
2

这里已经有了很好的答案,但这是使用 `by` 和 `scan` 的另一种方式:

# Group Y by X, read every whitespace-separated token in each group with
# scan(), convert the tokens to numbers, and stack the groups into long form.
stack(
  by(
    d$Y,
    d$X,
    function(y) {
      tokens <- scan(text=as.character(y), what='', quiet=TRUE)
      as.numeric(tokens)
    }
  )
)
于 2013-06-10T12:36:22.570 回答