7

我有如下所示的列。这些列成对出现,每一对的两列分别带有后缀“a”和“b”——例如 col1a、col1b、colNa、colNb 等,一直到文件末尾 (> 50000)。

# Example data: an index column `Ind` followed by three column pairs that
# share a prefix and differ only in the trailing "a"/"b" suffix. Two pairs
# hold integers drawn from 1:3, the last pair holds the letters "A"/"B".
# Values are sampled at random, so every run produces different rows.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently switch sampling to "without replacement".
mydataf <- data.frame(
  Ind = 1:5,
  col1a = sample(1:3, 5, replace = TRUE),
  col1b = sample(1:3, 5, replace = TRUE),
  colNa = sample(1:3, 5, replace = TRUE),
  colNb = sample(1:3, 5, replace = TRUE),
  K_a = sample(c("A", "B"), 5, replace = TRUE),
  K_b = sample(c("A", "B"), 5, replace = TRUE)
)

mydataf 
   Ind col1a col1b colNa colNb K_a K_b
1   1     1     1     2     3   B   A
2   2     1     3     2     2   B   B
3   3     2     1     1     1   B   B
4   4     3     1     1     3   A   B
5   5     1     1     3     2   B   A

除第一列(Ind)外,我想把每一对列合并成一列,使数据框如下所示,同时去掉后缀“a”和“b”。另外,合并后的字符或数字需要排序:1 排在 2 之前,A 排在 B 之前。

   Ind col1   colN  K_
    1   11     23   AB   
    2   13     22   BB
    3   12     11   BB
    4   13     13   AB
    5   11     23   AB   

编辑:如果列名彼此相似,答案中使用的 grep 函数(可能)会出问题。

# Example data whose column names are deliberately similar: the prefix
# "col_1_" is itself contained in "col_1_Na" / "col_1_Nb". There is no index
# column here; every column belongs to an "a"/"b" pair.
# Values are sampled at random, so every run produces different rows.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
mydataf <- data.frame(
  col_1_a = sample(1:3, 5, replace = TRUE),
  col_1_b = sample(1:3, 5, replace = TRUE),
  col_1_Na = sample(1:3, 5, replace = TRUE),
  col_1_Nb = sample(1:3, 5, replace = TRUE),
  K_a = sample(c("A", "B"), 5, replace = TRUE),
  K_b = sample(c("A", "B"), 5, replace = TRUE)
)
# Collapse every group of columns that share a name prefix into one column:
# per row, the group's values are sorted and pasted together with no separator.
n <- names(mydataf)
# Candidate prefixes: each column name with its last character removed.
nm <- c(unique(substr(n, 1, nchar(n)-1)))
df <- data.frame(sapply(nm, function(x){
                             # NOTE: grep() treats x as an unanchored regular expression, so it
                             # selects every name that merely contains x. The prefix "col_1_"
                             # therefore also picks up "col_1_Na" and "col_1_Nb", which is why
                             # that column shows four values per row in the output below.
                             idx <- grep(x, n)
                             cols <- mydataf[idx]
                             # apply() walks the selected columns row by row.
                             x <- apply(cols, 1,
                                       function(z) paste(sort(z), collapse = ""))
                             return(x)
                            }))
# Label the result columns with the prefixes.
names(df) <- nm
df

 col_1_ col_1_N K_
1   2233      23 BB
2   2233      22 BB
3   1123      13 AB
4   1223      12 AB
5   2333      33 AB
4

1 回答 1

5
mydataf
  Ind col1a col1b colNa colNb K_a K_b
1   1     2     1     1     1   A   A
2   2     1     2     1     3   B   A
3   3     1     2     3     2   A   A
4   4     1     2     3     1   A   B
5   5     1     2     2     1   A   A
# Collapse each "<prefix>a" / "<prefix>b" column pair of `mydataf` into a
# single column named "<prefix>": per row, the pair's values are sorted and
# pasted together with no separator. The first column ("Ind") is carried
# over on its own. All resulting columns hold pasted text, not numbers.
n <- names(mydataf)
# Drop the one-character suffix from every name except the first ("Ind").
nm <- c("Ind", unique(substr(n, 1, nchar(n) - 1)[-1]))
df <- data.frame(sapply(nm, function(x) {
  # Select columns by comparing whole names literally: the prefix itself or
  # the prefix plus "a"/"b". A regex anchored only at the end would also
  # select names that merely END with the prefix (e.g. "N_" hitting
  # "col_N_a"), and would misread metacharacters such as "." in a prefix.
  idx <- which(n %in% paste0(x, c("", "a", "b")))
  cols <- mydataf[idx]
  # Row by row: sort the pair's values, then glue them together.
  apply(cols, 1, function(z) paste(sort(z), collapse = ""))
}))
# sapply() labels the columns with the prefixes, but data.frame() may mangle
# them into syntactic names, so restore the exact prefixes.
names(df) <- nm
df
  Ind col1 colN K_
1   1   12   11 AA
2   2   12   13 AB
3   3   12   23 AA
4   4   12   13 AB
5   5   12   12 AA
于 2012-07-27T21:52:42.753 回答