6

我发现 merge 有一些出乎意料(或者至少不太直观)的行为。不过也可能只是我没弄明白它本来应该如何工作:

我们先创建一些示例数据来做试验:

# Example data: 20 observations of ten two-level (1/2) integer columns A..J
# plus a numeric column K (values 1..3, with one NA in row 17).
# The list elements are already named, so no separate names attribute is given.
x <- structure(
  list(
    A = c(2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L),
    B = c(2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
          1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L),
    C = c(2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
          2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L),
    D = c(2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
          1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L),
    E = c(2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
          1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L),
    F = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
          1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L),
    G = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
          2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L),
    H = c(1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
          1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L),
    I = c(1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L,
          2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L),
    J = c(2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
          2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L),
    K = c(3, 3, 1, 3, 1, 3, 1, 2, 2, 2,
          1, 3, 2, 2, 2, 1, NA, 1, 2, 1)
  ),
  row.names = c(NA, 20L),
  class = "data.frame"
)

# Lookup table: every combination of the ten two-level (1/2) columns A..J,
# i.e. 2^10 = 1024 rows.
y <- expand.grid(rep(list(1:2), times = 10))
names(y) <- LETTERS[1:10]

# Stack three copies of the grid and tag each copy with K = 1, 2, 3
# (3 * 1024 = 3072 rows in total).
y <- rbind(y, y, y)
y$K <- rep(1:3, each = 1024)

# Attach a random key in 1..6 to every row. No seed is set, so the keys
# differ from run to run.
y$mergekey <- sample(1:6, 3072, replace = TRUE)

我的预期是:合并这两个数据集时,设置 `sort=FALSE` 和 `all.x=TRUE` 会给我一个包含 x 的所有行、并带有 `mergekey` 列的结果。

让我们试试:

# Left join of x onto y over all shared columns (A..K), with sorting disabled.
merge(x, y, by = intersect(names(x), names(y)), all.x = TRUE, sort = FALSE)
   A B C D E F G H I J  K mergekey
1  2 2 2 2 2 1 2 1 1 2  3        5
2  2 2 1 1 1 1 2 2 1 1  3        3
3  2 1 2 2 1 1 2 1 2 2  1        3
4  2 2 1 2 2 1 2 2 2 2  3        2
5  1 1 2 2 2 2 2 1 2 2  1        4
6  2 1 1 1 2 2 2 2 1 2  3        6
7  1 1 1 1 2 2 2 2 1 2  1        5
8  2 1 2 2 1 1 2 2 1 1  2        4
9  2 2 2 1 1 1 2 1 2 2  2        4
10 2 1 2 2 1 1 2 1 1 1  2        2
11 2 1 2 1 1 1 2 1 2 2  1        4
12 2 2 1 2 1 2 2 1 2 1  3        5
13 2 1 2 1 1 1 2 1 2 2  2        3
14 2 1 2 1 1 1 2 1 2 2  2        3
15 2 2 2 1 2 1 2 1 2 2  2        1
16 2 1 1 2 1 1 2 2 2 2  2        1
17 2 1 1 1 1 1 2 1 1 2  1        2
18 1 2 1 1 1 2 2 1 1 1  1        5
19 2 1 2 1 1 1 2 1 1 1  1        4
20 2 2 1 2 1 1 1 2 1 2 NA       NA

现在看来,x 的大部分行确实没有被重新排序,但未匹配的行被推到了最后,而不是保持它们原来的位置。

所以,我的问题是:如何让未匹配的行留在原来的位置?

PS:既然已经告诉 merge 不要排序,把未匹配的行推到最后难道不是有点不直观吗?我也不认为这与“不排序”的行为一致。

4

1 回答 1

3

plyr 包中的 join 函数可以直观地解决这个问题,无需额外的参数。

library(plyr)
# Left join of x onto y. `by` is left unset so join() picks the shared
# columns itself and reports them in a "Joining by:" message.
join(x, y, type = "left")

Joining by: A, B, C, D, E, F, G, H, I, J, K
   A B C D E F G H I J  K mergekey
1  2 2 2 2 2 1 2 1 1 2  3        4
2  2 2 1 1 1 1 2 2 1 1  3        3
3  2 1 2 2 1 1 2 1 2 2  1        5
4  2 2 1 2 2 1 2 2 2 2  3        3
5  1 1 2 2 2 2 2 1 2 2  1        6
6  2 1 1 1 2 2 2 2 1 2  3        6
7  1 1 1 1 2 2 2 2 1 2  1        4
8  2 1 2 2 1 1 2 2 1 1  2        2
9  2 2 2 1 1 1 2 1 2 2  2        4
10 2 1 2 2 1 1 2 1 1 1  2        6
11 2 1 2 1 1 1 2 1 2 2  1        1
12 2 2 1 2 1 2 2 1 2 1  3        3
13 2 1 2 1 1 1 2 1 2 2  2        2
14 2 2 2 1 2 1 2 1 2 2  2        6
15 2 1 1 2 1 1 2 2 2 2  2        2
16 2 1 1 1 1 1 2 1 1 2  1        3
17 2 2 1 2 1 1 1 2 1 2 NA       NA
18 1 2 1 1 1 2 2 1 1 1  1        1
19 2 1 2 1 1 1 2 1 2 2  2        2
20 2 1 2 1 1 1 2 1 1 1  1        1
于 2012-08-30T17:24:05.090 回答