2

我想按行对这个矩阵进行降序排序

 > set.seed(123); a <- matrix(rbinom(100,10,0.3),ncol=10)

      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    2    6    5    6    1    1    4    4    2     1
 [2,]    4    3    4    5    3    3    1    3    4     4
 [3,]    3    4    3    4    3    4    3    4    3     2
 [4,]    5    3    7    4    2    1    2    0    4     4
 [5,]    5    1    4    0    2    3    4    3    1     2
 [6,]    1    5    4    3    1    2    3    2    3     2
 [7,]    3    2    3    4    2    1    4    2    6     4
 [8,]    5    1    3    2    3    4    4    3    5     1
 [9,]    3    2    2    2    2    5    4    2    5     3
[10,]    3    6    1    2    5    2    3    1    2     3

> do.call(order,as.list(a[1,],a[2,]))
[1] 1

如何使用 do.call 和 order 对矩阵进行排序?

编辑。固定上面的矩阵以符合上面的代码。

4

3 回答 3

5

两种选择:

# Jaap
do.call(rbind, lapply(split(a, row(a)), sort, decreasing = TRUE))

# adaption of lmo's solution in the comments
for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)]

给出:

   [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
1     6    6    5    4    4    2    2    1    1     1
2     5    4    4    4    4    3    3    3    3     1
3     4    4    4    4    3    3    3    3    3     2
4     7    5    4    4    4    3    2    2    1     0
5     5    4    4    3    3    2    2    1    1     0
6     5    4    3    3    3    2    2    2    1     1
7     6    4    4    4    3    3    2    2    2     1
8     5    5    4    4    3    3    3    2    1     1
9     5    5    4    3    3    2    2    2    2     2
10    6    5    3    3    3    2    2    2    1     1

一个基准:

library(microbenchmark)
microbenchmark(dc.lapply.sort = do.call(rbind, lapply(split(a, row(a)), sort, decreasing = TRUE)),
               t.apply.sort = t(apply(a, 1, sort, decreasing = TRUE)),
               for.order = for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)],
               for.sort = for(i in 1:nrow(a)) a[i,] <- sort(a[i,], decreasing = TRUE),
               for.sort.list = for(x in seq_len(nrow(a))) a[x,] <- a[x,][sort.list(a[x,], decreasing = TRUE, method="radix")])

给出:

Unit: microseconds
           expr     min       lq      mean   median       uq      max neval cld
 dc.lapply.sort 189.811 206.5890 222.52223 217.8070 228.0905  332.034   100   c
   t.apply.sort 185.474 200.4515 212.59608 210.4930 220.0025  286.288   100  bc
      for.order  82.631  91.1860  98.66552  97.8475 102.9680  176.666   100 a  
       for.sort 167.939 187.5025 192.90728 192.1195 198.8690  256.494   100  b 
  for.sort.list 187.617 206.4475 230.82960 215.7060 221.6115 1541.343   100   c

然而应该注意的是,基准只对较大的数据集有意义,所以:

set.seed(123)
a <- matrix(rbinom(10e5, 10, 0.3), ncol = 10)

microbenchmark(dc.lapply.sort = do.call(rbind, lapply(split(a, row(a)), sort, decreasing = TRUE)),
               t.apply.sort = t(apply(a, 1, sort, decreasing = TRUE)),
               for.order = for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)],
               for.sort = for(i in 1:nrow(a)) a[i,] <- sort(a[i,], decreasing = TRUE),
               for.sort.list = for(x in seq_len(nrow(a))) a[x,] <- a[x,][sort.list(a[x,], decreasing = TRUE, method="radix")],
               times = 10)

给出:

Unit: seconds
           expr      min       lq     mean   median       uq      max neval  cld
 dc.lapply.sort 6.790179 6.924036 7.036330 7.013996 7.121343 7.351729    10    d
   t.apply.sort 5.032052 5.057022 5.151560 5.081459 5.177159 5.538416    10   c 
      for.order 1.368351 1.463285 1.514652 1.471467 1.583873 1.736544    10 a   
       for.sort 5.028314 5.102993 5.317597 5.154104 5.348614 6.123278    10   c 
  for.sort.list 2.417857 2.464817 2.573294 2.519408 2.726118 2.815964    10  b  

结论:与for-loop 结合使用order仍然是最快的解决方案。


使用 -package 的order2sort2功能grr可以进一步提高速度。将它们与上面最快的解决方案进行比较:

set.seed(123)
a <- matrix(rbinom(10e5, 10, 0.3), ncol = 10)

microbenchmark(for.order = for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)],
               for.order2 = for(i in 1:nrow(a)) a[i,] <- a[i,][rev(grr::order2(a[i,]))],
               for.sort2 = for(i in 1:nrow(a)) a[i,] <- rev(grr::sort2(a[i,])),
               times = 10)

给予:

Unit: milliseconds
       expr       min        lq      mean    median        uq      max neval cld
  for.order 1243.8140 1263.4423 1316.4662 1305.1823 1378.5836 1404.251    10   c
 for.order2  956.1536  962.8226 1110.1778 1090.9984 1233.4241 1368.416    10  b 
  for.sort2  830.1887  843.6765  920.5668  847.1601  972.8703 1144.135    10 a  
于 2017-01-11T15:24:00.250 回答
0

我也做了微基准测试,似乎订单解决方案赢了:)

>     microbenchmark(jaap1 = do.call(rbind, lapply(split(a, row(a)), sort, decreasing = TRUE)),
+                    apom = t(apply(a, 1, sort, decreasing = TRUE)),
+                    jaap2 = for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)],
+                    jaap3 = for(i in 1:nrow(a)) a[i,] <- sort(a[i,], decreasing = TRUE), 
+                    alpha = t(apply(a, 1, function(x) order(x, decreasing = T))),
+                    times = 1000L)
Unit: microseconds
  expr     min       lq     mean   median       uq      max neval
 jaap1 318.193 364.6125 404.3224 389.5845 417.6405 1422.087  1000
  apom 276.764 340.2740 389.1302 364.9650 398.3680 2854.710  1000
 jaap2 121.332 158.4845 189.5616 182.2070 202.2390 1170.602  1000
 jaap3 247.387 309.2445 351.6959 332.2710 365.3640 1361.720  1000
 alpha 139.244 178.7460 209.6122 202.8580 226.7585 1092.301  1000
于 2017-01-11T16:10:36.540 回答
0

t(apply(a, 1, sort, decreasing = TRUE))给出:

#       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#  [1,]    6    6    5    4    4    2    2    1    1     1
#  [2,]    5    4    4    4    4    3    3    3    3     1
#  [3,]    4    4    4    4    3    3    3    3    3     2
#  [4,]    7    5    4    4    4    3    2    2    1     0
#  [5,]    5    4    4    3    3    2    2    1    1     0
#  [6,]    5    4    3    3    3    2    2    2    1     1
#  [7,]    6    4    4    4    3    3    2    2    2     1
#  [8,]    5    5    4    4    3    3    3    2    1     1
#  [9,]    5    5    4    3    3    2    2    2    2     2
# [10,]    6    5    3    3    3    2    2    2    1     1
于 2017-01-11T15:43:57.433 回答