Fastest way to sort each row of a large matrix in R

后端 未结 3 1174
北恋
北恋 2020-12-10 15:50

I have a large matrix:

# Reproducible test data: 9e7 uniform random numbers arranged in 300 columns,
# i.e. a 300,000 x 300 numeric matrix.
set.seed(1)
a <- matrix(runif(9e+07),ncol=300)

I want to sort each row in the matrix:

> system         


        
3条回答
  •  长情又很酷
    2020-12-10 16:10

    Another excellent method, which uses no external packages, comes from Martin Morgan's answer to "Fastest way to select i-th highest value from row and assign to new column":

    # Sort every row of `a` with a single call to order():
    #   * order(row(a), a) ranks all cells by row index first, then by value,
    #     so a[...] lists row 1's sorted values, then row 2's, and so on.
    #   * That vector is laid out row by row, so the matrix must be refilled
    #     with byrow = TRUE; the default column-wise fill would scatter each
    #     sorted row across several rows of the result.
    matrix(a[order(row(a), a)], ncol = ncol(a), byrow = TRUE)
    

    There is also an equivalent for sorting by columns under comments in the same link.

    Timing code using same data as Craig:

    # Reproducible numeric test data: 9e7 uniform random numbers in 300 columns
    # (a 300,000 x 300 matrix). Stored in the global `a`, which use_for() and the
    # benchmark expressions below read directly.
    set.seed(1)
    a <- matrix(runif(9e7),ncol=300)
    
    # Row-wise sort of the global matrix `a` using an explicit loop.
    # Takes no arguments; returns a copy of `a` in which every row has been
    # replaced by sort.int(row, method = "quick"). `a` itself is not modified.
    use_for <- function() {
        result <- a
        row_ids <- seq_len(nrow(a))
        for (r in row_ids) {
            current <- a[r, ]
            result[r, ] <- sort.int(current, method = "quick")
        }
        result
    }
    
    # Benchmark six row-sorting strategies on the numeric matrix `a`, evaluating
    # each expression three times (times = 3L).
    microbenchmark::microbenchmark(times=3L,
        t(apply(a,1,sort)),
        t(apply(a,1,sort.int, method='quick')),
        use_for(),
        Rfast::rowSort(a),
        t(apply(a,1,grr::sort2)),
        # a[order(row(a), a)] is laid out row by row, so the matrix has to be
        # filled with byrow = TRUE to give the row-sorted result. The timings
        # recorded below were measured without this argument.
        matrix(a[order(row(a), a)], ncol=ncol(a), byrow=TRUE)
    )
    

    Timings:

    Unit: seconds
                                            expr       min        lq      mean    median        uq       max neval
                            t(apply(a, 1, sort)) 37.875665 40.143190 41.098627 42.410715 42.710108 43.009502     3
      t(apply(a, 1, sort.int, method = "quick")) 26.406063 27.146861 27.714226 27.887659 28.368307 28.848955     3
                                       use_for() 20.038295 20.140692 20.504223 20.243088 20.737187 21.231285     3
                               Rfast::rowSort(a)  6.105679  6.460003  6.836455  6.814326  7.201844  7.589361     3
                      t(apply(a, 1, grr::sort2)) 11.912422 13.035231 13.667377 14.158040 14.544854 14.931669     3
     matrix(a[order(row(a), a)], ncol = ncol(a)) 10.307094 10.789946 11.294119 11.272797 11.787632 12.302466     3
    

    And to present a more complete picture, another test for character class (excluding Rfast::rowSort as it cannot handle character class):

    # Reproducible character test data: 9e6 lowercase letters sampled with
    # replacement, arranged in 300 columns (a 30,000 x 300 character matrix).
    # This overwrites the global `a` used by use_for() and the benchmark below.
    set.seed(1)
    a <- matrix(sample(letters, 9e6, TRUE),ncol=300)
    
    # Benchmark the row-sorting strategies on the character matrix `a`,
    # evaluating each expression once (times = 1L). use_for() reads the global
    # `a`, so it sorts the character matrix here.
    microbenchmark::microbenchmark(times=1L,
        t(apply(a,1,sort)),
        t(apply(a,1,sort.int, method='quick')),
        use_for(),
        # Left out of this run: Rfast::rowSort cannot handle character input.
        #Rfast::rowSort(a),
        t(apply(a,1,grr::sort2)),
        # a[order(...)] is laid out row by row, so the matrix has to be filled
        # with byrow = TRUE to give the row-sorted result. The timings recorded
        # below were measured without this argument.
        matrix(a[order(row(a), a, method="radix")], ncol=ncol(a), byrow=TRUE)
    )
    

    Timings:

    Unit: milliseconds
                                                              expr        min         lq       mean     median         uq        max neval
                                              t(apply(a, 1, sort)) 30392.7951 30392.7951 30392.7951 30392.7951 30392.7951 30392.7951     1
                        t(apply(a, 1, sort.int, method = "quick")) 29359.7711 29359.7711 29359.7711 29359.7711 29359.7711 29359.7711     1
                                                         use_for() 31018.8827 31018.8827 31018.8827 31018.8827 31018.8827 31018.8827     1
                                        t(apply(a, 1, grr::sort2))  2539.1711  2539.1711  2539.1711  2539.1711  2539.1711  2539.1711     1
     matrix(a[order(row(a), a, method = "radix")], ncol = ncol(a))   480.7405   480.7405   480.7405   480.7405   480.7405   480.7405     1
    

    Head to head:

    # Head-to-head on a larger character matrix (9e7 letters in 300 columns,
    # i.e. 300,000 x 300): per-row grr::sort2 versus one radix order() over
    # (row index, value). Each expression is evaluated once.
    set.seed(1)
    a <- matrix(sample(letters, 9e7, TRUE),ncol=300)
    microbenchmark::microbenchmark(times=1L,
        t(apply(a,1,grr::sort2)),
        # a[order(...)] is laid out row by row, so the matrix has to be filled
        # with byrow = TRUE to give the row-sorted result. The timings recorded
        # below were measured without this argument.
        matrix(a[order(row(a), a, method="radix")], ncol=ncol(a), byrow=TRUE)
    )
    

    Timings:

    Unit: seconds
                                                              expr       min        lq      mean    median        uq       max neval
                                        t(apply(a, 1, grr::sort2)) 29.098726 29.098726 29.098726 29.098726 29.098726 29.098726     1
     matrix(a[order(row(a), a, method = "radix")], ncol = ncol(a))  7.067744  7.067744  7.067744  7.067744  7.067744  7.067744     1
    

提交回复
热议问题