Ordering Permutation in Rcpp i.e. base::order()

不打扰是莪最后的温柔 提交于 2019-11-30 04:03:55

Here's a simple version leveraging Rcpp sugar to implement an order function. We put in a check for duplicates so that we guarantee that things work 'as expected'. (There is also a bug with Rcpp's sort method when there are NAs, so that may want to be checked as well -- this will be fixed by the next release).

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
IntegerVector order_(NumericVector x) {
  if (is_true(any(duplicated(x)))) {
    Rf_warning("There are duplicates in 'x'; order not guaranteed to match that of R's base::order");
  }
  NumericVector sorted = clone(x).sort();
  return match(sorted, x);
}

/*** R
library(microbenchmark)
x <- runif(1E5)
identical( order(x), order_(x) )
microbenchmark(
  order(x),
  order_(x)
)
*/

gives me

> Rcpp::sourceCpp('~/test-order.cpp')

> set.seed(456)

> library(microbenchmark)

> x <- runif(1E5)

> identical( order(x), order_(x) )
[1] TRUE

> microbenchmark(
+   order(x),
+   order_(x)
+ )
Unit: milliseconds
      expr      min       lq   median       uq      max neval
  order(x) 15.48007 15.69709 15.86823 16.21142 17.22293   100
 order_(x) 10.81169 11.07167 11.40678 11.87135 48.66372   100
> 

Of course, if you're comfortable with the output not matching R, you can remove the duplicated check -- x[order_(x)] will still be properly sorted; more specifically, all(x[order(x)] == x[order_(x)]) should return TRUE.

Another solution based on the C++11:

// [[Rcpp::plugins(cpp11)]]
#include <Rcpp.h>
using namespace Rcpp;

template <int RTYPE>
IntegerVector order_impl(const Vector<RTYPE>& x, bool desc) {
    auto n = x.size();
    IntegerVector idx = no_init(n);
    std::iota(idx.begin(), idx.end(), static_cast<size_t>(1));
    if (desc) {
        auto comparator = [&x](size_t a, size_t b){ return x[a - 1] > x[b - 1]; };
        std::stable_sort(idx.begin(), idx.end(), comparator);
    } else {
        auto comparator = [&x](size_t a, size_t b){ return x[a - 1] < x[b - 1]; };
        std::stable_sort(idx.begin(), idx.end(), comparator);
        // simulate na.last
        size_t nas = 0;
        for (size_t i = 0; i < n; ++i, ++nas)
            if (!Vector<RTYPE>::is_na(x[idx[i] - 1])) break;
        std::rotate(idx.begin(), idx.begin() + nas, idx.end());
    }
    return idx;
}

// [[Rcpp::export]]
IntegerVector order2(SEXP x, bool desc = false) {
    switch(TYPEOF(x)) {
    case INTSXP: return order_impl<INTSXP>(x, desc);
    case REALSXP: return order_impl<REALSXP>(x, desc);
    case STRSXP: return order_impl<STRSXP>(x, desc);
    default: stop("Unsupported type.");
    }
}

/***R
int <- sample.int(1000, 1E5, replace = TRUE)
dbl <- runif(1E5)
chr <- sample(letters, 1E5, replace = TRUE)
library(benchr)
benchmark(order(int), order2(int))
benchmark(order(dbl), order2(dbl))
benchmark(order(chr), order2(chr))
*/

Compare performance:

R> int <- sample.int(1000, 1E5, replace = TRUE)

R> dbl <- runif(1E5)

R> chr <- sample(letters, 1E5, replace = TRUE)

R> library(benchr)

R> benchmark(order(int), order2(int))
Benchmark summary:
Time units : microseconds 
        expr n.eval  min lw.qu median mean up.qu  max  total relative
 order(int)    100  442   452    464  482   486 1530  48200      1.0
order2(int)    100 5150  5170   5220 5260  5270 6490 526000     11.2

R> benchmark(order(dbl), order2(dbl))
Benchmark summary:
Time units : milliseconds 
        expr n.eval   min lw.qu median  mean up.qu  max total relative
 order(dbl)    100 13.90 14.00  14.20 14.80  15.8 17.4  1480     1.98
order2(dbl)    100  7.11  7.13   7.15  7.26   7.3  8.8   726     1.00

R> benchmark(order(chr), order2(chr))
Benchmark summary:
Time units : milliseconds 
        expr n.eval   min lw.qu median  mean up.qu   max total relative
 order(chr)    100 128.0 131.0  133.0 133.0 134.0 148.0 13300     7.34
order2(chr)    100  17.7  17.9   18.1  18.2  18.3  22.2  1820     1.00

Note that radix method from the base order much faster.

Michel

Here is another approach using std::sort.

typedef std::pair<int, double> paired;

bool cmp_second(const paired & left, const paired & right) {
    return left.second < right.second;
}

Rcpp::IntegerVector order(const Rcpp::NumericVector & x) {
    const size_t n = x.size();
    std::vector<paired> pairs;
    pairs.reserve(n);

    for(size_t i = 0; i < n; i++)
        pairs.push_back(std::make_pair(i, x(i)));

    std::sort(pairs.begin(), pairs.end(), cmp_second<paired>);

    Rcpp::IntegerVector result = Rcpp::no_init(n);
    for(size_t i = 0; i < n; i++)
        result(i) = pairs[i].first;
    return result;
}
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!