Sample with a max

前端未结

关注

 5  1229

日久生厌 2021-01-19 01:57

If I want to sample numbers to create a vector I do:

# Reproducible draw: 200 integers from 1..100, sampled with replacement
set.seed(123)
x <- sample.int(100L, size = 200L, replace = TRUE)
sum(x)
# [1] 10228

What if I

5条回答

温柔的废话 (楼主)

2021-01-19 02:52

An attempt using R

# Config ----
# Fix the RNG state first so every later draw is reproducible
set.seed(123)
vec <- seq_len(100L)  # candidate values 1..100
target <- 100L        # required total
n <- 20L              # required number of elements

# R repeat loop
# sumto_repeat(vec, n, target): grows `res` one random draw from `vec` at a
# time inside a `repeat` loop, printing the running sum and length of `res`
# at the start of every pass.
# NOTE(review): the last two lines of this listing look damaged -- presumably
# the text between a `<` and a later `>` was stripped when the page was
# extracted. The comparisons are incomplete, the rest of the function (loop
# exit, return value, closing braces) and the call that creates `test` are
# missing, and the console prompt `sum(test)` has been fused onto the final
# line. Restore the original listing before running this.
sumto_repeat <- function(vec,n,target) {
  # Accumulator for the values drawn so far (starts empty)
  res <- integer()
  repeat {
    # Progress trace: current total and element count
    cat("begin:",sum(res),length(res),"\n")
    # Append a single random element of vec
    res <- c( res, sample(vec,1) )
    # Removes the element just appended; the condition is damaged (see note)
    if( sum(res)target) res <- res[-length(res)]
    # Damaged line: condition and body are incomplete (see note above)
    if( length(res)>n | length(res) sum(test)
[1] 100
> length(test)
[1] 20

Also, I'd give some thought to what distribution you'd like to be drawing from. I think that there are a few different ways of getting it to sum to exactly target with n elements (for instance, you could make the last element always be target - sum(res)) that may or may not have different distributional implications.

A very similar algorithm in Rcpp, for speeeeed!

# C++ body for inline::cxxfunction (Rcpp plugin). It tries to assemble `n`
# values taken from `x` whose total is exactly `s`.
#
# Arguments seen by the C++ code:
#   x  integer vector to sample from (cloned, never modified)
#   n  desired number of elements; only n[0] is read
#   s  desired sum; only s[0] is read
# Returns an integer vector of length maxAttempts (100). Slots that were never
# filled are NA and dropped elements are 0, so the caller must filter out both.
#
# On each attempt one element of `x` is drawn. Using the count and sum of the
# earlier draws the code then either finishes, zeroes a random slot, or resets
# the whole buffer to NA. When exactly one element is missing and the
# shortfall lies strictly between 0 and max(x), the final element is computed
# rather than drawn.
#
# Safety notes:
#   * draws use a random in-bounds position of `x`; using a drawn value as an
#     index reads past the end of `x` (e.g. for x = 1:100)
#   * the running sum skips NA slots, which are INT_MIN internally and would
#     overflow the accumulator after a reset
#   * the success printout never reads beyond the result buffer
#   * an empty `x` raises an error instead of dereferencing end()
# The string is single-quoted in R: keep apostrophes out of the C++ text and
# write backslashes doubled.
# NOTE(review): rand() is not controlled by set.seed(), so results cannot be
# reproduced from R; the diagnostics go to std::cout rather than the R console.
cpp_src <- '
Rcpp::IntegerVector xa = clone(x); // Vector to be sampled
Rcpp::IntegerVector na(n); // Number of elements in solution
Rcpp::IntegerVector sa(s); // Sum of solution

// Nothing to sample from: fail loudly instead of dereferencing end() below
if( xa.size() == 0 ) Rcpp::stop("x must contain at least one element");

int nsampled;
int currentSum;
int dropRandomIndex;
int numZeroes;
Rcpp::IntegerVector remainingQuantity(1);
int maxAttempts = 100;

// Create container for our results
Rcpp::IntegerVector res(maxAttempts);
std::fill( res.begin(), res.end(), NA_INTEGER );

// Largest value in x: upper bound for a computed (not drawn) final element
Rcpp::IntegerVector::iterator mx = std::max_element(xa.begin(), xa.end()) ;
std::cout << "mx = " << *mx << std::endl;

// Now draw repeatedly
nsampled = 0;
for( int i = 0; i < maxAttempts; i++ ) {
  std::cout << "\\n" << i;
  // Pick a random valid position of xa, so the read always stays in bounds
  int r = rand() % (int)xa.size();
  res[i] = xa[r];
  // Calculate n and s for current loop iteration
  numZeroes = 0;
  for( int j = 0; j < maxAttempts; j++) 
    if(res[j]==0) numZeroes++;
  std::cout << " nz= " << numZeroes ;
  // Count of kept elements, not counting the one just drawn
  nsampled = maxAttempts - sum( is_na(res) ) - numZeroes - 1;
  // Sum of the elements before the current draw. NA slots (left by a reset)
  // are skipped: they are INT_MIN and would overflow the accumulator.
  currentSum = 0;
  for( int j = 0; j < i; j++ )
    if( res[j] != NA_INTEGER ) currentSum += res[j];
  std::cout << " nsamp= " << nsampled << " sum= " << currentSum;
  if(nsampled == na[0]-1) {  
    std::cout << " One element away. ";
    remainingQuantity[0] = sa[0] - currentSum;
    std::cout << "remainingQuantity = " << remainingQuantity[0];
    if( (remainingQuantity[0] > 0) && (remainingQuantity[0]) < *mx ) {
      std::cout << "Within range.  Prepare the secret (cheating) weapon!\\n";
      std::cout << sa[0] << " ";
      std::cout << currentSum << " ";
      std::cout << remainingQuantity[0] << std::endl;
      // Always true inside the loop; the current draw is replaced by the
      // exact amount still missing
      if( i != maxAttempts ) {
        std::cout << "Safe to add one last element on the end.  Doing so.\\n";
        res[i] = remainingQuantity[0];
      }
      currentSum = sa[0];
      nsampled++;
      if(nsampled == na[0] && currentSum == sa[0]) std::cout << "It should end after this...nsamp= " << nsampled << " and currentSum= " << currentSum << std::endl;
      break;
    } else {
      std::cout << "Out of striking distance.  Dropping random element\\n";
      dropRandomIndex = 0 + (rand() % (int)(i - 0 + 1));
      res[dropRandomIndex] = 0;
    }
  }
  if(nsampled == na[0] && currentSum == sa[0]) {
      std::cout << "Success!\\n";
      // Print through slot i+1, but never past the end of the buffer
      for(int l = 0; l <= i+1 && l < res.size(); l++) 
        std::cout << res[l] << " " ;
      break;
  }
  if(nsampled == na[0] && currentSum != sa[0]) {
    std::cout << "Reached number of elements but sum is ";
    if(currentSum > sa[0]) {
      std::cout << "Too high. Blitz everything and start over!\\n";
      for(int k = 0; k < res.size(); k++) {
        res[k] = NA_INTEGER;
      }
    } else {
      std::cout << "Too low.  \\n";

    }
  }
  if( nsampled < na[0] && currentSum >= sa[0] ) {
    std::cout << "Too few elements but at or above the sum cutoff.  Dropping a random element and trying again.\\n";
    dropRandomIndex = 0 + (rand() % (int)(i - 0 + 1));
    res[dropRandomIndex] = 0;
  }
}
return res;
'

# Compile the C++ body into an R-callable function (inline + Rcpp plugin)
sumto <- cxxfunction(
  signature(x = "integer", n = "integer", s = "integer"),
  body = cpp_src,
  plugin = "Rcpp",
  verbose = TRUE
)

# Ask for 20 elements summing to 1000, then keep only the real draws:
# NA marks slots that were never filled, 0 marks elements that were dropped
testresult <- sumto(x = x, n = 20L, s = 1000L)
testresult <- testresult[!is.na(testresult) & testresult != 0]
testresult
cumsum(testresult)
length(testresult)

I tried it with a few different values, and it produces valid answers unless it runs away. There's a caveat here, which is that it cheats if it's one away from the desired number of elements and within "striking distance" -- i.e., rather than just drawing the last value, it calculates it whenever that number is valid.

Benchmarks

See gist for comparison code.

benchmarks

0 讨论(0)

查看其它5个回答