Sample with a max

前端 未结 5 1229
日久生厌
日久生厌 2021-01-19 01:57

If I want to sample numbers to create a vector I do:

# Reproducible draw: 200 values taken with replacement from the integers
# 1..100, followed by their total.
set.seed(123)
x <- sample(seq_len(100L), size = 200L, replace = TRUE)
sum(x)
# [1] 10228

What if I

5条回答
  •  温柔的废话
    2021-01-19 02:52

    An attempt using R

    # Config
    n <- 20L
    target <- 100L
    vec <- seq_len(100L)
    set.seed(123)

    # R repeat loop
    #
    # Draw values from `vec` (with replacement) until exactly `n` of them sum
    # to exactly `target`.
    #
    # NOTE(review): the published listing lost everything between a "<" and
    # the following ">" on two lines, so the rejection rules below are a
    # reconstruction built around the fragments that survived.
    #
    # Arguments:
    #   vec    - positive values to draw from.
    #   n      - required number of elements in the result.
    #   target - required sum of the result.
    # Returns a vector of length `n` whose elements come from `vec` and add up
    # to `target`.  Obviously infeasible requests are rejected up front; a
    # request that passes those checks but still has no solution (e.g. only
    # even values and an odd target) loops forever.
    sumto_repeat <- function(vec, n, target) {
      stopifnot(
        length(vec) > 0L,
        all(vec > 0),
        n >= 1L,
        n * min(vec) <= target,
        n * max(vec) >= target
      )
      res <- integer()
      repeat {
        cat("begin:", sum(res), length(res), "\n")
        # Index explicitly: sample(vec, 1) would draw from 1:vec when `vec`
        # has length one.
        res <- c(res, vec[sample.int(length(vec), 1L)])
        # Overshoot: throw away the value that was just drawn.
        if (sum(res) > target) {
          res <- res[-length(res)]
        }
        full <- length(res) >= n
        on_target <- sum(res) == target
        if (length(res) == n && on_target) {
          break
        }
        # Either all slots are used without hitting the target, or the target
        # was hit with too few elements: free a slot at random and carry on.
        if (full || on_target) {
          res <- res[-sample.int(length(res), 1L)]
        }
      }
      res
    }

    test <- sumto_repeat(vec = vec, n = n, target = target)
    sum(test)
    # [1] 100
    length(test)
    # [1] 20
    

    Also, I'd give some thought to what distribution you'd like to be drawing from. I think that there are a few different ways of getting it to sum to exactly target with n elements (for instance, you could make the last element always be target - sum(res)) that may or may not have different distributional implications.

    A very similar algorithm in Rcpp, for speeeeed!

    # C++ body compiled by cxxfunction() with the Rcpp plugin.  `x`, `n` and
    # `s` are the arguments named in the signature given to cxxfunction():
    #   x - integer vector of candidate values,
    #   n - number of elements wanted in the solution,
    #   s - sum the solution must reach.
    # The compiled function returns an integer vector of length 100 (one slot
    # per attempt).  Unused or discarded slots are NA and dropped slots are 0,
    # so the caller has to filter both out.  The search gives up after 100
    # attempts, in which case the surviving values need not satisfy n and s.
    #
    # Keep apostrophes out of the C++ text: the body lives inside a
    # single-quoted R string.
    cpp_src <- '
    Rcpp::IntegerVector xa = clone(x); // Vector to be sampled
    Rcpp::IntegerVector na(n); // Number of elements in solution
    Rcpp::IntegerVector sa(s); // Sum of solution
    
    // min_element/max_element are dereferenced below, so x must not be empty
    if( xa.size() == 0 ) Rcpp::stop("x must contain at least one value");
    
    int nsampled;
    int currentSum;
    int dropRandomIndex;
    int numZeroes;
    Rcpp::IntegerVector remainingQuantity(1);
    int maxAttempts = 100;
    
    // Create container for our results
    Rcpp::IntegerVector res(maxAttempts);
    std::fill( res.begin(), res.end(), NA_INTEGER );
    
    // Draw from the R random number generator so that set.seed() governs the
    // result.  Declared after res so the RNG state is written back before res
    // is released.
    Rcpp::RNGScope rngScope;
    
    // Calculate min/max so that the computed final element can be range-checked
    Rcpp::IntegerVector::iterator mn = std::min_element(xa.begin(), xa.end()) ;
    Rcpp::IntegerVector::iterator mx = std::max_element(xa.begin(), xa.end()) ;
    Rcpp::Rcout << "mx = " << *mx << std::endl;
    
    // Now draw repeatedly
    nsampled = 0;
    for( int i = 0; i < maxAttempts; i++ ) {
      Rcpp::Rcout << "\\n" << i;
      // Pick a random position in xa (unif_rand() lies strictly inside (0,1),
      // so the index is always within 0..size-1) and store that element
      int r = (int)(unif_rand() * xa.size());
      res[i] = xa[r];
      // Calculate n and s over the slots filled before this draw (0..i-1),
      // skipping dropped (0) and blitzed (NA) slots so the sum cannot overflow
      numZeroes = 0;
      nsampled = 0;
      currentSum = 0;
      for( int j = 0; j < i; j++) {
        if( res[j] == NA_INTEGER ) continue;
        if( res[j] == 0 ) { numZeroes++; continue; }
        nsampled++;
        currentSum += res[j];
      }
      Rcpp::Rcout << " nz= " << numZeroes ;
      Rcpp::Rcout << " nsamp= " << nsampled << " sum= " << currentSum;
      if(nsampled == na[0]-1) {  
        Rcpp::Rcout << " One element away. ";
        remainingQuantity[0] = sa[0] - currentSum;
        Rcpp::Rcout << "remainingQuantity = " << remainingQuantity[0];
        // The computed last element must lie inside the range of x (bounds included)
        if( (remainingQuantity[0] >= *mn) && (remainingQuantity[0] <= *mx) ) {
          Rcpp::Rcout << "Within range.  Prepare the secret (cheating) weapon!\\n";
          Rcpp::Rcout << sa[0] << " ";
          Rcpp::Rcout << currentSum << " ";
          Rcpp::Rcout << remainingQuantity[0] << std::endl;
          // i < maxAttempts always holds here, so the slot exists: replace the
          // tentative draw with the exact remainder
          Rcpp::Rcout << "Safe to add one last element on the end.  Doing so.\\n";
          res[i] = remainingQuantity[0];
          currentSum = sa[0];
          nsampled++;
          if(nsampled == na[0] && currentSum == sa[0]) Rcpp::Rcout << "It should end after this...nsamp= " << nsampled << " and currentSum= " << currentSum << std::endl;
          break;
        } else {
          Rcpp::Rcout << "Out of striking distance.  Dropping random element\\n";
          dropRandomIndex = (int)(unif_rand() * (i + 1));
          res[dropRandomIndex] = 0;
        }
      }
      if(nsampled == na[0] && currentSum == sa[0]) {
          Rcpp::Rcout << "Success!\\n";
          // The earlier slots already form the solution: discard the draw made
          // in this iteration so it is not returned as an extra element
          res[i] = NA_INTEGER;
          for(int l = 0; l < i; l++) 
            Rcpp::Rcout << res[l] << " " ;
          break;
      }
      if(nsampled == na[0] && currentSum != sa[0]) {
        Rcpp::Rcout << "Reached number of elements but sum is ";
        if(currentSum > sa[0]) {
          Rcpp::Rcout << "Too high. Blitz everything and start over!\\n";
          for(int k = 0; k < res.size(); k++) {
            res[k] = NA_INTEGER;
          }
        } else {
          Rcpp::Rcout << "Too low.  \\n";
    
        }
      }
      if( nsampled < na[0] && currentSum >= sa[0] ) {
        Rcpp::Rcout << "Too few elements but at or above the sum cutoff.  Dropping a random element and trying again.\\n";
        dropRandomIndex = (int)(unif_rand() * (i + 1));
        res[dropRandomIndex] = 0;
      }
    }
    return res;
    '
    
    # Compile the C++ body held in cpp_src into an R-callable function taking
    # three integer arguments.
    sumto <- cxxfunction(
      signature(x = "integer", n = "integer", s = "integer"),
      body = cpp_src,
      plugin = "Rcpp",
      verbose = TRUE
    )

    # Ask for 20 elements summing to 1000, then keep only the slots that hold
    # a real draw (neither NA nor 0).
    testresult <- sumto(x = x, n = 20L, s = 1000L)
    testresult <- testresult[!is.na(testresult) & testresult != 0]
    testresult
    cumsum(testresult)
    length(testresult)
    

    I tried it with a few different values, and it produces valid answers unless it runs away (that is, uses up all of its attempts without converging). There's a caveat here, which is that it cheats if it's one away from the desired number of elements and within "striking distance" -- i.e. rather than just drawing the last value, it calculates it, provided that number is valid.

    Benchmarks

    See gist for comparison code.

    benchmarks

提交回复
热议问题