If I want to sample numbers to create a vector I do:
# Reproducibly draw 200 integers, with replacement, from 1..100 and total them.
set.seed(123)
x <- sample.int(100L, size = 200L, replace = TRUE)
sum(x)
# [1] 10228
What if I want the sampled numbers to add up to a specific target instead?
An attempt using R
# Config: number of elements wanted, the sum they must reach, and the pool of
# candidate values; the seed is fixed so the run is repeatable.
n <- 20L
target <- 100L
vec <- seq_len(100L)
set.seed(123)
# R repeat loop
# Draw elements of `vec` at random (with replacement) until exactly `n` of them
# sum to `target`.
#
# NOTE(review): the published listing was damaged from the first comparison
# onwards -- the `>` / `<` operators and everything between the second `if` and
# the console output were lost. The comparisons, the "drop a random element"
# step and the stopping rule below are a reconstruction that is consistent with
# the surviving fragments and with the output shown underneath; confirm against
# the original if it is available.
#
# Arguments:
#   vec       values to sample from
#   n         required number of elements
#   target    required sum
#   verbose   print the running sum and length at the start of every iteration
#             (the trace the original always printed)
#   max_iter  stop with an error after this many draws; Inf means never give up
#
# Returns a vector of length `n` whose elements sum to `target`.
sumto_repeat <- function(vec, n, target, verbose = TRUE, max_iter = Inf) {
  stopifnot(length(vec) > 0L, length(n) == 1L, n >= 1L, length(target) == 1L)
  # Necessary (not sufficient) feasibility check: n copies of the smallest and
  # of the largest value must bracket the target, otherwise the loop below
  # could never finish.
  if (n * min(vec) > target || n * max(vec) < target) {
    stop("no ", n, " elements of `vec` can sum to ", target, call. = FALSE)
  }

  res <- integer()
  draws <- 0
  repeat {
    if (verbose) cat("begin:", sum(res), length(res), "\n")

    draws <- draws + 1
    if (draws > max_iter) {
      stop("no solution found within ", max_iter, " draws", call. = FALSE)
    }

    # Index explicitly: sample(vec, 1) would draw from 1:vec whenever `vec`
    # is a single number.
    res <- c(res, vec[sample.int(length(vec), 1L)])

    # A draw that overshoots the target is rejected.
    if (sum(res) > target) res <- res[-length(res)]

    if (length(res) == n && sum(res) == target) break

    # Too many elements, or the target was hit with too few: make room by
    # discarding one element at random and keep drawing.
    if (length(res) > n || (length(res) < n && sum(res) == target)) {
      res <- res[-sample.int(length(res), 1L)]
    }
  }
  res
}

# Example session (with the configuration above):
# > test <- sumto_repeat(vec, n, target)
# > sum(test)
# [1] 100
# > length(test)
# [1] 20
Also, I'd give some thought to what distribution you'd like to be drawing from. There are a few different ways of getting the result to sum to exactly `target` with `n` elements (for instance, you could always set the last element to `target - sum(res)`), and they may or may not have different distributional implications.
A very similar algorithm in Rcpp, for speeeeed!
# C++ body for `sumto(x, n, s)`: repeatedly draws elements of `x` (at most
# `maxAttempts` = 100 draws) looking for `n` non-zero values that sum to `s`.
# The result is an integer vector of length 100 in which unused slots are NA
# and discarded draws are 0; the R code further down strips both.
#
# When it is one element short and the missing amount lies strictly between 0
# and max(x), the final element is computed rather than drawn.
#
# NOTE(review): draws use C rand(), which set.seed() does not control, so runs
# are not reproducible from R.
# NOTE(review): keep apostrophes out of the C++ text -- it lives inside a
# single-quoted R string.
cpp_src <- '
  Rcpp::IntegerVector xa = clone(x); // Vector to be sampled
  Rcpp::IntegerVector na(n); // Number of elements in solution
  Rcpp::IntegerVector sa(s); // Sum of solution
  int nsampled;
  int currentSum;
  int dropRandomIndex;
  int numZeroes;
  Rcpp::IntegerVector remainingQuantity(1);
  int maxAttempts = 100;
  // An empty input has nothing to draw from (and no max element to read)
  if( xa.size() == 0 ) Rcpp::stop("x must contain at least one value");
  // Create container for our results
  Rcpp::IntegerVector res(maxAttempts);
  std::fill( res.begin(), res.end(), NA_INTEGER );
  // Largest value in xa: upper bound for a directly computed final element
  Rcpp::IntegerVector::iterator mx = std::max_element(xa.begin(), xa.end()) ;
  std::cout << "mx = " << *mx << std::endl;
  // Now draw repeatedly
  nsampled = 0;
  for( int i = 0; i < maxAttempts; i++ ) {
    std::cout << "\\n" << i;
    // Draw a random position in xa. (Previously a random value between min and
    // max was used as the index, which can read past the end of xa.)
    int r = rand() % (int)xa.size();
    res[i] = xa[r];
    // Calculate n and s for current loop iteration
    numZeroes = 0;
    for( int j = 0; j < maxAttempts; j++)
      if(res[j]==0) numZeroes++;
    std::cout << " nz= " << numZeroes ;
    nsampled = maxAttempts - sum( is_na(res) ) - numZeroes - 1;
    currentSum = std::accumulate(res.begin(),res.begin()+i,0); // Cant just use Rcpp sugar sum() here because it freaks at the NAs
    std::cout << " nsamp= " << nsampled << " sum= " << currentSum;
    if(nsampled == na[0]-1) {
      std::cout << " One element away. ";
      remainingQuantity[0] = sa[0] - currentSum;
      std::cout << "remainingQuantity = " << remainingQuantity[0];
      if( (remainingQuantity[0] > 0) && (remainingQuantity[0]) < *mx ) {
        std::cout << "Within range. Prepare the secret (cheating) weapon!\\n";
        std::cout << sa[0] << " ";
        std::cout << currentSum << " ";
        std::cout << remainingQuantity[0] << std::endl;
        if( i != maxAttempts ) {
          std::cout << "Safe to add one last element on the end. Doing so.\\n";
          res[i] = remainingQuantity[0];
        }
        currentSum = sa[0];
        nsampled++;
        if(nsampled == na[0] && currentSum == sa[0]) std::cout << "It should end after this...nsamp= " << nsampled << " and currentSum= " << currentSum << std::endl;
        break;
      } else {
        std::cout << "Out of striking distance. Dropping random element\\n";
        dropRandomIndex = 0 + (rand() % (int)(i - 0 + 1));
        res[dropRandomIndex] = 0;
      }
    }
    if(nsampled == na[0] && currentSum == sa[0]) {
      std::cout << "Success!\\n";
      // Stay inside res even when success happens on the very last attempt
      for(int l = 0; l <= i+1 && l < maxAttempts; l++)
        std::cout << res[l] << " " ;
      break;
    }
    if(nsampled == na[0] && currentSum != sa[0]) {
      std::cout << "Reached number of elements but sum is ";
      if(currentSum > sa[0]) {
        std::cout << "Too high. Blitz everything and start over!\\n";
        for(int k = 0; k < res.size(); k++) {
          res[k] = NA_INTEGER;
        }
      } else {
        std::cout << "Too low. \\n";
      }
    }
    if( nsampled < na[0] && currentSum >= sa[0] ) {
      std::cout << "Too few elements but at or above the sum cutoff. Dropping a random element and trying again.\\n";
      dropRandomIndex = 0 + (rand() % (int)(i - 0 + 1));
      res[dropRandomIndex] = 0;
    }
  }
  return res;
'

# Compile the C++ body into an R function taking three integer arguments.
# NOTE(review): cxxfunction() comes from the inline package, which is
# presumably attached elsewhere -- confirm.
sumto <- cxxfunction(
  signature(x = "integer", n = "integer", s = "integer"),
  body = cpp_src, plugin = "Rcpp", verbose = TRUE
)

# Ask for 20 elements of `x` summing to 1000, then strip the placeholders:
# NA marks never-used slots and 0 marks draws that were discarded.
testresult <- sumto(x = x, n = 20L, s = 1000L)
testresult <- testresult[!is.na(testresult)]
testresult <- testresult[testresult != 0]
testresult
cumsum(testresult)
length(testresult)
I tried it with a few different values, and it produces valid answers unless it runs away. There's one caveat: it cheats when it is one element away from the desired number of elements and within "striking distance" of the target -- rather than drawing the last value at random, it calculates that value directly, provided the resulting number is valid.
Benchmarks
See gist for comparison code.