问题
I would like to create all possible combinations of a binary vector made of a fixed number of 0s and 1s. For example, with dim(v)=5x1, n1=3 (the number of 1s) and n0=2 (the number of 0s), I'd like to have something like:
1,1,1,0,0
1,1,0,1,0
1,1,0,0,1
1,0,1,1,0
1,0,1,0,1
1,0,0,1,1
0,1,1,1,0
0,1,1,0,1
0,1,0,1,1
0,0,1,1,1
I found some help reading this post: Create all possible combiations of 0,1, or 2 "1"s of a binary vector of length n. However, I would like to generate only the combinations I need, avoiding any waste of space (I think the size of the problem will increase exponentially with n).
回答1:
A slightly faster version of Marat's answer:
# Build every 0/1 vector of length n that contains exactly m ones.
# Returns a numeric matrix with one row per combination and n columns.
f.roland <- function(n, m) {
  # Each column of `positions` lists where the ones go for one combination.
  positions <- combn(seq_len(n), m)
  n_comb <- ncol(positions)
  # Shift the positions of combination k by (k - 1) * n so that they address
  # a flat vector laid out row by row (n entries per combination).
  row_offset <- (seq_len(n_comb) - 1) * n
  flat_index <- t(positions) + row_offset
  flat <- numeric(n_comb * n)
  flat[flat_index] <- 1
  # Fold the flat vector back into rows of length n.
  matrix(flat, nrow = n_comb, ncol = n, byrow = TRUE)
}
# Check that f.roland reproduces the output of f.2 for n = 16, m = 8
# (recorded result on the next line).
all.equal(f.2(16, 8), f.roland(16, 8))
#[1] TRUE
library(rbenchmark)
# Time f, f.2 and f.roland on the same inputs; the recorded output below
# shows 100 replications per expression.
benchmark(f(16,8),f.2(16,8),f.roland(16,8))
# test replications elapsed relative user.self sys.self user.child sys.child
#2 f.2(16, 8) 100 5.693 1.931 5.670 0.020 0 0
#3 f.roland(16, 8) 100 2.948 1.000 2.929 0.017 0 0
#1 f(16, 8) 100 8.287 2.811 8.214 0.066 0 0
回答2:
You can try this approach:
# Build every 0/1 vector of length n that contains exactly m ones.
#
# n: length of each binary vector (default 5).
# m: number of ones in each vector (default 3).
# Returns a numeric matrix with one row per combination and n columns.
f <- function(n = 5, m = 3) {
  # seq_len(n) rather than 1:n: for n = 0 the latter evaluates to c(1, 0),
  # which let combn() enumerate bogus positions instead of rejecting m > n.
  one_positions <- combn(seq_len(n), m = m)
  # apply() returns one column per combination, so transpose to get rows.
  t(apply(one_positions, 2, function(cm) replace(rep(0, n), cm, 1)))
}
# Example: every binary vector of length 5 with exactly three 1s
# (recorded output below, one combination per row).
f(5,3)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 1 1 0 0
# [2,] 1 1 0 1 0
# [3,] 1 1 0 0 1
# [4,] 1 0 1 1 0
# [5,] 1 0 1 0 1
# [6,] 1 0 0 1 1
# [7,] 0 1 1 1 0
# [8,] 0 1 1 0 1
# [9,] 0 1 0 1 1
# [10,] 0 0 1 1 1
The idea is to generate all combinations of indices for 1, and then to use them to produce the final result.
Another flavor of the same approach:
# Build every 0/1 vector of length n that contains exactly m ones, letting
# combn() apply the indicator function to each combination directly.
#
# n: length of each binary vector (default 5).
# m: number of ones in each vector (default 3).
# Returns a numeric matrix with one row per combination and n columns.
f.2 <- function(n = 5, m = 3) {
  # Turn one set of positions into a 0/1 vector of length n.
  as_indicator <- function(cm) replace(rep(0, n), cm, 1)
  # seq_len(n) rather than 1:n: for n = 0 the latter evaluates to c(1, 0),
  # which let combn() enumerate bogus positions instead of rejecting m > n.
  # combn() stores one combination per column, hence the transpose.
  t(combn(seq_len(n), m, FUN = as_indicator))
}
The second approach is about twice as fast:
library(rbenchmark)
# Time f against f.2 for n = 16, m = 8; the recorded output below shows
# 100 replications per expression.
benchmark(f(16,8),f.2(16,8))
# test replications elapsed relative user.self sys.self user.child sys.child
# 2 f.2(16, 8) 100 5.706 1.000 5.688 0.017 0 0
# 1 f(16, 8) 100 10.802 1.893 10.715 0.082 0 0
Benchmark
# Build the 0/1 combinations as a table: start from an all-zero table with
# one row per combination and n columns, then set the chosen columns to 1.
# NOTE(review): relies on combnPrim() returning one combination per column
# and on setDT()/set() from data.table being attached -- confirm.
f.akrun <- function(n = 5, m = 3) {
  one_cols <- combnPrim(1:n, m)
  zeros <- matrix(0, ncol(one_cols), n)
  DT <- setDT(as.data.frame(zeros))
  for (row in seq_len(nrow(DT))) {
    # Column `row` of one_cols names the columns that become 1 in this row.
    set(DT, i = row, j = one_cols[, row], value = 1)
  }
  DT
}
# Time f, f.2 and f.akrun for n = 16, m = 8; the recorded output below
# shows 100 replications per expression.
benchmark(f(16,8),f.2(16,8),f.akrun(16,8))
# test replications elapsed relative user.self sys.self user.child sys.child
# 2 f.2(16, 8) 100 5.464 1.097 5.435 0.028 0 0
# 3 f.akrun(16, 8) 100 4.979 1.000 4.938 0.037 0 0
# 1 f(16, 8) 100 10.854 2.180 10.689 0.129 0 0
@akrun's solution (f.akrun) is ~10% faster than f.2.
[EDIT] Another approach, which is even faster and simpler:
# Build every 0/1 vector of length n that contains exactly m ones by
# tabulating each combination of positions into n bins.
# Returns an integer matrix with one row per combination and n columns.
f.3 <- function(n = 5, m = 3) {
  # tabulate() counts how often each position 1..n occurs in the combination,
  # which is 1 for the chosen positions and 0 elsewhere.
  count_positions <- function(idx) tabulate(idx, nbins = n)
  # combn() stores one combination per column, hence the transpose.
  t(combn(n, m, FUN = count_positions))
}
回答3:
You could try combnPrim from gRbase along with set from data.table (which could be faster)
# Install gRbase via the Bioconductor installer script.
# NOTE(review): biocLite() appears to have been retired upstream in favour of
# BiocManager::install(), and the script is fetched over plain http --
# confirm before running.
source("http://bioconductor.org/biocLite.R")
biocLite("gRbase")
library(gRbase)
library(data.table)
n <-5
# Presumably one combination of 3 positions out of 1:n per column of indx --
# verify against the gRbase documentation.
indx <- combnPrim(1:n,3)
# All-zero table with one row per column of indx and n columns.
DT <- setDT(as.data.frame(matrix(0, ncol(indx),n)))
# For row i, set the columns listed in column i of indx to 1.
for(i in seq_len(nrow(DT))){
set(DT, i=i, j=indx[,i],value=1)
}
DT
# V1 V2 V3 V4 V5
#1: 1 1 1 0 0
#2: 1 1 0 1 0
#3: 1 0 1 1 0
#4: 0 1 1 1 0
#5: 1 1 0 0 1
#6: 1 0 1 0 1
#7: 0 1 1 0 1
#8: 1 0 0 1 1
#9: 0 1 0 1 1
#10: 0 0 1 1 1
回答4:
Here is another approach:
# Build every 0/1 vector of length n that contains exactly m ones.
# Returns a matrix with one row per combination and n columns.
func <- function(n, m) {
  # Turn one combination of positions into a length-n indicator vector.
  mark_ones <- function(a) {
    z <- integer(n)
    z[a] <- 1
    z
  }
  # combn() stores one combination per column, hence the transpose.
  t(combn(n, m, mark_ones))
}
# Example: every binary vector of length 5 with exactly two 1s
# (recorded output below, one combination per row).
func(n = 5, m = 2)
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 1 0 0 0
# [2,] 1 0 1 0 0
# [3,] 1 0 0 1 0
# [4,] 1 0 0 0 1
# [5,] 0 1 1 0 0
# [6,] 0 1 0 1 0
# [7,] 0 1 0 0 1
# [8,] 0 0 1 1 0
# [9,] 0 0 1 0 1
# [10,] 0 0 0 1 1
来源:https://stackoverflow.com/questions/28368072/create-combinations-of-a-binary-vector