Suppose that I have five vectors:
A<-1:10
B<-1:10
C<-1:10
D<-1:10
E<-1:12
I could test two at a time using identical( ).
I had the same problem and decided to implement two solutions: one based on `Reduce` and one based on a double `for` loop.
Functions:
# Check whether all elements of `list` are identical() to each other.
#
# Neighbouring elements are compared with Reduce(); the first mismatch raises
# a private, classed condition that aborts the reduction early.
#
# Args:
#   list: a list (or vector) whose elements are to be compared.
# Returns:
#   TRUE if every element is identical to the others (also for empty or
#   single-element input), FALSE otherwise.
all_elements_the_same <- function(list) {
  # With no elements there is no differing pair, so the answer is TRUE. This
  # matches all_elements_the_same2() and avoids touching list[[1]] when empty.
  if (length(list) == 0L) {
    return(TRUE)
  }

  # A dedicated condition class lets the handler below react to a mismatch
  # only; any unrelated error keeps propagating instead of being reported as
  # "not identical".
  mismatch <- structure(
    base::list(message = "elements are not identical", call = NULL),
    class = c("elements_differ", "error", "condition")
  )

  # Compare two neighbours and hand the second one on to the next step
  comparison_func <- function(x, y) {
    if (!identical(x, y)) stop(mismatch)
    y
  }

  # The result of Reduce() itself is not inspected, so elements carrying
  # several classes (matrices, data frames, ...) are handled correctly.
  tryCatch(
    {
      Reduce(f = comparison_func, x = list)
      TRUE
    },
    elements_differ = function(cond) FALSE
  )
}
# Double loop solution: compare every unordered pair of elements of `list`
# with identical() and stop at the first pair that differs.
#
# Args:
#   list: a list (or vector) whose elements are to be compared.
#   ignore_names: NOTE(review): accepted but not used anywhere in this body.
# Returns:
#   FALSE as soon as two elements differ, TRUE otherwise (including for empty
#   or single-element input).
all_elements_the_same2 <- function(list, ignore_names = F) {
  positions <- seq_along(list)
  for (left in positions) {
    # Only later positions are visited, which skips self-comparisons and
    # pairs that were already checked in the opposite order.
    for (right in positions[positions > left]) {
      pair_matches <- identical(list[[left]], list[[right]])
      if (!pair_matches) {
        return(FALSE)
      }
    }
  }
  TRUE
}
Test objects:
l_testlist_ok = list(1:3, 1:3, 1:3, 1:3, 1:3, 1:3)
l_testlist_bad = list(1:3, 1:3, 1:4, 1:3, 1:3, 1:3)
l_testlist_bad2 = list(1:3, 1:3, 1:4, 1:3, 1:3, 1:3, 1:3, 1:3, 1:3, 1:3, 1:3, 1:3, 1:3, 1:3, 1:3)
Test functionality:
> all_elements_the_same(l_testlist_ok)
[1] TRUE
> all_elements_the_same(l_testlist_bad)
[1] FALSE
> all_elements_the_same(l_testlist_bad2)
[1] FALSE
> all_elements_the_same2(l_testlist_ok)
[1] TRUE
> all_elements_the_same2(l_testlist_bad)
[1] FALSE
> all_elements_the_same2(l_testlist_bad2)
[1] FALSE
Test time use:
> library(microbenchmark)
> microbenchmark(all_elements_the_same(l_testlist_ok),
+ all_elements_the_same(l_testlist_bad),
+ all_elements_the_same(l_testlist_bad2),
+ all_elements_the_same2(l_testlist_ok),
+ all_elements_the_same2(l_testlist_bad),
+ all_elements_the_same2(l_testlist_bad2), times = 1e4)
Unit: microseconds
expr min lq mean median uq max neval
all_elements_the_same(l_testlist_ok) 19.310 25.454 28.309016 26.917 28.380 1003.228 10000
all_elements_the_same(l_testlist_bad) 93.624 100.938 108.890823 103.863 106.497 3130.807 10000
all_elements_the_same(l_testlist_bad2) 93.331 100.938 107.963741 103.863 106.497 1181.404 10000
all_elements_the_same2(l_testlist_ok) 48.275 53.541 57.334095 55.881 57.930 926.866 10000
all_elements_the_same2(l_testlist_bad) 6.144 7.315 8.437603 7.900 8.778 998.839 10000
all_elements_the_same2(l_testlist_bad2) 6.144 7.315 8.564780 8.192 8.778 1323.594 10000
So apparently the error-handling (`try`) part slows it down considerably. It may still save time to use the `Reduce` variant if one has very large objects, but for smaller objects the double `for` loop seems to be the way to go.
The fastest and simplest solution uses Rcpp:
#include <Rcpp.h>
using namespace Rcpp;
// Compare two R objects with the same semantics as a plain identical(a, b)
// call in R.
//
// The third argument of R_compute_identical() is a bitmask of NON-default
// options. R's identical() has ignore.env = FALSE by default, which
// corresponds to bit 16; passing 0 would instead ignore closure environments.
inline bool same(SEXP a, SEXP b) {
  const int kIdenticalDefaultFlags = 16;
  return R_compute_identical(a, b, kIdenticalDefaultFlags);
}
// [[Rcpp::export]]
// Returns true when every element of `x` compares equal (via same()) to the
// first element. Lists with fewer than two elements yield true because the
// loop body never runs. Stops at the first mismatch.
bool identical_impl(List x) {
  const std::size_t count = x.size();
  std::size_t idx = 1;
  while (idx < count) {
    if (!same(x[0], x[idx])) {
      return false;
    }
    ++idx;
  }
  return true;
}
/*** R
# Variadic front end: collect the arguments into a list and let the compiled
# identical_impl() compare them.
identical2 <- function(...) {
  candidates <- list(...)
  identical_impl(candidates)
}
*/
Some benchmarks with other solutions:
A <- 1:10
B <- 1:10
C <- 1:10
D <- 1:10
E <- 1:12
# Variadic front end: collect the arguments into a list and let the compiled
# identical_impl() compare them.
identical2 <- function(...) {
  candidates <- list(...)
  identical_impl(candidates)
}
# Deduplicate the arguments with unique(); the result is TRUE exactly when a
# single distinct value remains (so a call without arguments gives FALSE).
identical3 <- function(...) {
  distinct_values <- unique(list(...))
  n_distinct <- length(distinct_values)
  n_distinct == 1L
}
# Compare every argument after the first against the first one with
# identical() and combine the per-element results with all().
identical4 <- function(...) {
  items <- list(...)
  # items[[1]] is looked up inside the helper, so it is only touched when
  # there is at least one other element to compare.
  matches_first <- function(candidate) identical(candidate, items[[1]])
  flags <- vapply(items[-1], matches_first, logical(1L), USE.NAMES = FALSE)
  all(flags)
}
# Compare every argument after the first against the first one using a
# version of identical() vectorized over its `x` argument.
# Note: the per-element results are returned as they are (one value per
# compared argument); they are not collapsed into a single TRUE/FALSE.
identical5 <- function(...) {
  items <- list(...)
  identical_over_x <- Vectorize(identical, vectorize.args = "x")
  identical_over_x(items[-1], items[[1L]])
}
# Single loop: compare the first argument with each argument in turn
# (itself included) and return FALSE at the first one that differs.
# A call without arguments returns TRUE.
identical6 <- function(...) {
  items <- list(...)
  for (candidate in items) {
    differs <- !identical(items[[1]], candidate)
    if (differs) {
      return(FALSE)
    }
  }
  TRUE
}
# Double loop: compare every unordered pair of arguments with identical()
# and return FALSE at the first pair that differs.
#
# Args:
#   ...: the objects to compare.
# Returns:
#   TRUE if all arguments are identical to each other (also when called with
#   zero or one argument), FALSE otherwise.
identical7 <- function(...) {
  l <- list(...)
  for (i in seq_along(l)) {
    for (j in seq_along(l)) {
      # Skip self-comparisons and pairs already visited in the other order
      if (i >= j) next
      # Compare the pair (i, j) itself. Comparing l[[1]] with l[[i]] here
      # would never look at the last element, because no j > i exists for it.
      if (!identical(l[[i]], l[[j]])) return(FALSE)
    }
  }
  TRUE
}
library(microbenchmark)
microbenchmark(
identical2(A, B, C, D, E),
identical3(A, B, C, D, E),
identical4(A, B, C, D, E),
identical5(A, B, C, D, E),
identical6(A, B, C, D, E),
identical7(A, B, C, D, E))
Results:
Unit: microseconds
expr min lq mean median uq max neval cld
identical2(A, B, C, D, E) 3.401 4.3065 5.32136 5.1245 5.5420 21.529 100 a
identical3(A, B, C, D, E) 6.480 7.8675 9.20970 8.3875 9.0175 26.739 100 b
identical4(A, B, C, D, E) 12.233 13.5680 15.48014 14.7755 15.5455 48.333 100 c
identical5(A, B, C, D, E) 90.177 93.1480 98.79570 95.2685 103.2765 178.657 100 e
identical6(A, B, C, D, E) 10.683 12.0650 13.43184 12.6820 13.4060 22.314 100 c
identical7(A, B, C, D, E) 28.202 31.0800 34.97819 32.4630 39.4960 68.902 100 d
My first thought is to call `unique` on a list of the vectors and check the length of the result. If two or more of the vectors differ, the resulting list will have a length greater than 1.
length(unique(list(A,B,C,D))) == 1
[1] TRUE
length(unique(list(A,B,C,D,E))) == 1
[1] FALSE
Another option, just for fun:
Vectorize(identical, 'x')(list(A, B, C, D, E), C)
I would just pick one, say `A`, and do all pair-wise comparisons with it.
all(sapply(list(B, C, D, E), FUN = identical, A))
# [1] FALSE
Remove the `all()` to see which one(s) are not identical:
sapply(list(B, C, D, E), FUN = identical, A)
# [1] TRUE TRUE TRUE FALSE
`identical` ought to be transitive, so if `A` is identical to `C` and to `D`, then `C` should be identical to `D`.
(Thanks to @docendo discimus for simplified syntax.)
This is pretty obvious, but: if there are a lot of elements and a good chance of failure, you'll want to be able to short circuit the comparisons. Here's a loop for that, with an example:
A = sample(1e3)
Alist <- replicate(1e6,A,simplify=FALSE)
Alist[[2]][1e3] <- 0
system.time({brkres <- {
ok=TRUE
for (i in seq_along(Alist)) if( !identical(Alist[[1]],Alist[[i]]) ){
ok=FALSE
break
}
ok
}})
# user system elapsed
# 0 0 0
system.time({allres <- all(sapply(Alist[-1], FUN = identical, Alist[[1]]))})
# user system elapsed
# 1.66 0.03 1.68
If you skip the `Alist[[2]][1e3] <- 0` line, so that all elements are identical, both approaches take the same time.