We can use `.I` to extract the row indices, which should be faster:
# For each group of `b`, collect the row numbers (.I) of at most the first 10
# rows, then subset `df` once with the combined index vector.
# The index is capped at .N: with a plain seq_len(10), a group holding fewer
# than 10 rows would index past the end of .I, yield NA row numbers, and those
# would come back as all-NA rows in the result.
out <- df[df[, .I[seq_len(min(.N, 10L))], by = b]$V1]
dim(out)
#[1] 5000 2
Checking whether the result contains any NAs (as the OP mentioned in the comments):
# TRUE if at least one column of `out` contains an NA.
# anyNA() is applied column by column, so no full-length logical vector has to
# be built and OR-ed together for every column.
any(vapply(out, anyNA, logical(1L)))
#[1] FALSE
dim(df)
#[1] 374337 2
Benchmarks
# Return at most the first 10 rows of every group of column `b`.
#
# df: a data.table with a grouping column `b` (the `[, j, by]` form used
#     below is data.table syntax).
# Returns: the rows of `df` selected by the per-group row numbers.
f3 <- function(df) {
  # .I holds the row numbers of the current group. Capping the index at .N
  # keeps groups with fewer than 10 rows from producing NA row numbers, which
  # would otherwise show up as all-NA rows in the result.
  df[df[, .I[seq_len(min(.N, 10L))], by = b]$V1]
}
# Time the three candidate implementations on the same input, 10 runs each,
# with timings reported relative to one another.
microbenchmark(
  f1(df),
  f2(df),
  f3(df),
  times = 10L,
  unit = "relative"
)
#Unit: relative
# expr min lq mean median uq max neval cld
# f1(df) 5.727822 5.480741 4.945486 5.672206 4.317531 5.10003 10 b
# f2(df) 24.572633 23.774534 17.842622 23.070634 16.099822 11.58287 10 c
# f3(df) 1.000000 1.000000 1.000000 1.000000 1.000000 1.00000 10 a