I have a list of the following format:
[[1]]
[[1]]$a
[1] 1
[[1]]$b
[1] 3
[[1]]$c
[1] 5
[[2]]
[[2]]$c
[1] 2
[[2]]$a
[1] 3
There i
If you know the possible values beforehand and you are dealing with large data, then perhaps using `data.table` together with `set()` will be fast:
cc <- createList(50000)
system.time({
  # Start from a table holding one all-NA numeric column per known id and
  # one row per element of `cc`.
  n_rows <- length(cc)
  na_column <- rep.int(NA_real_, n_rows)
  DT <- as.data.table(replicate(length(ids), na_column, simplify = FALSE))
  setnames(DT, ids)

  # Walk the list once; for every named value present in an element, write
  # it by reference into the matching row/column cell of DT.
  for (row_idx in seq_along(cc)) {
    entry <- cc[[row_idx]]
    for (col_name in names(entry)) {
      set(DT, i = row_idx, j = col_name, value = entry[[col_name]])
    }
  }
})
# user system elapsed
# 0.68 0.01 0.70
# Complete set of column names, in the order the final table should have.
full <- c('a', 'b', 'c')
system.time({
  for (xx in seq_along(cc)) {
    # names from `full` that this element does not carry
    mm <- setdiff(full, names(cc[[xx]]))
    # Normalise the element when a column is missing OR when its names are
    # not already exactly `full` in that order. Elements that have every
    # column but in a different order must be reordered too; otherwise
    # rbindlist() can line their values up by position rather than by name.
    if (length(mm) > 0L || !identical(names(cc[[xx]]), full)) {
      cc[[xx]] <- as.data.table(cc[[xx]])
      # any missing columns
      if (length(mm) > 0L) {
        # if required add additional columns
        cc[[xx]][, (mm) := as.list(rep(NA_real_, length(mm)))]
      }
      # put columns in correct order
      setcolorder(cc[[xx]], full)
    }
  }
  cdt <- rbindlist(cc)
})
# user system elapsed
# 21.83 0.06 22.00
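This second solution has been left here to show how `data.table` can be used poorly: converting every list element to its own `data.table` takes about 22 seconds here, versus about 0.7 seconds for the `set()` approach.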