R creating a sequence table from two columns

若如初见. 提交于 2019-12-28 04:33:12

问题


I have a table as below

# Example input: one row per product with its inclusive lower and upper bound.
product <- c("a", "b", "c")
min <- c(1, 5, 3)
max <- c(1, 7, 7)
dd <- data.frame(product = product, min = min, max = max)
> dd
  product min max
1       a   1   1
2       b   5   7
3       c   3   7

I want to create a table that looks like the one below, with one row for each value from min to max (inclusive) for each product:

product mm
a 1
b 5
b 6
b 7
c 3
c 4
c 5
c 6
c 7

How can I do this in R? Is there a package that would give quick results?


回答1:


With base R, you could do something like:

# Expand each product into one row per integer in its inclusive [min, max]
# range. SIMPLIFY = FALSE makes mapply() always return a list; without it,
# ranges that all share the same length (> 1) are collapsed into a matrix,
# unlist() leaves that matrix untouched, and `mm` no longer lines up
# row-for-row with `product`.
data.frame(product = rep(dd$product, dd$max - dd$min + 1),
           mm = unlist(mapply(seq, dd$min, dd$max, SIMPLIFY = FALSE)))
#   product mm
# 1       a  1
# 2       b  5
# 3       b  6
# 4       b  7
# 5       c  3
# 6       c  4
# 7       c  5
# 8       c  6
# 9       c  7



回答2:


Try

library(data.table)
# Group by product and emit every integer from min to max as column mm.
setDT(dd)[, .(mm = min:max), by = "product"]
#   product mm
#1:       a  1
#2:       b  5
#3:       b  6
#4:       b  7
#5:       c  3
#6:       c  4
#7:       c  5
#8:       c  6
#9:       c  7

A faster option is to use seq.int(min, max, 1L), as suggested by @David Arenburg:

 # Same grouping by product, with the sequence built by seq.int() in steps of 1L.
 setDT(dd)[, .(mm = seq.int(from = min, to = max, by = 1L)), by = "product"]

Benchmarks

library(stringi)
# Fixed seed so the randomly generated benchmark data is reproducible.
set.seed(24)
# De-duplicated product identifiers produced by stri_rand_strings(1e5, 4).
product <- unique(stri_rand_strings(1e5, 4))
# One lower bound drawn from 1:10 and one upper bound from 11:15 per product.
min1 <- sample(1:10, size = length(product), replace = TRUE)
max1 <- sample(11:15, size = length(product), replace = TRUE)
dd <- data.frame(product, min1, max1)
# Separate copy of the data; akrun() reads this one.
dd2 <- copy(dd)

# Base R approach: repeat each product once per value in its inclusive
# [min1, max1] range and pair it with the expanded sequence values.
# Reads the global data frame `dd` (columns product, min1, max1) and returns
# a data frame with columns `product` and `mm`, invisibly.
josilber <- function() {
  # Number of rows each product contributes.
  rows_per_product <- dd$max1 - dd$min1 + 1
  # SIMPLIFY = FALSE guarantees a list of sequences. With the default,
  # mapply() returns a matrix whenever all ranges have the same length (> 1),
  # and unlist() would then leave `mm` as a matrix whose row count does not
  # match `product`.
  ranges <- mapply(seq, dd$min1, dd$max1, SIMPLIFY = FALSE)
  invisible(data.frame(product = rep(dd$product, rows_per_product),
                       mm = unlist(ranges)))
}

# data.table approach: convert the global data frame `dd2` and, for each
# product, generate the sequence from min1 to max1 in steps of 1L.
akrun <- function() {
  dt <- as.data.table(dd2)
  dt[, .(mm = seq.int(from = min1, to = max1, by = 1L)), by = "product"]
}
# split + lapply + stack approach: split the bound columns of the global data
# frame `dd` by its first column, build one sequence per group, and stack the
# sequences into a data frame with columns `values` and `ind`.
Ananda <- function() {
  bounds_by_product <- split(dd[-1], dd[1])
  ranges <- lapply(
    bounds_by_product,
    function(bounds) seq(from = bounds[[1]], to = bounds[[2]])
  )
  stack(ranges)
}
# by()-based approach: build each product's sequence from min1 to max1, then
# flatten the per-product results into a data frame with columns `product`
# and `mm`. Reads the global data frame `dd`.
jiber <- function() {
  res <- by(dd[, -1], dd[, 1], function(x) seq(x$min1, x$max1))
  # Take each product label from the group names and repeat it once per
  # generated value. Recovering labels by stripping digits from the row names
  # that unlist() produces corrupts any product identifier that itself
  # contains a digit.
  n_per_product <- as.vector(lengths(res))
  data.frame(product = rep(names(res), n_per_product),
             mm = unlist(res, use.names = FALSE))
}

# Single-run timings of each approach via system.time(). The commented lines
# under each call are the output recorded when the benchmark was originally run.
system.time(akrun())
#   user  system elapsed 
# 0.129   0.001   0.129 
system.time(josilber())
#  user  system elapsed 
# 0.762   0.002   0.764 

 system.time(Ananda())
 #  user  system elapsed 
 #45.449   0.191  45.636 

system.time(jiber())
#  user  system elapsed 
# 48.013   8.218  56.291 

# Repeated comparison (20 evaluations each) of the two approaches with the
# lowest single-run timings above, reported relative to the fastest.
library(microbenchmark)
microbenchmark(josilber(), akrun(), times=20L, unit='relative')
#Unit: relative
#     expr     min       lq     mean   median       uq      max neval cld
#josilber() 6.39757 6.713236 5.570836 5.901037 5.603639 3.970663    20  b
#   akrun() 1.00000 1.000000 1.000000 1.000000 1.000000 1.000000    20  a 



回答3:


You could also consider split + lapply + stack:

# Split the bound columns by product, build one sequence per product, and
# stack the sequences into a data frame with columns `values` and `ind`.
stack(
  lapply(
    split(dd[-1], dd[1]),
    function(bounds) seq(from = bounds[[1]], to = bounds[[2]])
  )
)
##   values ind
## 1      1   a
## 2      5   b
## 3      6   b
## 4      7   b
## 5      3   c
## 6      4   c
## 7      5   c
## 8      6   c
## 9      7   c



回答4:


Just another approach using R base functions

> res <- by(dd[,-1], dd[,1], function(x) seq(x$min, x$max) )
> # Take the product labels from the group names and repeat each one once per
> # generated value; stripping digits from the row names instead would corrupt
> # product names that themselves contain digits.
> data.frame(product=rep(names(res), as.vector(lengths(res))), mm=unlist(res, use.names=FALSE))
  product mm
1       a  1
2       b  5
3       b  6
4       b  7
5       c  3
6       c  4
7       c  5
8       c  6
9       c  7


来源:https://stackoverflow.com/questions/30602821/r-creating-a-sequence-table-from-two-columns

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!