I'm new to R and to having the option of easily re-organizing data. I have hunted around for a solution but can't find exactly what I'd like to do. Reshape2's melt/cast doesn't…
A plodding but seemingly robust solution:
## Some example data.
## data.frame() keeps ID, xvar and yvar as ordinary atomic columns, and I()
## stores the list as a single list column. Building the frame through
## as.data.frame(cbind(...)) would instead turn every column into a list.
df <- data.frame(
  ID = paste0("ID", 1:2),
  category_list = I(list(4:1, 2:3)),
  xvar = 8:9,
  yvar = 10:9,
  stringsAsFactors = FALSE
)

## Calculate number of times each row of df will be repeated
## (one repetition per element of that row's list entry)
nn <- lengths(df$category_list)
## Row index in which row i appears nn[i] times
ii <- rep(seq_along(nn), times = nn)

## Reshape data.frame: replicate the rows, attach the flattened categories,
## drop the original list column and reset the row names
transform(df[ii, ],
          category = unlist(df$category_list),
          category_list = NULL,
          row.names = NULL)
# ID xvar yvar category
# 1 ID1 8 10 4
# 2 ID1 8 10 3
# 3 ID1 8 10 2
# 4 ID1 8 10 1
# 5 ID2 9 9 2
# 6 ID2 9 9 3
The answer will depend on the format of `category_list`. If it is in fact a list for each row, something like:
# Sample data in which `category_list` is a list column; I() makes
# data.frame() store the list as one column.
mydf <- data.frame(
  ID = paste0("ID", 1:3),
  category_list = I(list(
    c("cat1", "cat2", "cat3"),
    c("cat2", "cat3"),
    "cat1"
  )),
  xval = 1:3,
  yval = 1:3
)
or
library(data.table)

# Same sample data, built through data.table(), which is given the list
# column directly, and then converted back to a plain data.frame.
mydf <- as.data.frame(
  data.table(
    ID = paste0("ID", 1:3),
    category_list = list(
      c("cat1", "cat2", "cat3"),
      c("cat2", "cat3"),
      "cat1"
    ),
    xval = 1:3,
    yval = 1:3
  )
)
Then you can use `plyr` and `merge` to create your long-form data:
library(plyr)  # supplies ddply(), .() and summarize() used below

# ddply() unlists each ID's `category_list` entry into one row per category;
# merging that back onto `mydf` by ID repeats the remaining columns.
newdf <- merge(
  mydf,
  ddply(mydf, .(ID), summarize, cat_list = unlist(category_list)),
  by = "ID"
)
ID category_list xval yval cat_list
1 ID1 cat1, cat2, cat3 1 1 cat1
2 ID1 cat1, cat2, cat3 1 1 cat2
3 ID1 cat1, cat2, cat3 1 1 cat3
4 ID2 cat2, cat3 2 2 cat2
5 ID2 cat2, cat3 2 2 cat3
6 ID3 cat1 3 3 cat1
or a non-plyr approach that doesn't require merge
# Split `mydf` by ID, add the unlisted categories to each piece with
# transform(), then stack the pieces back together.
do.call(
  rbind,
  lapply(
    split(mydf, mydf$ID),
    function(piece) transform(piece, cat_list = unlist(category_list))
  )
)
A possibility:
# Sample data in which `category_list` holds comma-separated strings.
# read.table(text = ...) parses the literal directly and cleans up its own
# temporary connection; an already-open textConnection() passed as `file`
# is not closed by read.table() and would be left open.
x <- read.table(
  text = '
ID category_list xval yval
ID1 "cat1, cat2, cat3" xnum1 ynum1
ID2 "cat2, cat3" xnum2 ynum2
ID3 "cat1" xnum3 ynum3',
  header = TRUE,
  stringsAsFactors = FALSE
)
library(plyr)

# For each ID (each ID is a single row in the sample data), split the
# comma-separated string; transform() then repeats that row once per category.
# Splitting on ", *" also consumes the blank after each comma, so the new
# column holds "cat2" rather than " cat2".
ddply(x, "ID", transform,
      category = strsplit(category_list, ", *")[[1]])
## ID category_list xval yval category
## 1 ID1 cat1, cat2, cat3 xnum1 ynum1 cat1
## 2 ID1 cat1, cat2, cat3 xnum1 ynum1 cat2
## 3 ID1 cat1, cat2, cat3 xnum1 ynum1 cat3
## 4 ID2 cat2, cat3 xnum2 ynum2 cat2
## 5 ID2 cat2, cat3 xnum2 ynum2 cat3
## 6 ID3 cat1 xnum3 ynum3 cat1
This will be a non-plyr approach:
# Non-plyr version, using the comma-separated sample data `x`.
# If `category_list` is a factor, wrap it in as.character() before splitting.
# Each row of `x` is repeated once per category it contains; splitting on
# ", *" drops the blank that follows each comma.
cbind(
  x[rep(seq_len(nrow(x)),
        times = lengths(strsplit(x$category_list, ", *"))),
    -2],  # -2 gets rid of the old category column
  new_cats = unlist(strsplit(x$category_list, ", *"))
)
ID xval yval new_cats
1 ID1 xnum1 ynum1 cat1
1.1 ID1 xnum1 ynum1 cat2
1.2 ID1 xnum1 ynum1 cat3
2 ID2 xnum2 ynum2 cat2
2.1 ID2 xnum2 ynum2 cat3
3 ID3 xnum3 ynum3 cat1
Note: Original answer deleted as my answer was based on a different data structure than what the OP seems to actually have.
list
Using @mnel's sample data:
# List-column sample data: one character vector of categories per ID,
# wrapped in I() so data.frame() keeps it as a single column.
mydf <- data.frame(
  ID = paste0("ID", 1:3),
  category_list = I(list(
    c("cat1", "cat2", "cat3"),
    c("cat2", "cat3"),
    "cat1"
  )),
  xval = 1:3,
  yval = 1:3
)
Using `listCol_l` from my "splitstackshape" package:
library(splitstackshape)
# Flatten the list column `category_list` of `mydf` into long form; the
# printed result that follows has one row per list element.
listCol_l(mydf, "category_list")
# ID xval yval category_list_ul
# 1: ID1 1 1 cat1
# 2: ID1 1 1 cat2
# 3: ID1 1 1 cat3
# 4: ID2 2 2 cat2
# 5: ID2 2 2 cat3
# 6: ID3 3 3 cat1
Using `unnest` from the "tidyr" package:
library(tidyr)
# Expand the list column `category_list` so that each element gets its own
# row, as in the printed result that follows.
unnest(mydf, "category_list")
# ID category_list xval yval
# 1 ID1 cat1 1 1
# 2 ID1 cat2 1 1
# 3 ID1 cat3 1 1
# 4 ID2 cat2 2 2
# 5 ID2 cat3 2 2
# 6 ID3 cat1 3 3
Using @BenBolker's sample data:
# Comma-separated sample data. Passing the literal through `text =` lets
# read.table() open and close its own connection; a bare textConnection()
# handed in as `file` is already open and would not be closed afterwards.
x <- read.table(
  text = '
ID category_list xval yval
ID1 "cat1, cat2, cat3" xnum1 ynum1
ID2 "cat2, cat3" xnum2 ynum2
ID3 "cat1" xnum3 ynum3',
  header = TRUE,
  stringsAsFactors = FALSE
)
Using `cSplit` from my "splitstackshape" package:
library(splitstackshape)

# Split `category_list` on commas and return the pieces in long form
# (one row per piece).
cSplit(
  x,
  splitCols = "category_list",
  sep = ",",
  direction = "long"
)
# ID category_list xval yval
# 1: ID1 cat1 xnum1 ynum1
# 2: ID1 cat2 xnum1 ynum1
# 3: ID1 cat3 xnum1 ynum1
# 4: ID2 cat2 xnum2 ynum2
# 5: ID2 cat3 xnum2 ynum2
# 6: ID3 cat1 xnum3 ynum3
Another base R possibility using `by`:
# Base R split-apply-combine on the comma-separated sample data `x`.
# strsplit() needs character input, so this operates on `x` (string column),
# not on the list-column `mydf`.
do.call(
  rbind,
  by(x, x$ID, function(grp) {
    # `grp` holds the row for one ID; data.frame() recycles its scalar
    # fields to the number of categories found in the string.
    data.frame(
      ID = grp$ID,
      # ", *" also removes the blank after each comma
      category_list = unlist(strsplit(grp$category_list, ", *")),
      xval = grp$xval,
      yval = grp$yval
    )
  })
)
Result:
ID category_list xval yval
ID1.1 ID1 cat1 xnum1 ynum1
ID1.2 ID1 cat2 xnum1 ynum1
ID1.3 ID1 cat3 xnum1 ynum1
ID2.1 ID2 cat2 xnum2 ynum2
ID2.2 ID2 cat3 xnum2 ynum2
ID3 ID3 cat1 xnum3 ynum3