“unpacking” a factor list from a data.frame

后端 未结 6 985
南旧
南旧 2020-12-06 13:34

I'm new to R / having the option to easily re-organize data, and have hunted around for a solution but can't find exactly what I'd like to do. Reshape2's melt/cast doesn't…

相关标签:
6条回答
  • 2020-12-06 13:35

    A plodding but seemingly robust solution:

    ## Example input: category_list is a list column holding one integer
    ## vector per row (cbind() with a list argument yields a list-matrix).
    df <- as.data.frame(cbind(
      ID            = paste0("ID", 1:2),
      category_list = list(4:1, 2:3),
      xvar          = 8:9,
      yvar          = 10:9
    ))

    ## nn: how many categories each row of df holds
    ## ii: row index of df, repeated once per category found in that row
    nn <- lengths(df$category_list)
    ii <- rep(seq_along(nn), times = nn)

    ## Long format: replicate the rows, attach the flattened categories,
    ## drop the original list column and reset the row names
    transform(df[ii, ],
              category = unlist(df$category_list),
              category_list = NULL,
              row.names = NULL)
    #    ID xvar yvar category
    # 1 ID1    8   10        4
    # 2 ID1    8   10        3
    # 3 ID1    8   10        2
    # 4 ID1    8   10        1
    # 5 ID2    9    9        2
    # 6 ID2    9    9        3
    
    0 讨论(0)
  • 2020-12-06 13:36

    The answer will depend on the format of category_list. If it is in fact a list column (one vector per row), the data will look something like

    # Sample data: I() keeps the list intact as a single list column
    # instead of letting data.frame() try to expand it into separate columns.
    mydf <- data.frame(
      ID = paste0("ID", seq_len(3)),
      category_list = I(list(
        c("cat1", "cat2", "cat3"),
        c("cat2", "cat3"),
        "cat1"
      )),
      xval = seq_len(3),
      yval = seq_len(3)
    )
    

    or

    library(data.table)
    # Alternative construction: the list is handed to data.table() directly
    # (no I() wrapper is used here) and as.data.frame() converts the result
    # back to a plain data.frame.
    mydf <- as.data.frame(data.table(ID = paste0('ID',1:3), 
     category_list = list(c('cat1','cat2','cat3'),  c('cat2','cat3'), c('cat1')), 
     xval = 1:3, yval = 1:3) )
    

    Then you can use plyr and merge to create your long form data

     # ddply() unlists category_list within each ID, giving one row per
     # ID/category pair; merge() then joins those rows back onto mydf by ID,
     # so the original category_list column is kept next to the new cat_list.
     # NOTE(review): ddply(), .() and summarize come from plyr; no library(plyr)
     # call is shown in this snippet, so it must already be attached.
     newdf <- merge(mydf, ddply(mydf, .(ID), summarize, cat_list = unlist(category_list)), by = 'ID')
    
    
       ID    category_list xval yval cat_list
    1 ID1 cat1, cat2, cat3    1    1     cat1
    2 ID1 cat1, cat2, cat3    1    1     cat2
    3 ID1 cat1, cat2, cat3    1    1     cat3
    4 ID2       cat2, cat3    2    2     cat2
    5 ID2       cat2, cat3    2    2     cat3
    6 ID3             cat1    3    3     cat1
    

    or a non-plyr approach that doesn't require merge

     # Base-R variant: split mydf by ID, expand each piece with transform(),
     # then stack the pieces back together with rbind().
     do.call(
       rbind,
       lapply(
         split(mydf, mydf$ID),
         function(piece) transform(piece, cat_list = unlist(category_list))
       )
     )
    
    0 讨论(0)
  • 2020-12-06 13:43

    A possibility:

    # Example data with category_list stored as one comma-separated string
    # per row. The literal is passed through `text =` so that read.table()
    # opens and closes its own connection; wrapping it in textConnection()
    # by hand leaves a connection open that nothing ever closes.
    x <- read.table(text = '
        ID      category_list    xval    yval
         ID1   "cat1, cat2, cat3"   xnum1   ynum1
         ID2         "cat2, cat3"   xnum2   ynum2
         ID3               "cat1"   xnum3   ynum3',
                    header = TRUE, stringsAsFactors = FALSE)
    
    library(plyr)
    # Split on a comma plus any whitespace after it, so the pieces come out as
    # "cat2" rather than " cat2" (splitting on "," alone keeps the leading blank).
    # [[1]] takes the pieces of the first string in the group; this assumes
    # one row per ID, as in the example data.
    ddply(x, "ID", transform, category = strsplit(category_list, ",\\s*")[[1]])
    
    ##    ID    category_list  xval  yval category
    ## 1 ID1 cat1, cat2, cat3 xnum1 ynum1     cat1
    ## 2 ID1 cat1, cat2, cat3 xnum1 ynum1     cat2
    ## 3 ID1 cat1, cat2, cat3 xnum1 ynum1     cat3
    ## 4 ID2       cat2, cat3 xnum2 ynum2     cat2
    ## 5 ID2       cat2, cat3 xnum2 ynum2     cat3
    ## 6 ID3             cat1 xnum3 ynum3     cat1
    
    0 讨论(0)
  • 2020-12-06 13:47

    This will be a non-plyr approach:

    # Non-plyr expansion of Bolker's example data `x`: repeat each row once per
    # category, drop the old column, and bind the flattened categories on.
    # The split pattern also consumes the blanks after each comma, so the new
    # values are "cat2" rather than " cat2"; as.character() makes the same
    # code work when category_list is a factor.
    cbind( x[ rep(seq_len(nrow(x)),
                  times = lengths(strsplit(as.character(x$category_list), ",\\s*")) ),
              -2],    # to get rid of the old category column
           new_cats = unlist( strsplit(as.character(x$category_list), ",\\s*") ) )
    
         ID  xval  yval new_cats
    1   ID1 xnum1 ynum1     cat1
    1.1 ID1 xnum1 ynum1     cat2
    1.2 ID1 xnum1 ynum1     cat3
    2   ID2 xnum2 ynum2     cat2
    2.1 ID2 xnum2 ynum2     cat3
    3   ID3 xnum3 ynum3     cat1
    
    0 讨论(0)
  • 2020-12-06 13:55

    Note: Original answer deleted as my answer was based on a different data structure than what the OP seems to actually have.


    Scenario 1: Column is a list

    Using @mnel's sample data:

    # Sample data with a genuine list column; I() stops data.frame() from
    # trying to turn the list into several columns.
    mydf <- data.frame(
      ID = paste0("ID", seq_len(3)),
      category_list = I(list(
        c("cat1", "cat2", "cat3"),
        c("cat2", "cat3"),
        "cat1"
      )),
      xval = seq_len(3),
      yval = seq_len(3)
    )
    

    Using listCol_l from my "splitstackshape" package

    library(splitstackshape)
    # Expands the list column to one row per element; in the printed result
    # the values appear in a new column named category_list_ul.
    listCol_l(mydf, "category_list")
    #     ID xval yval category_list_ul
    # 1: ID1    1    1             cat1
    # 2: ID1    1    1             cat2
    # 3: ID1    1    1             cat3
    # 4: ID2    2    2             cat2
    # 5: ID2    2    2             cat3
    # 6: ID3    3    3             cat1
    

    Using unnest from the "tidyr" package

    library(tidyr)
    # Expands the list column to one row per element; the printed result
    # keeps the column name category_list.
    unnest(mydf, "category_list")
    #    ID category_list xval yval
    # 1 ID1          cat1    1    1
    # 2 ID1          cat2    1    1
    # 3 ID1          cat3    1    1
    # 4 ID2          cat2    2    2
    # 5 ID2          cat3    2    2
    # 6 ID3          cat1    3    3
    

    Scenario 2: Column is a concatenated string

    Using @BenBolker's sample data:

    # Sample data where category_list is a single comma-separated string per
    # row. Using `text =` lets read.table() manage (and close) its own
    # connection; a hand-made textConnection() here is never closed.
    x <- read.table(text = '
        ID      category_list    xval    yval
         ID1   "cat1, cat2, cat3"   xnum1   ynum1
         ID2         "cat2, cat3"   xnum2   ynum2
         ID3               "cat1"   xnum3   ynum3',
                    header = TRUE, stringsAsFactors = FALSE)
    

    Using cSplit from my "splitstackshape" package

    library(splitstackshape)
    # Split category_list on "," and return the pieces stacked as rows
    # ("long"), one row per category as in the printed result.
    cSplit(x, "category_list", ",", "long")
    #     ID category_list  xval  yval
    # 1: ID1          cat1 xnum1 ynum1
    # 2: ID1          cat2 xnum1 ynum1
    # 3: ID1          cat3 xnum1 ynum1
    # 4: ID2          cat2 xnum2 ynum2
    # 5: ID2          cat3 xnum2 ynum2
    # 6: ID3          cat1 xnum3 ynum3
    
    0 讨论(0)
  • 2020-12-06 13:57

    Another base R possibility using by:

    # For each ID, build a small data.frame with one row per category and
    # stack the pieces with rbind(). category_list is expected to be a
    # comma-separated string per row here.
    do.call(rbind,
    by(mydf,
       mydf$ID,
       function(x) {
         data.frame(
                    ID=x$ID,
                    # Split on a comma plus any following whitespace so the
                    # values come out as "cat2", not " cat2"; as.character()
                    # lets a factor column be split as well.
                    category_list = unlist(strsplit(as.character(x$category_list), ",\\s*")),
                    xval=x$xval,
                    yval=x$yval
                   ) 
       }
      )
    )
    

    Result:

           ID category_list  xval  yval
    ID1.1 ID1          cat1 xnum1 ynum1
    ID1.2 ID1          cat2 xnum1 ynum1
    ID1.3 ID1          cat3 xnum1 ynum1
    ID2.1 ID2          cat2 xnum2 ynum2
    ID2.2 ID2          cat3 xnum2 ynum2
    ID3   ID3          cat1 xnum3 ynum3
    
    0 讨论(0)
提交回复
热议问题