Count unique values of a column by pairwise combinations of another column in R

前端 未结 4 1252
日久生厌
日久生厌 2020-12-07 03:51

Let's say I have the following data frame:

   ID Code
1   1    A
2   1    B
3   1    C
4   2    B
5   2    C
6   2    D
7   3    C
8   3    A
9   3    D
10          


        
4条回答
  •  猫巷女王i
    2020-12-07 04:07

    Using base R only:

    # Example data: one row per (ID, code) observation.
    df <- data.frame(
      ID = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4),
      code = c("A", "B", "C", "B", "C", "D", "C", "A", "D", "B", "D", "B"),
      stringsAsFactors = FALSE
    )

    # Build every unordered pair of distinct codes exactly once. combn() works
    # on the unique codes only, so no duplicate pairs are generated and later
    # discarded. Each pair is sorted so it always reads "smaller, larger";
    # the result is a matrix with one row per pair and two columns.
    code_pairs <- t(combn(unique(df$code), 2, FUN = sort))

    # Group the codes by ID once, rather than re-filtering df for every
    # pair/ID combination.
    codes_by_id <- split(df$code, df$ID)

    # Count the occurrence of each code combination across IDs: an ID
    # contributes 1 to a pair when its set of codes contains both members.
    pair_counts <- vapply(
      seq_len(nrow(code_pairs)),
      function(i) {
        sum(vapply(
          codes_by_id,
          function(codes) all(code_pairs[i, ] %in% codes),
          logical(1)
        ))
      },
      integer(1)
    )

    # Turn the codes into a string and print output
    out <- data.frame(
      Code.Combinations = paste(code_pairs[, 1], code_pairs[, 2], sep = ", "),
      Count.of.ID = pair_counts,
      stringsAsFactors = FALSE
    )

    out
    #   Code.Combinations Count.of.ID
    # 1              A, B           2
    # 2              A, C           2
    # 3              A, D           1
    # 4              B, C           3
    # 5              B, D           3
    # 6              C, D           2
    

提交回复
热议问题