Intersect all possible combinations of list elements

后端 未结 3 464
面向向阳花
面向向阳花 2020-12-17 18:13

I have a list of vectors:

> l <- list(A=c(\"one\", \"two\", \"three\", \"four\"), B=c(\"one\", \"two\"), C=c(\"two\", \"four\", \"five\", \"six\"), D=c         


        
相关标签:
3条回答
  • 2020-12-17 18:45

    If I understand correctly, you can look at crossprod and stack:

    crossprod(table(stack(l)))
    #    ind
    # ind A B C D
    #   A 4 2 2 0
    #   B 2 2 1 0
    #   C 2 1 4 1
    #   D 0 0 1 2
    

    You can extend the idea if you want a data.frame of just the relevant values as follows:

    1. Write a spiffy function

      listIntersect <- function(inList) {
        X <- crossprod(table(stack(inList)))
        X[lower.tri(X)] <- NA
        diag(X) <- NA
        out <- na.omit(data.frame(as.table(X)))
        out[order(out$ind), ]
      }
      
    2. Apply it

      listIntersect(l)
      #    ind ind.1 Freq
      # 5    A     B    2
      # 9    A     C    2
      # 13   A     D    0
      # 10   B     C    1
      # 14   B     D    0
      # 15   C     D    1
      

    Performance seems pretty decent.

    Expand the list:

    L <- unlist(replicate(100, l, FALSE), recursive=FALSE)
    names(L) <- make.unique(names(L))
    

    Set up some functions to test:

    fun1 <- function(l) listIntersect(l)
    fun2 <- function(l) apply( combn( l , 2 ) , 2 , function(x) length( intersect( unlist( x[1]) , unlist(x[2]) ) ) )
    fun3 <- function(l) {
      m1 <- combn(names(l),2)
      val <- sapply(split(m1, col(m1)),function(x) {x1 <- l[[x[1]]]; x2 <- l[[x[2]]]; length(intersect(x1, x2))})
      Ind <- apply(m1,2,paste,collapse="int")
      data.frame(Ind, val, stringsAsFactors=F) 
    }
    

    Check out the timings:

    system.time(F1 <- fun1(L))
    #    user  system elapsed 
    #    0.33    0.00    0.33
    system.time(F2 <- fun2(L))
    #    user  system elapsed 
    #    4.32    0.00    4.31 
    system.time(F3 <- fun3(L))
    #    user  system elapsed 
    #    6.33    0.00    6.33 
    

    Everyone seems to be sorting the result differently, but the numbers match:

    table(F1$Freq)
    # 
    #     0     1     2     4 
    # 20000 20000 29900  9900 
    table(F2)
    # F2
    #     0     1     2     4 
    # 20000 20000 29900  9900 
    table(F3$val)
    # 
    #     0     1     2     4 
    # 20000 20000 29900  9900 
    
    0 讨论(0)
  • 2020-12-17 18:55

    combn works with list structures as well, you just need a little unlist'ing of the result to use intersect...

    # Get the combinations of names of list elements
    nms <- combn( names(l) , 2 , FUN = paste0 , collapse = "" , simplify = FALSE )
    
    # Make the combinations of list elements
    ll <- combn( l , 2 , simplify = FALSE )
    
    # Intersect the list elements
    out <- lapply( ll , function(x) length( intersect( x[[1]] , x[[2]] ) ) )
    
    # Output with names
    setNames( out , nms )
    #$AB
    #[1] 2
    
    #$AC
    #[1] 2
    
    #$AD
    #[1] 0
    
    #$BC
    #[1] 1
    
    #$BD
    #[1] 0
    
    #$CD
    #[1] 1
    
    0 讨论(0)
  • 2020-12-17 18:57

    Try:

    m1 <- combn(names(l),2)
    val <- sapply(split(m1, col(m1)),function(x) {x1 <- l[[x[1]]]; x2 <- l[[x[2]]]; length(intersect(x1, x2))})
    Ind <- apply(m1,2,paste,collapse="int")
    data.frame(Ind, val, stringsAsFactors=F)   
    #      Ind val
    # 1 AntB   2
    # 2 AntC   2
    # 3 AntD   0
    # 4 BntC   1
    # 5 BntD   0
    # 6 CntD   1
    
    0 讨论(0)
提交回复
热议问题