I\'m a rookie in R and currently working with collaboration data in the form of an edge list with 32 columns and around 200.000 rows. I want to create a (co-)occurrence matr
An option using base::table:
df <- data.frame(ID = c(1,2,3,4),
V1 = c("England", "England", "China", "England"),
V2 = c("Greece", "England", "Greece", "England"),
V3 = c("USA", "China", "Greece", "England"))
#get paired combi and remove those from same country
pairs <- as.data.frame(do.call(rbind,
by(df, df$ID, function(x) t(combn(as.character(x[-1L]), 2L)))))
pairs <- pairs[pairs$V1!=pairs$V2, ]
#repeat data frame with columns swap so that
#upper and lower tri have same numbers and all countries are shown
pairs <- rbind(pairs, data.frame(V1=pairs$V2, V2=pairs$V1))
#tabulate pairs
tab <- table(pairs)
#set diagonals to be the count of countries
cnt <- c(table(unlist(df[-1L])))
diag(tab) <- cnt[names(diag(tab))]
tab
output:
V2
V1 China England Greece USA
China 2 2 2 0
England 2 6 1 1
Greece 2 1 3 1
USA 0 1 1 1