Let's say I have the following data frame:
ID Code
1 1 A
2 1 B
3 1 C
4 2 B
5 2 C
6 2 D
7 3 C
8 3 A
9 3 D
10 3 B
11 4 D
12 4 B
Using base R only:
# Example data: one row per (ID, code) observation.
df <- data.frame(
  ID = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4),
  code = c("A", "B", "C", "B", "C", "D", "C", "A", "D", "B", "D", "B"),
  stringsAsFactors = FALSE
)

# Create data.frame of unique, unordered pairs of codes (columns V1, V2).
# Pairing the sorted *distinct* codes with combn() avoids building the full
# length(df$code)^2 grid and de-duplicating it afterwards.
codes <- sort(unique(df$code))
if (length(codes) < 2L) {
  # Fewer than two distinct codes: there are no pairs (combn() would error).
  e1 <- data.frame(
    V1 = character(0),
    V2 = character(0),
    stringsAsFactors = FALSE
  )
} else {
  e1 <- as.data.frame(t(utils::combn(codes, 2L)), stringsAsFactors = FALSE)
}

# Count the number of IDs in which both codes of a pair occur. The codes are
# grouped by ID once instead of re-filtering df for every (pair, ID).
codes_by_id <- split(df$code, df$ID)
# Iterating over row indices (not apply() on the data.frame) yields an unnamed
# integer vector, so no stray names leak into the row names of `out`.
e1$count <- vapply(
  seq_len(nrow(e1)),
  function(i) {
    pair <- c(e1$V1[i], e1$V2[i])
    sum(vapply(
      codes_by_id,
      function(id_codes) all(pair %in% id_codes),
      logical(1)
    ))
  },
  integer(1)
)

# Turn the codes into a string and print output
out <- data.frame(
  Code.Combinations = paste(e1$V1, e1$V2, sep = ", "),
  Count.of.ID = e1$count,
  stringsAsFactors = FALSE
)
out
# Code.Combinations Count.of.ID
# 1 A, B 2
# 2 A, C 2
# 3 A, D 1
# 4 B, C 3
# 5 B, D 3
# 6 C, D 2