I have a list of names like:
names = [\'A\', \'B\', \'C\', \'D\']
and a list of documents, that in each documents some of these names are m
from collections import OrderedDict
document = [['A', 'B'], ['C', 'B'], ['A', 'B', 'C', 'D']]
names = ['A', 'B', 'C', 'D']
occurrences = OrderedDict((name, OrderedDict((name, 0) for name in names)) for name in names)
# Find the co-occurrences:
for l in document:
for i in range(len(l)):
for item in l[:i] + l[i + 1:]:
occurrences[l[i]][item] += 1
# Print the matrix:
print(' ', ' '.join(occurrences.keys()))
for name, values in occurrences.items():
print(name, ' '.join(str(i) for i in values.values()))
Output;
A B C D
A 0 2 1 1
B 2 0 2 1
C 1 2 0 1
D 1 1 1 0