The documentation for sklearn.cluster.AgglomerativeClustering mentions that,
when varying the number of clusters and using caching, it may be advantageous
to compute the full tree.
I know it's an old question; however, the solution below might turn out to be helpful:
# scores = input data matrix (one row per sample; scores.shape[0] is used as the sample count)
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import cut_tree
from sklearn.metrics import silhouette_score
from sklearn.metrics.pairwise import euclidean_distances
# Pick the number of clusters for a Ward hierarchical clustering of `scores`
# by cutting one linkage tree at every candidate size and keeping the cut
# with the highest silhouette score.
#
# Results left at module level: `labels` (best labelling, a list),
# `n_clusters` (its number of distinct labels) and `silh_score` (its score).
n_l = 2  # smallest number of clusters to try
n_h = scores.shape[0]  # exclusive upper bound: the number of samples
if n_h <= n_l:
    # range(n_l, n_h) would be empty and `labels` would never be assigned,
    # so fail early with an explicit message instead of a later NameError.
    raise ValueError(
        f"Need at least {n_l + 1} samples to select a number of clusters, "
        f"got {n_h}."
    )

# Build the linkage tree once; every candidate clustering below is obtained
# by cutting this same tree at a different number of clusters.
linkage_mat = linkage(scores, method="ward")
# Pairwise distances are computed once and handed to silhouette_score as a
# precomputed metric so they are not recomputed for every cut.
euc_scores = euclidean_distances(scores)

# Start below the lowest possible silhouette value (-1) so that the first
# candidate always replaces it.
silh_score = -2
# Selecting the best number of clusters based on the silhouette score
for i in range(n_l, n_h):
    local_labels = list(cut_tree(linkage_mat, n_clusters=i).flatten())
    sc = silhouette_score(
        euc_scores,
        metric="precomputed",
        labels=local_labels,
        random_state=42,
    )
    # Strict comparison: on a tie the earlier (smaller) cluster count is kept.
    if silh_score < sc:
        silh_score = sc
        labels = local_labels

n_clusters = len(set(labels))
print(f"Optimal number of clusters: {n_clusters}")
print(f"Best silhouette score: {silh_score}")
# ...