import numpy as np


def init_centroids(k, n_features):
    """Return ``k`` random starting centroids.

    Args:
        k: Number of centroids to create.
        n_features: Number of coordinates per centroid.

    Returns:
        Array of shape ``(k, n_features)`` filled by ``np.random.random``,
        i.e. floats drawn from the half-open interval [0, 1).
    """
    return np.random.random((k, n_features))


def update_centroids(points, centroid_index, k=10, centroids=None):
    """Recompute each centroid as the mean of the points assigned to it.

    Args:
        points: 2-D array-like of shape ``(n_samples, n_features)``.
        centroid_index: Cluster index of every point, length ``n_samples``.
        k: Number of centroids to return when ``centroids`` is not given.
            Defaults to 10, the size the original implementation hard-coded.
        centroids: Optional previous centroids. When given, the result has
            the same number of rows and a cluster that received no points
            keeps its previous position.

    Returns:
        Float array of shape ``(k, n_features)``. Without ``centroids``, a
        cluster that received no points is left as a row of zeros.

    Raises:
        IndexError: If ``centroid_index`` refers to a cluster that does not
            fit in the result.
    """
    points = np.asarray(points, dtype=float)
    labels = np.asarray(centroid_index)

    if centroids is not None:
        # Copy so the caller's array is never modified in place.
        new_centroids = np.array(centroids, dtype=float)
        k = len(new_centroids)
    else:
        new_centroids = np.zeros((k, points.shape[1]))

    if labels.size and labels.max() >= k:
        raise IndexError(
            f"cluster index {int(labels.max())} is out of range for "
            f"{k} centroids"
        )

    for i in range(k):
        members = points[labels == i]
        # Averaging an empty selection would produce NaN (and a
        # RuntimeWarning), so empty clusters are simply left untouched.
        if len(members):
            new_centroids[i] = members.mean(axis=0)
    return new_centroids


def distance(pointA, pointB):
    """Return the Euclidean distance between two points.

    All coordinates take part in the computation, so the points may have
    any (matching) number of features.
    """
    diff = np.asarray(pointA, dtype=float) - np.asarray(pointB, dtype=float)
    return np.sqrt(np.sum(diff ** 2))


def belongs2(point, centroids):
    """Return the index of the centroid closest to ``point``.

    Ties go to the lowest index. A centroid whose distance is NaN is never
    selected, because the ``<`` comparison is false for NaN. If no centroid
    has a finite distance (or ``centroids`` is empty) the result is 0.
    """
    index = 0
    min_distance = np.inf
    for i, centroid in enumerate(centroids):
        d = distance(point, centroid)
        if d < min_distance:
            min_distance = d
            index = i
    return index


def update_index(points, centroids):
    """Assign every point to its nearest centroid.

    Returns:
        Integer array of length ``len(points)`` holding, for each point,
        the index returned by ``belongs2``.
    """
    return np.array([belongs2(point, centroids) for point in points],
                    dtype=int)


def my_kmeans(points, k=10, max_iter=1000):
    """Cluster ``points`` with a plain k-means loop.

    Args:
        points: 2-D array-like of shape ``(n_samples, n_features)``.
        k: Number of clusters (default 10, as in the original code).
        max_iter: Upper bound on the number of update rounds (default 1000).

    Returns:
        Tuple ``(centroids, indeces)``: the final ``(k, n_features)``
        centroid array and the integer cluster index of every point.

    Raises:
        ValueError: If ``points`` is not two-dimensional.
    """
    points = np.asarray(points, dtype=float)
    if points.ndim != 2:
        raise ValueError("points must be a 2-D array (n_samples, n_features)")

    # NOTE: the starting centroids are drawn from [0, 1) regardless of the
    # range of the data, exactly as in the original implementation.
    centroids = init_centroids(k, points.shape[1])
    indeces = update_index(points, centroids)

    for i in range(max_iter):
        centroids = update_centroids(points, indeces, centroids=centroids)
        new_indeces = update_index(points, centroids)
        # Stop as soon as a full round leaves every assignment unchanged.
        if np.array_equal(new_indeces, indeces):
            print('converge', i)
            break
        indeces = new_indeces
    return centroids, indeces


if __name__ == "__main__":
    # NOTE(review): `points` is not defined anywhere in this file; it has to
    # be supplied before this demo can run.
    centroids, indeces = my_kmeans(points)
    print(centroids)

    # Run scikit-learn's KMeans on the same data.
    from sklearn.cluster import KMeans

    n_digits = 10
    kmeans = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
    kmeans.fit(points)
    centroids = kmeans.cluster_centers_

# Source: 51CTO
# Author: ailinyingai
# Link: https://blog.csdn.net/ailinyingai/article/details/100734781