各种聚类算法的使用对比
1. 导入数据 # beer dataset import pandas as pd beer = pd . read_csv ( 'data.txt' , sep = ' ' ) beer 2. 构建特征矩阵 X = beer [ [ "calories" , "sodium" , "alcohol" , "cost" ] ] 3. 使用聚类算法 K-means clustering: from sklearn . cluster import KMeans km = KMeans ( n_clusters = 3 ) . fit ( X ) km2 = KMeans ( n_clusters = 2 ) . fit ( X ) km . labels_ beer [ 'cluster' ] = km . labels_ beer [ 'cluster2' ] = km2 . labels_ beer . sort_values ( 'cluster' ) from pandas . plotting import scatter_matrix % matplotlib inline cluster_centers = km . cluster_centers_ cluster_centers_2 = km2 . cluster_centers_ beer . groupby (