alcohol

各种聚类算法的使用对比

谁说胖子不能爱 提交于 2020-02-07 02:34:46
1. 导入数据 # beer dataset import pandas as pd beer = pd . read_csv ( 'data.txt' , sep = ' ' ) beer 2. 构建特征矩阵 X = beer [ [ "calories" , "sodium" , "alcohol" , "cost" ] ] 3. 使用聚类算法 K-means clustering: from sklearn . cluster import KMeans km = KMeans ( n_clusters = 3 ) . fit ( X ) km2 = KMeans ( n_clusters = 2 ) . fit ( X ) km . labels_ beer [ 'cluster' ] = km . labels_ beer [ 'cluster2' ] = km2 . labels_ beer . sort_values ( 'cluster' ) from pandas . plotting import scatter_matrix % matplotlib inline cluster_centers = km . cluster_centers_ cluster_centers_2 = km2 . cluster_centers_ beer . groupby (

pandas-缺失值处理

浪子不回头ぞ 提交于 2019-12-05 12:14:58
import pandas as pd import numpy as np Step 1.加载数据集 # header=0以第一行作为列名 tip = pd.read_csv("lianx.csv",sep=',',header=0) tip.head() Step 2.删除第 1,4,7,9,11,13,14列,保存修改 a = list(tip.columns) print(a) b = [] c = 0 for i in a: c= c+1 if c in [1,4,7,9,11,13,14]: b.append(i) # print(b) # 删除列 tip = tip.drop(b,axis=1) tip.head() Step 3.重命名列，列索引依次为 1) alcohol 2) malic_acid 3) alcalinity_of_ash 4) magnesium 5) flavanoids 6) proanthocyanins 7) hue c = ['alcohol','malic_acid','alcalinity_of_ash','magnesium','flavanoids','proanthocyanins','hue'] b = list(tip.columns[:7]) b2 = list(tip.columns) print(b) print