sklearn.datasets自带数据集介绍

匿名 (未验证) 提交于 2019-12-02 23:52:01

 

  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
  2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
  2 2]

""" import numpy as np print(iris.target_names)  #['setosa' 'versicolor' 'virginica'] np.bincount(iris.target)  #[50 50 50]  import matplotlib.pyplot as plt#以第3个索引为划分依据,x_index的值可以为0,1,2,3 x_index=3 color=['blue','red','green'] for label,color in zip(range(len(iris.target_names)),color):     plt.hist(iris.data[iris.target==label,x_index],label=iris.target_names[label],color=color)  plt.xlabel(iris.feature_names[x_index]) plt.legend(loc="Upper right") plt.show() #画散点图,第一维的数据作为x轴和第二维的数据作为y轴 x_index=0 y_index=1 colors=['blue','red','green'] for label,color in zip(range(len(iris.target_names)),colors):     plt.scatter(iris.data[iris.target==label,x_index],                 iris.data[iris.target==label,y_index],                 label=iris.target_names[label],                 c=color) plt.xlabel(iris.feature_names[x_index]) plt.ylabel(iris.feature_names[y_index]) plt.legend(loc='upper left') plt.show()

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits

# Load the handwritten-digits dataset once and reuse it for every step below.
digits = load_digits()
print(digits.data.shape)

# Display the first image using the grayscale colormap.
plt.gray()
plt.matshow(digits.images[0])
plt.show()

digits.keys()
n_samples, n_features = digits.data.shape
print((n_samples, n_features))

print(digits.data.shape)
print(digits.images.shape)

# Check that flattening every image to one row reproduces `digits.data`.
# The shape comes from the data itself instead of hard-coded numbers.
print(np.all(digits.images.reshape((n_samples, n_features)) == digits.data))

fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
# Draw the first 64 digits on an 8x8 grid (each image is 8*8 pixels).
for position, (image, target) in enumerate(
        zip(digits.images[:64], digits.target[:64]), start=1):
    ax = fig.add_subplot(8, 8, position, xticks=[], yticks=[])
    ax.imshow(image, cmap=plt.cm.binary, interpolation='nearest')
    # Label the image with its target value.
    ax.text(0, 7, str(target))
plt.show()

from sklearn.datasets import load_svmlight_file

# Load a dataset stored in svmlight / libsvm format.
# The second positional argument of load_svmlight_file is `n_features`; the
# original passed an empty string there, which is not a valid feature count,
# so it is omitted and the number of features is inferred from the file.
# To load several files at once, use sklearn.datasets.load_svmlight_files
# with a list of paths instead.
x_train, y_train = load_svmlight_file("/path/to/train_dataset.txt")

import matplotlib.pyplot as plt
import numpy as np
# The generators are imported from the public `sklearn.datasets` namespace;
# the private `sklearn.datasets.samples_generator` path is deprecated.
from sklearn.datasets import make_blobs, make_circles, make_classification


def _plot_by_label(X, labels, title):
    """Plot the first two columns of X, one colour per distinct label.

    X is indexed as a 2-D array (`X[labels == k]`, then columns 0 and 1) and
    `labels` is compared element-wise against each distinct label value.
    The figure is given `title` and shown immediately.
    """
    unique_labels = sorted(set(labels))
    colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        x_k = X[labels == k]
        plt.plot(x_k[:, 0], x_k[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor="k", markersize=14)
    plt.title(title)
    plt.show()


# Generate a multi-class, single-label dataset.
center = [[1, 1], [-1, -1], [1, -1]]
cluster_std = 0.3
X, labels = make_blobs(n_samples=200, centers=center, n_features=2,
                       cluster_std=cluster_std, random_state=0)
print('X.shape', X.shape)
print("labels", set(labels))
_plot_by_label(X, labels, 'data by make_blobs()')

# Generate a dataset for classification.
X, labels = make_classification(n_samples=200, n_features=2, n_redundant=0,
                                n_informative=2, random_state=1,
                                n_clusters_per_class=2)
# Add uniform noise drawn from a seeded generator so the result is repeatable.
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
_plot_by_label(X, labels, 'data by make_classification()')

# Generate data with a circular decision boundary.
X, labels = make_circles(n_samples=200, noise=0.2, factor=0.2, random_state=1)
print("X.shape:", X.shape)
print("labels:", set(labels))
# The original title said make_moons(), but the data comes from make_circles().
_plot_by_label(X, labels, 'data by make_circles()')

参考资料:https://www.cnblogs.com/nolonely/p/6980160.html

 

  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
  2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
  2 2]

""" import numpy as np print(iris.target_names)  #['setosa' 'versicolor' 'virginica'] np.bincount(iris.target)  #[50 50 50]  import matplotlib.pyplot as plt#以第3个索引为划分依据,x_index的值可以为0,1,2,3 x_index=3 color=['blue','red','green'] for label,color in zip(range(len(iris.target_names)),color):     plt.hist(iris.data[iris.target==label,x_index],label=iris.target_names[label],color=color)  plt.xlabel(iris.feature_names[x_index]) plt.legend(loc="Upper right") plt.show() #画散点图,第一维的数据作为x轴和第二维的数据作为y轴 x_index=0 y_index=1 colors=['blue','red','green'] for label,color in zip(range(len(iris.target_names)),colors):     plt.scatter(iris.data[iris.target==label,x_index],                 iris.data[iris.target==label,y_index],                 label=iris.target_names[label],                 c=color) plt.xlabel(iris.feature_names[x_index]) plt.ylabel(iris.feature_names[y_index]) plt.legend(loc='upper left') plt.show()

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits

# Load the handwritten-digits dataset once and reuse it for every step below.
digits = load_digits()
print(digits.data.shape)

# Display the first image using the grayscale colormap.
plt.gray()
plt.matshow(digits.images[0])
plt.show()

digits.keys()
n_samples, n_features = digits.data.shape
print((n_samples, n_features))

print(digits.data.shape)
print(digits.images.shape)

# Check that flattening every image to one row reproduces `digits.data`.
# The shape comes from the data itself instead of hard-coded numbers.
print(np.all(digits.images.reshape((n_samples, n_features)) == digits.data))

fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
# Draw the first 64 digits on an 8x8 grid (each image is 8*8 pixels).
for position, (image, target) in enumerate(
        zip(digits.images[:64], digits.target[:64]), start=1):
    ax = fig.add_subplot(8, 8, position, xticks=[], yticks=[])
    ax.imshow(image, cmap=plt.cm.binary, interpolation='nearest')
    # Label the image with its target value.
    ax.text(0, 7, str(target))
plt.show()

from sklearn.datasets import load_svmlight_file

# Load a dataset stored in svmlight / libsvm format.
# The second positional argument of load_svmlight_file is `n_features`; the
# original passed an empty string there, which is not a valid feature count,
# so it is omitted and the number of features is inferred from the file.
# To load several files at once, use sklearn.datasets.load_svmlight_files
# with a list of paths instead.
x_train, y_train = load_svmlight_file("/path/to/train_dataset.txt")

import matplotlib.pyplot as plt
import numpy as np
# The generators are imported from the public `sklearn.datasets` namespace;
# the private `sklearn.datasets.samples_generator` path is deprecated.
from sklearn.datasets import make_blobs, make_circles, make_classification


def _plot_by_label(X, labels, title):
    """Plot the first two columns of X, one colour per distinct label.

    X is indexed as a 2-D array (`X[labels == k]`, then columns 0 and 1) and
    `labels` is compared element-wise against each distinct label value.
    The figure is given `title` and shown immediately.
    """
    unique_labels = sorted(set(labels))
    colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        x_k = X[labels == k]
        plt.plot(x_k[:, 0], x_k[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor="k", markersize=14)
    plt.title(title)
    plt.show()


# Generate a multi-class, single-label dataset.
center = [[1, 1], [-1, -1], [1, -1]]
cluster_std = 0.3
X, labels = make_blobs(n_samples=200, centers=center, n_features=2,
                       cluster_std=cluster_std, random_state=0)
print('X.shape', X.shape)
print("labels", set(labels))
_plot_by_label(X, labels, 'data by make_blobs()')

# Generate a dataset for classification.
X, labels = make_classification(n_samples=200, n_features=2, n_redundant=0,
                                n_informative=2, random_state=1,
                                n_clusters_per_class=2)
# Add uniform noise drawn from a seeded generator so the result is repeatable.
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
_plot_by_label(X, labels, 'data by make_classification()')

# Generate data with a circular decision boundary.
X, labels = make_circles(n_samples=200, noise=0.2, factor=0.2, random_state=1)
print("X.shape:", X.shape)
print("labels:", set(labels))
# The original title said make_moons(), but the data comes from make_circles().
_plot_by_label(X, labels, 'data by make_circles()')

参考资料:https://www.cnblogs.com/nolonely/p/6980160.html

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!