下面是我实现的 PCA 代码,如有不足之处,还请指正。
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression


class myPCA:
    """Principal component analysis via eigendecomposition of the covariance.

    The model learns the per-feature mean and a projection matrix ``w`` whose
    columns are the leading eigenvectors of the covariance matrix of the
    centred training data.

    Args:
        n_components: Target dimension after reduction. ``0`` means "choose
            automatically": the smallest number of leading components whose
            cumulative share of the total variance reaches ``t``.
        t: Cumulative explained-variance threshold used when ``n_components``
            is ``0``. Defaults to ``0.95``.

    Attributes:
        n_components: Dimension in use (replaced by the selected value after
            fitting in automatic mode).
        t: The variance threshold.
        w: Projection matrix of shape ``[num_feature, n_components]``; an
            empty list until the model is fitted.
        mean_x: Per-feature training mean of shape ``[num_feature]``; an
            empty list until the model is fitted.
    """

    def __init__(self, n_components=0, t=0.95):
        self.n_components = n_components
        self.t = t
        self.w = []
        self.mean_x = []
        # Value picked by the last automatic selection (None if there was
        # none). It lets a later fit recognise that ``n_components`` was
        # derived from data, not supplied by the caller, and re-derive it.
        self._auto_selected = None

    def get_mean_X(self, X):
        """Store the per-feature mean of ``X`` and return the centred data.

        Args:
            X: Array-like of shape ``[num_sample, num_feature]``.

        Returns:
            numpy.ndarray of shape ``[num_sample, num_feature]`` equal to
            ``X`` minus its column means. The means are kept in
            ``self.mean_x``.
        """
        X = np.asarray(X, dtype=float)
        self.mean_x = np.mean(X, axis=0)  # column-wise mean
        return X - self.mean_x

    def refact(self, A):
        """Choose ``n_components`` from the eigenvalues when it is ``0``.

        Does nothing if ``self.n_components`` is already non-zero. Otherwise
        picks the smallest ``k`` such that the first ``k`` eigenvalues account
        for at least ``self.t`` of their total. If the threshold cannot be
        reached (``t`` above 1, rounding, or zero total variance) all
        components are kept.

        Args:
            A: 1-D array of eigenvalues sorted in descending order.
        """
        if self.n_components != 0:
            return

        eigenvalues = np.asarray(A, dtype=float).ravel()
        chosen = eigenvalues.shape[0]  # fallback: keep every component
        total = np.sum(eigenvalues)
        if total > 0:
            ratios = np.cumsum(eigenvalues) / total
            reached = np.nonzero(ratios >= self.t)[0]
            if reached.size:
                # ratios[k - 1] is the share explained by the first k
                # components, hence the +1 when turning an index into a count.
                chosen = int(reached[0]) + 1

        self.n_components = chosen
        self._auto_selected = chosen

    def fit_transform(self, X):
        """Fit the projection on ``X`` and return ``X`` in the reduced space.

        Args:
            X: Array-like of shape ``[num_sample, num_feature]``.

        Returns:
            numpy.ndarray of shape ``[num_sample, n_components]``.

        Raises:
            ValueError: If ``X`` is not a non-empty 2-D array.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError(
                "X must be a non-empty 2-D array of shape "
                "[num_sample, num_feature]"
            )

        m = X.shape[0]
        x = self.get_mean_X(X)

        # The covariance must be built from the centred data; using the raw
        # data would let the mean dominate the leading component.
        cov_x = np.dot(x.T, x) / m

        # The covariance matrix is symmetric, so eigh applies: it returns
        # real eigenvalues (ascending) and orthonormal eigenvectors as columns.
        eigenvalues, eigenvectors = np.linalg.eigh(cov_x)
        order = np.argsort(eigenvalues)[::-1]  # largest eigenvalue first
        # Tiny negative eigenvalues are rounding noise; clip them so they do
        # not distort the explained-variance ratios.
        eigenvalues = np.clip(eigenvalues[order], 0.0, None)
        eigenvectors = eigenvectors[:, order]

        # If the current dimension came from a previous automatic selection,
        # re-derive it for the new data instead of silently reusing it.
        if (self._auto_selected is not None
                and self.n_components == self._auto_selected):
            self.n_components = 0
        self.refact(eigenvalues)

        self.w = eigenvectors[:, :self.n_components]
        return np.dot(x, self.w)

    def transform(self, X):
        """Project ``X`` using the mean and projection learned while fitting.

        Args:
            X: Array-like of shape ``[num_sample, num_feature]``.

        Returns:
            numpy.ndarray of shape ``[num_sample, n_components]``.

        Raises:
            ValueError: If the model has not been fitted yet.
        """
        if np.size(self.mean_x) == 0:
            raise ValueError(
                "myPCA is not fitted yet; call fit_transform first"
            )
        centred = np.asarray(X, dtype=float) - self.mean_x
        return np.dot(centred, self.w)

    def get_n_components(self):
        """Return the number of components currently in use."""
        return self.n_components
调用方法如下:
pca = myPCA()
# train_x: [num_sample, num_feature]
train_x = pca.fit_transform(train_x)
# Reduce test_x with the projection matrix w and the mean learned in the previous step
test_x = pca.transform(test_x)
来源:51CTO
作者:a little boy
链接:https://blog.csdn.net/littleboy__/article/details/102647512