PCA降维（二） | 易学教程

下面是我实现的PCA代码，有不足之处还请指正

import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression


class myPCA:
    """Principal component analysis via eigen-decomposition of the covariance.

    The number of retained components is either given explicitly or, when
    ``n_components`` is 0, chosen as the smallest number of leading
    components whose cumulative explained-variance ratio reaches ``t``.

    Attributes:
        n_components: Target dimensionality. 0 means "decide from ``t``";
            it is overwritten with the resolved value the first time
            ``refact`` runs and stays fixed for later fits.
        t: Explained-variance threshold used when ``n_components`` is 0.
        w: Projection matrix of shape [num_feature, n_components]
            (an empty list until ``fit_transform`` has been called).
        mean_x: Per-feature mean of the fitted data
            (an empty list until ``fit_transform`` has been called).
    """

    def __init__(self, n_components=0, t=0.95):
        """Store the configuration.

        Args:
            n_components: Dimension after reduction. If 0, it is derived
                from the threshold ``t`` during fitting.
            t: Explained-variance threshold, default 0.95.
        """
        self.n_components = n_components
        self.t = t
        self.w = []
        self.mean_x = []

    def get_mean_X(self, X):
        """Record the column means of ``X`` and return the centred data.

        Args:
            X: numpy.ndarray of shape [num_sample, num_feature].

        Returns:
            numpy.ndarray of shape [num_sample, num_feature] with the
            per-feature mean subtracted.
        """
        self.mean_x = np.mean(X, axis=0)  # column-wise mean
        return X - self.mean_x

    def refact(self, A):
        """Resolve ``n_components`` from eigenvalues when it was left at 0.

        Does nothing if ``n_components`` is already non-zero. Otherwise
        picks the smallest k such that the k largest eigenvalues account
        for at least ``self.t`` of the total variance.

        Args:
            A: Array of eigenvalues (any order, any shape; it is flattened).
        """
        if self.n_components != 0:
            return

        # Sort here so the method is correct regardless of the caller's
        # ordering; clip tiny negative values caused by rounding.
        eigenvalues = np.sort(np.asarray(A, dtype=float).ravel())[::-1]
        eigenvalues = np.clip(eigenvalues, 0.0, None)
        d = eigenvalues.shape[0]
        total = eigenvalues.sum()

        if d == 0 or total <= 0:
            # No variance to explain: keep a single component if one exists.
            self.n_components = min(d, 1)
            return

        ratios = np.cumsum(eigenvalues) / total
        reached = np.nonzero(ratios >= self.t)[0]
        # ratios[i] covers i + 1 components; fall back to all of them if
        # rounding keeps the final ratio just below the threshold.
        self.n_components = int(reached[0]) + 1 if reached.size else d

    def fit_transform(self, X):
        """Fit the projection on ``X`` and return the reduced data.

        Sets ``self.mean_x`` and ``self.w`` (and ``self.n_components`` when
        it was 0).

        Args:
            X: numpy.ndarray of shape [num_sample, num_feature].

        Returns:
            numpy.ndarray of shape [num_sample, n_components].

        Raises:
            ValueError: If ``X`` is not 2-D or contains no samples.
        """
        X = np.asarray(X)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError(
                "X must be a non-empty 2-D array [num_sample, num_feature]"
            )

        m = X.shape[0]
        x = self.get_mean_X(X)
        # The covariance must be built from the centred data.
        cov_x = np.dot(x.T, x) / m

        # eigh is the routine for symmetric matrices: real eigenvalues and
        # orthonormal eigenvectors stored as columns.
        eigenvalues, eigenvectors = np.linalg.eigh(cov_x)
        order = np.argsort(eigenvalues)[::-1]  # largest variance first
        eigenvalues = eigenvalues[order]
        eigenvectors = eigenvectors[:, order]

        self.refact(eigenvalues)
        self.w = eigenvectors[:, :self.n_components]
        return np.dot(x, self.w)

    def transform(self, X):
        """Project ``X`` using the mean and components learned by fitting.

        Args:
            X: numpy.ndarray of shape [num_sample, num_feature].

        Returns:
            numpy.ndarray of shape [num_sample, n_components].
        """
        return np.dot(X - self.mean_x, self.w)

    def get_n_components(self):
        """Return the current number of components."""
        return self.n_components

调用方法如下：

pca = myPCA()
# train_x: [num_sample, num_feature]
train_x = pca.fit_transform(train_x)
# Reduce test_x with the projection matrix w and the mean learned in the previous step.
test_x = pca.transform(test_x)

来源：51CTO

作者：a little boy

链接：https://blog.csdn.net/littleboy__/article/details/102647512

标签

pca

num