I have performed a PCA on my original dataset, and from the compressed dataset produced by the PCA transform I have also selected the number of principal components (PCs) I want to keep (they ex
# original_num_df: the original numeric dataframe
# pca: the fitted PCA model (its components_ attribute is read below)
def create_importance_dataframe(pca, original_num_df):
    """Build a feature-by-component table of absolute PCA loadings.

    Parameters
    ----------
    pca : object
        Fitted PCA-like model exposing ``components_`` with one row per
        principal component and one column per original feature.
    original_num_df : pandas.DataFrame
        Numeric dataframe the PCA was computed from. Only its column
        labels are read; they become the row labels of the result.

    Returns
    -------
    pandas.DataFrame
        One row per original feature and one column per component, named
        ``PC1`` ... ``PCk``. Each cell is the absolute value of the
        corresponding entry of ``pca.components_``.

    Raises
    ------
    ValueError
        If the number of columns in ``original_num_df`` does not match
        the width of ``pca.components_``.
    """
    # Rows are components, columns are features at this point.
    importance_df = pd.DataFrame(pca.components_)
    importance_df.columns = original_num_df.columns

    # The sign of a loading only encodes direction; the magnitude is what
    # is used for ranking, so drop the sign and put features on the rows.
    importance_df = importance_df.abs().T

    # After the transpose each column is one component: label them PC1..PCk.
    num_pcs = importance_df.shape[1]
    importance_df.columns = [f'PC{i}' for i in range(1, num_pcs + 1)]

    return importance_df
# Build the absolute-loadings table: one row per original feature, one
# column per principal component.
importance_df = create_importance_dataframe(pca, original_num_df)

# Show first few rows
# NOTE(review): `display` is presumably IPython's notebook display helper;
# it is not defined in this snippet -- confirm it is in scope.
display(importance_df.head())

# Rank the features by absolute loading on each PC of interest.

## PC1 top 10 important features
pc1_top_10_features = importance_df['PC1'].sort_values(ascending=False).head(10)
print()
print('PC1 top 10 features are \n')
display(pc1_top_10_features)

## PC2 top 10 important features
pc2_top_10_features = importance_df['PC2'].sort_values(ascending=False).head(10)
print()
print('PC2 top 10 features are \n')
display(pc2_top_10_features)