import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
# Load the CSV data. The path literal below is a placeholder ("your data
# path") and must be replaced with the location of the real dataset.
df = pd.read_csv('你的数据路径')

# Split the table positionally: every column except the last is treated as a
# feature, and the last column holds the label of each row.
features = df.iloc[:, :-1]
labels = df.iloc[:, -1]

# Display names used for the legend entries of the scatter plot.
classes = ['class1', 'class2', 'class3', 'class4', 'class5']

# Distinct label values, in order of first appearance. They are derived from
# `labels` itself instead of a hard-coded 'label' column so that they always
# agree with the masks built from `labels`, whatever the last column is named.
unique_labels = labels.unique()

# Fit PCA and project the features onto the first three principal components.
pca = PCA(n_components=3)
pca_result = pca.fit_transform(features)
# Scatter plot of the samples projected onto the first two principal
# components, one colour/legend entry per distinct label value.
plt.figure(figsize=(10, 7))
# Iterate over the labels actually present in the data (not over the fixed
# list of names), so that fewer labels than names does not raise IndexError
# and additional labels are not silently left out of the plot.
for i, label_value in enumerate(unique_labels):
    # The i-th distinct label gets the i-th display name; if there are more
    # labels than names, fall back to the raw label value.
    legend_name = classes[i] if i < len(classes) else str(label_value)
    # Convert the boolean Series to a plain array so that rows of the NumPy
    # result are selected by position, independent of the DataFrame index.
    mask = (labels == label_value).to_numpy()
    plt.scatter(pca_result[mask, 0], pca_result[mask, 1], label=legend_name)
plt.xticks(fontsize=20)  # font size of the x-axis tick labels
plt.yticks(fontsize=20)  # font size of the y-axis tick labels
plt.legend(fontsize=18)
# Annotate each axis with the share of variance its component explains.
explained_variance_ratio = pca.explained_variance_ratio_
plt.xlabel(f'PC-1 ({explained_variance_ratio[0]*100:.2f}%)', size=20)
plt.ylabel(f'PC-2 ({explained_variance_ratio[1]*100:.2f}%)', size=20)
# NOTE: a third component is computed as well, but this figure is a 2-D
# projection and only shows PC-1 against PC-2.
plt.title('PCA Result', fontsize=24)
plt.show()
# Loadings: one row of `pca.components_` per principal component, one column
# per input feature.
loadings = pca.components_
# Name the components from the number actually fitted instead of hard-coding
# three names, so changing `n_components` does not break the code below.
component_names = [f'PC-{k + 1}' for k in range(loadings.shape[0])]
# Transposed table: rows are features, columns are components.
loadings_df = pd.DataFrame(loadings.T, columns=component_names)

# Plot every component's loadings as a curve over the feature index.
plt.figure(figsize=(10, 7))
# Fixed colours for the first three components; any further component falls
# back to matplotlib's default colour cycle.
palette = ['blue', 'red', 'green']
for k, (name, weights) in enumerate(zip(component_names, loadings)):
    line_style = {'color': palette[k]} if k < len(palette) else {}
    plt.plot(weights, label=name, **line_style)
plt.xlabel('Feature Index', fontsize=14)
plt.ylabel('Loading Value', fontsize=14)
plt.title('Loading Waveforms', fontsize=16)
plt.legend(fontsize=12)
plt.grid(True)
plt.show()
# Spectral PCA: a Python implementation
# First published on 2023-09-21 15:34:19