思路:对多维特征使用 PCA 降到 2 维,然后画散点图查看数据分布。
reference:传送门
我的数据共四类,每个样本有 48 维特征和 1 个标签(label,取值为 2、3、4、5),下面看一下各类的分布情况。
源码
# Parameters:
#   X: feature data (one row per sample)
#   y: labels (2, 3, 4, 5)
# Plots the data distribution.
def distribution(X, y):
    """Scatter-plot the samples of X in 2-D PCA space, coloured by label.

    X is reduced to two principal components with scikit-learn's PCA and
    each sample is drawn in the style of its class:

        label 2         -> red 'x',    legend '驱逐'
        label 3         -> blue '+',   legend '失败'
        label 5         -> yellow '*', legend '被杀'
        any other label -> green 'o',  legend '完成'

    Args:
        X: Feature data accepted by ``PCA.fit_transform``, one row per
            sample.
        y: One label per sample, in the same order as the rows of X. Any
            array-like is accepted; it is flattened and matched to rows
            by position, so a pandas Series with a non-default index or
            an (n, 1) column works as well as a plain list.

    Raises:
        ValueError: If the number of labels differs from the number of
            samples.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    import matplotlib.pyplot as plt  # plotting
    import numpy as np  # positional label handling / boolean masks
    from sklearn.decomposition import PCA  # dimensionality reduction

    # Keep only the first two principal components.
    reduced_x = PCA(n_components=2).fit_transform(X)

    # Convert once to a flat array: indexing ``y[i]`` directly is
    # label-based on a pandas Series and breaks on a non-default index.
    labels = np.asarray(y).ravel()
    if len(labels) != len(reduced_x):
        raise ValueError(
            "X and y must have the same number of samples: "
            "got %d rows and %d labels" % (len(reduced_x), len(labels))
        )

    is_red = labels == 2
    is_blue = labels == 3
    is_yellow = labels == 5
    # Everything that is not 2, 3 or 5 falls into the green group.
    is_green = ~(is_red | is_blue | is_yellow)

    # (mask, colour, marker, legend label), in drawing/legend order.
    # NOTE(review): the legend labels are Chinese; presumably a
    # CJK-capable font must be configured in matplotlib for them to
    # render -- confirm in the target environment.
    groups = (
        (is_red, 'r', 'x', '驱逐'),
        (is_blue, 'b', '+', '失败'),
        (is_green, 'g', 'o', '完成'),
        (is_yellow, 'y', '*', '被杀'),
    )

    # Visualisation
    for mask, colour, marker, name in groups:
        plt.scatter(
            reduced_x[mask, 0],
            reduced_x[mask, 1],
            c=colour,
            marker=marker,
            label=name,
        )
    plt.legend()
    plt.show()