问题:如何绘制二维和三维的主成分图,以及如何解释?
Code: 二维图
import matplotlib.pyplot as plt
import numpy as np
def biplot(reduced_data, labels, pc, variable):
    """Plot a compositional biplot for the first two principal components.

    Samples are drawn as a scatter plot, one series per distinct label,
    with each axis rescaled by the reciprocal of its score range. Every
    variable is then drawn as an arrow from the origin to
    ``(pc[0, i], pc[1, i])`` and annotated with ``variable[i]``. The figure
    is finally handed to ``save_fig``.

    :param reduced_data: data processed by PCA; columns 0 and 1 are used
        as the PC1 and PC2 scores of each sample
    :param labels: labels of original dataset, one per row of
        ``reduced_data``
    :param pc: all the principal components; row ``k``, column ``i`` is
        read as the entry of variable ``i`` on component ``k``
    :param variable: the names of the variables of the data set
    """
    # Coerce to ndarrays so that ``labels == label`` is an element-wise
    # mask even when a plain list is passed (a list would compare to a
    # single ``False`` and silently select nothing).
    reduced_data = np.asarray(reduced_data)
    labels = np.asarray(labels)
    pc = np.asarray(pc)

    plt.figure(1, figsize=(14, 10))
    classes = np.unique(labels)  # distinct label values
    n = pc.shape[1]  # number of variables (one arrow each)

    x = reduced_data[:, 0]  # sample scores on PC1
    y = reduced_data[:, 1]  # sample scores on PC2

    # Rescale each axis by its range; fall back to 1.0 when all scores on
    # an axis are identical to avoid dividing by zero.
    x_range = x.max() - x.min()
    y_range = y.max() - y.min()
    scalex = 1.0 / x_range if x_range else 1.0
    scaley = 1.0 / y_range if y_range else 1.0

    # Draw the samples projected onto the PC1-PC2 plane, one series per
    # class. Pass c=... / marker=... to plt.scatter to style each class.
    for label in classes:
        mask = labels == label
        plt.scatter(x[mask] * scalex,
                    y[mask] * scaley,
                    linewidth=0.01,
                    label="Label: {}".format(label))
    plt.legend()

    # Plot arrows as the variable contribution: each variable has one
    # score for PC1 and one for PC2.
    for i in range(n):
        plt.arrow(0, 0, pc[0, i], pc[1, i], color='k', alpha=0.7,
                  linewidth=1)
        # Place the name slightly beyond the arrow tip.
        plt.text(pc[0, i] * 1.01, pc[1, i] * 1.01, variable[i],
                 ha='center', va='center', color='k', fontsize=12)

    plt.xlabel("$PC1$")
    plt.ylabel("$PC2$")
    plt.title("Compositional biplot")
    plt.grid()
    save_fig("Compositional biplot")
解释:
- 能够清楚地看到哪些特征对主成分(PC)的贡献较大;一般来说,需要分别分析哪些特征对PC1较为重要,哪些特征对PC2较为重要;
- 对于PC1来说,元素的横坐标的绝对值越大,越重要
- 对于PC2来说,元素的纵坐标的绝对值越大,越重要
Code:三维图
import matplotlib.pyplot as plt
import numpy as np
def componential_plot_static_3d(reduced_data, labels, pc, variable):
    """Draw a static 3D componential plot for three principal components.

    Samples are drawn as a 3D scatter plot, one series per distinct label,
    with each axis rescaled by the reciprocal of its score range. Every
    variable is then drawn as an arrow from the origin to
    ``(pc[0, i], pc[1, i], pc[2, i])`` and annotated with ``variable[i]``.
    The figure is finally handed to ``save_fig``.

    :param reduced_data: data processed by PCA; columns 0, 1 and 2 are
        used as the PC1, PC2 and PC3 scores of each sample
    :param labels: labels of original dataset, one per row of
        ``reduced_data``
    :param pc: all the principal components; row ``k``, column ``i`` is
        read as the entry of variable ``i`` on component ``k``
    :param variable: the names of the variables of the data set
    """
    # Coerce to ndarrays so that ``labels == label`` is an element-wise
    # mask even when a plain list is passed (a list would compare to a
    # single ``False`` and silently select nothing).
    reduced_data = np.asarray(reduced_data)
    labels = np.asarray(labels)
    pc = np.asarray(pc)

    plt.figure(1, figsize=(14, 12))
    ax = plt.axes(projection='3d')
    classes = np.unique(labels)  # distinct label values
    n = pc.shape[1]  # number of variables (one arrow each)

    x = reduced_data[:, 0]  # sample scores on PC1
    y = reduced_data[:, 1]  # sample scores on PC2
    z = reduced_data[:, 2]  # sample scores on PC3

    # Rescale each axis by its range; fall back to 1.0 when all scores on
    # an axis are identical to avoid dividing by zero.
    x_range = x.max() - x.min()
    y_range = y.max() - y.min()
    z_range = z.max() - z.min()
    scalex = 1.0 / x_range if x_range else 1.0
    scaley = 1.0 / y_range if y_range else 1.0
    scalez = 1.0 / z_range if z_range else 1.0

    # Draw the samples projected into the PC1-PC2-PC3 space, one series
    # per class. Pass c=... / marker=... to ax.scatter3D to style each
    # class.
    for label in classes:
        mask = labels == label
        ax.scatter3D(x[mask] * scalex,
                     y[mask] * scaley,
                     z[mask] * scalez,
                     linewidth=0.01,
                     label="Label: {}".format(label))
    ax.legend()

    # The initial viewing angle of the 3D plot.
    azim = -60  # azimuth
    elev = 30  # elevation
    ax.view_init(elev, azim)

    # Plot arrows as the variable contribution: each variable has one
    # score for PC1, PC2 and PC3 respectively.
    for i in range(n):
        ax.quiver(0, 0, 0, pc[0, i], pc[1, i], pc[2, i], color='k',
                  alpha=0.7, linewidth=1, arrow_length_ratio=0.05)
        # Place the name slightly beyond the arrow tip.
        ax.text(pc[0, i] * 1.1, pc[1, i] * 1.1, pc[2, i] * 1.1,
                variable[i],
                ha='center', va='center', color='k', fontsize=12)

    ax.set_xlabel("$PC1$")
    ax.set_ylabel("$PC2$")
    ax.set_zlabel("$PC3$")
    plt.title("Componential Plot in 3 Dimension")
    plt.grid()
    save_fig("Componential_Plot_in_3_Dimension")
- elevation:仰角;azimuth:方位角;通过 ax.view_init(elev, azim) 设置视角
- 当elevation=0时,视角为沿x1负方向看,当elevation=90时,视角沿x3负方向看。
- 当azimuth=0时,视角为沿x1负方向看,当azimuth=90时,视角沿x2负方向看。
- 随着azimuth的增加,从x3负方向看,x1x2平面是顺时针旋转的。
- 逆时针旋转,能把x1,x2的大小顺序调整为常规平面坐标系。
进一步学习【如何绘制三维PCA动图】
参考资料:
PCA clearly explained —When, Why, How to use it and feature importance: A guide in Python