用Python做因子分析的一个典型例子
一、实验目的
采用合适的数据分析方法，对下面的问题进行解答。
二、实验要求
采用因子分析方法，根据48位应聘者的15项指标得分，选出6名最优秀的应聘者。
三、代码
import pandas as pd
import numpy as np
import math as math
import numpy as np
from numpy import *
from scipy.stats import bartlett
from factor_analyzer import *
import numpy.linalg as nlg
from sklearn.cluster import KMeans
from matplotlib import cm
import matplotlib.pyplot as plt
def main():
df=pd.read_csv("./data/applicant.csv")#print(df)
df2=df.copy()print("\n原始数据:\n",df2)del df2['ID']#print(df2)
#皮尔森相关系数
df2_corr=df2.corr()print("\n相关系数:\n",df2_corr)#热力图
cmap =cm.Blues#cmap = cm.hot_r
fig=plt.figure()
ax=fig.add_subplot(111)
map= ax.imshow(df2_corr, interpolation='nearest', cmap=cmap, vmin=0, vmax=1)
plt.title('correlation coefficient--headmap')
ax.set_yticks(range(len(df2_corr.columns)))
ax.set_yticklabels(df2_corr.columns)
ax.set_xticks(range(len(df2_corr)))
ax.set_xticklabels(df2_corr.columns)
plt.colorbar(map)
plt.show()#KMO测度
defkmo(dataset_corr):
corr_inv=np.linalg.inv(dataset_corr)
nrow_inv_corr, ncol_inv_corr=dataset_corr.shape
A=np.ones((nrow_inv_corr, ncol_inv_corr))for