应用多元统计分析(9)第八章 因子分析

课堂代码整理及注释

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler 

# 主因子法
dat = pd.read_excel("F:\\基础数学课\\应用多元统计分析\\exec6.5.xlsx",index_col=0)

R = dat.corr()
RI = np.linalg.inv(R)
h0sq = 1-1/np.diag(RI)

phi0 = np.diag(1 - h0sq)
phi0=pd.DataFrame(phi0,index=R.index,columns=R.index)
R_star = R-phi0

L,U = np.linalg.eig(R_star)
# 与课本一致
U[:,:2]
L[:2]
A0 = U[:,:2].dot(np.diag(L[:2]**0.5))

h1sq = np.sum(A0**2,axis=1)

phi1 = np.diag(1-h1sq)

## 判断最后有没有达到稳定
sum(np.diag(phi1-phi0)**2)**0.5


phi1=pd.DataFrame(phi1,index=R.index,columns=R.index)
R_star1 = R-phi1

L,U = np.linalg.eig(R_star1)

A1 = U[:,:4].dot(np.diag(L[:4]**0.5))

h2sq = np.sum(A1**2,axis=1)
phi2 = np.diag(1-h2sq)

sum(np.diag(phi2-phi1)**2)**0.5

phi2=pd.DataFrame(phi2,index=R.index,columns=R.index)
R_star2 = R-phi2

L,U = np.linalg.eig(R_star2)

A2 = U[:,:4].dot(np.diag(L[:4]**0.5))

h3sq = np.sum(A2**2,axis=1)
phi3 = np.diag(1-h3sq)

sum(np.diag(phi3-phi2)**2)**0.5

##
def FA(R,m,e=0.001,N=1000):
    h0sq = 1-1/np.diag(np.linalg.inv(R))
    phi0 = np.diag(1 - h0sq)
    L,U = np.linalg.eig(R-phi0)
    # 与课本一致 取两个
    A0 = U[:,:2].dot(np.diag(L[:2]**0.5))

    for i in range(N):
        h1sq = np.apply_along_axis(lambda x:sum(x**2),1,A0)
        phi1 = np.diag(1-h1sq)
        ## 判断最后有没有达到稳定
        e1 = sum(np.diag(phi1-phi0)**2)**0.5
        if e1 < e :
            break
        else:
            L,U = np.linalg.eig(R-phi1)
            phi0 = phi1
            A0 = U[:,:2].dot(np.diag(L[:2]**0.5))

    return (A0,e1,i)

A = FA(R,2)[0]
A1 = np.apply_along_axis(lambda x : x/(sum(x**2))**0.05,1,A)

u = A1[:,0]**2 - A1[:,1]**2
v = 2*A1[:,0]*A1[:,1]
A = sum(u)
B = sum(v)
C = sum(u**2-v**2)
D = 2*sum(u*v)
p = 8
tgvalue = (D-2*A*B/p)/(C-(A**2-B**2/p))

np.arctan(tgvalue)
phi = np.arctan(tgvalue)+np.pi/4
T = np.array([[np.cos(phi),np.sin(phi)],[-np.sin(phi),np.cos(phi)]])
B = A.dot(T)


## Thompson factor score


# 主成分法
dat = pd.read_excel("F:\\基础数学课\\应用多元统计分析\\exec6.5.xlsx",index_col=0)

R = dat.corr()

L,U = np.linalg.eig(R)

A = U[:,:2].dot(np.diag(L[:2]**0.5))
C = U[:,2:].dot(np.diag(L[2:]**0.5))
pd.DataFrame(C.dot(C.T)) #是否近似一个对角阵
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值