两两 $x_i$ 和 $x_j$ 交互项
$$
e_i^2 = \alpha_0+\alpha_1X_{5i}+\alpha_2X_{6i}+\cdots+\alpha_{8}X'_{13i}+\alpha_{9}X_{5i}^2+\cdots+\alpha_{16}X'^2_{13i}+\alpha_{17}X_{1i}X_{2i}+\cdots+\alpha_{44}X_{12i}X'_{13i}+\varepsilon_i
$$
$$
H_0:\alpha_i=0,\ i=1,2,\cdots,44 ; \quad H_1: \alpha_i \text{ 不全为 } 0
$$
统计量 $WT(p)=nR^2\sim\chi^2(p)$
我的代码写错了,这阵子忙完再改↓
import statsmodels.api as sm # 线性回归
def white_test(x, e):
    """Run White's test for heteroskedasticity on regression residuals.

    The squared residuals are regressed on a constant, the original
    regressors, their squares and every pairwise cross product. The test
    statistic is ``n * R^2`` of that auxiliary regression, compared against a
    chi-square distribution whose degrees of freedom equal the number of
    auxiliary regressors (the constant excluded).

    Args:
        x: pandas DataFrame of explanatory (independent) variables, one
            column per regressor. It must not already contain a column named
            ``'constant'``; the function inserts one itself.
        e: Residuals of the original regression, ``e = y - y_predict``, with
            one entry per row of ``x``.

    Returns:
        The p-value of the test, i.e. ``P(chi2(df) > wt)``. A small value is
        evidence against H0 (homoskedasticity).
    """
    # Imported locally so the function does not depend on module-level
    # imports that may be missing from the file.
    import numpy as np
    from scipy import stats

    # Add squares and cross products for ALL regressors, including the last
    # column (pairs (i, j) with j at or after i, so each product appears once).
    columns = list(x.columns)
    xx = x.copy()
    for n, i in enumerate(columns):
        for j in columns[n:]:
            xx['%s*%s' % (i, j)] = x[i] * x[j]
    print(xx.columns)

    # Auxiliary OLS regression: the dependent variable is the SQUARED
    # residual, the regressors are the augmented matrix plus a constant.
    x1 = xx.copy()
    x1.insert(0, 'constant', 1)
    result_wt = sm.OLS(np.square(e), x1).fit()

    # NOTE(review): df counts every auxiliary column; this assumes none of
    # them is perfectly collinear with the others - confirm for dummy inputs.
    df = xx.shape[1]
    wt = len(xx) * result_wt.rsquared  # n * R^2
    chi = stats.chi2.ppf(1 - 0.05, df=df)  # 5% level -> 0.95 quantile
    print('卡方在0.95分位数的临界值是%.2f'%chi)
    if wt < chi:
        print("计算出来的wt统计量是%.2f, 接受H0,认为不存在异方差"%wt)
    else:
        print("计算出来的wt统计量是%.2f, 拒绝H0, 接受H1,认为存在异方差"%wt)
    # Survival function = 1 - cdf, the upper-tail p-value.
    return stats.chi2.sf(wt, df=df)
if __name__ == "__main__":
    # Demo input: a 10x2 frame holding the integers 0..19, columns x1 and x2.
    x = pd.DataFrame(np.arange(20).reshape(10, 2), columns=['x1', 'x2'])
    # e stands for the residuals computed by the preceding regression; the
    # Ellipsis is a placeholder and must be replaced before running.
    e = ...
    white_test(x, e)