目录
使用Cochrane and Orcutt方法进行变量变换¶
Ex2 analysis of housing starts
EX1
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.stats.outliers_influence # 异常值检验
data=pd.read_csv("C:/Users/可乐怪/Desktop/csv/P199.csv")
data['constant']=1
table8.2
model=sm.OLS(data['Expenditure'],data[['constant','Stock']]).fit()
print(model.summary())
np.round(model.params,2)
OLS Regression Results ============================================================================== Dep. Variable: Expenditure R-squared: 0.957 Model: OLS Adj. R-squared: 0.955 Method: Least Squares F-statistic: 403.2 Date: Thu, 11 Nov 2021 Prob (F-statistic): 8.99e-14 Time: 14:13:22 Log-Likelihood: -54.964 No. Observations: 20 AIC: 113.9 Df Residuals: 18 BIC: 115.9 Df Model: 1 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ constant -154.7192 19.850 -7.794 0.000 -196.423 -113.016 Stock 2.3004 0.115 20.080 0.000 2.060 2.541 ============================================================================== Omnibus: 2.018 Durbin-Watson: 0.328 Prob(Omnibus): 0.365 Jarque-Bera (JB): 1.446 Skew: -0.453 Prob(JB): 0.485 Kurtosis: 2.044 Cond. No. 3.86e+03 ============================================================================
constant -154.72 Stock 2.30 dtype: float64
outliers=model.get_influence()
res=outliers.resid_studentized_internal
index=range(1,21,1)
plt.xlabel('Index',size=15)
plt.ylabel('Residuals',size=15)
plt.title('Figure 8.1',size=20,c="#653700")
plt.plot(index,res,'bo-.',alpha=0.7)
plt.axhline(y=0,ls="-",c="r",alpha=0.5)#添加水平直线
plt.show()
存在自相关
def runs(res):#游程检验
n1=0
n2=0
for i in res:
if i>0:
n1+=1
else:
n2+=1
μ=(2*n1*n2)/(n1+n2)+1
σ=(((2*n1*n2*(2*n1*n2-n1-n2)))/((n1+n2)**2)/(n1+n2-1))**0.5
return μ,σ
print("游程检验的均值和标准差为:\n",runs(res))
def DW1(y,x):
model=sm.OLS(y,x).fit()
res=y-model.fittedvalues
sse=model.ssr#ols模型的残差平方和
e=[res[i]-res[i-1] for i in range(1,len(y))]
e=np.sum(np.power(e, 2))#e^2
d=np.round(e/sse,3)
ρ1=1-d/2
e2=[res[i]*res[i-1] for i in range(1,len(y))]
e2=np.sum(e2)
ρ2 = np.round(e2/ sse,3)
return d,ρ1,ρ2
print("d=%s,ρ1=%sρ2=%s"%(DW1(data['Expenditure'],data[['constant','Stock']])))
ρ2=DW1(data['Expenditure'],data[['constant','Stock']])[2]
global ρ2
d=0.328<dL,reject Ho存在自相关
使用Cochrane and Orcutt方法进行变量变换¶
y_new=[data['Expenditure'][i]-(ρ2)*data['Expenditure'][i-1] for i in range(1,len(data['Expenditure']))]
x_new=[data['Stock'][i]-(ρ2)*data['Stock'][i-1] for i in range(1,len(data['Expenditure']))]
data_new=pd.DataFrame({'y_new':y_new,'x_new':x_new,'constant':1})
model2=sm.OLS(data_new['y_new'],data_new[['constant','x_new']]).fit()
print(model2.summary())
np.round(model2.params,2)
model2.ssr
得到新的方程y_new=-53.64+x_new*2.64¶
β0=-53.64/(1-0.751)
np.round(β0,3)
原变量表示的方程:y=-215.422+x*2.64
outliers=model2.get_influence()
res=outliers.resid_studentized_internal
index=range(1,20,1)
plt.plot(index,res,'bo-.',alpha=0.7)
plt.xlabel('Index',size=15)
plt.ylabel('Residuals',size=15)
plt.title('Figure 8.2',size=20,c="#653700")
plt.axhline(y=0,ls="-",c="r",alpha=0.5)#添加水平直线
plt.show()
def DW2(y,x):
model2=sm.OLS(y,x).fit()
res=y-model2.fittedvalues
sse=model2.ssr
e=[res[i]-res[i-1] for i in range(1,len(y))]
e=np.sum(np.power(e, 2))#e^2
d=np.round(e/sse,3)
ρ1=1-d/2
e2=[res[i]*res[i-1] for i in range(1,len(y))]
e2=np.sum(e2)
ρ2 = np.round(e2/ sse,3)
return d,ρ1,ρ2
print("d=%s,ρ1=%sρ2=%s"%(DW2(data_new['y_new'],data_new[['constant','x_new']])))
d=1.427,ρ1=0.2865 ρ2=0.24
d=1.427>dL,acceptHo不存在自相关
Ex2 analysis of housing starts
data=pd.read_csv("C:/Users/可乐怪/Desktop/csv/P207.csv")
data['constant']=1
model=sm.OLS(data['H'],data[['constant','P']]).fit()
print(model.summary())
outliers=model.get_influence()
res=outliers.resid_studentized_internal
index=range(1,26)
plt.plot(index,res,'bo-.')
plt.xlabel("index",size=15)
plt.ylabel("residuals",size=15)
plt.title('Figure 8.3',size=20,c="#653700")
plt.axhline(y=0,ls="-",c="r",alpha=0.5)#添加水平直线
plt.show()
存在强自相关,引入变量Dt
model2=sm.OLS(data['H'],data[['constant','P','D']]).fit()
print(model2.summary())
outliers=model3.get_influence()
res2=outliers.resid_studentized_internal
index=range(1,26)
plt.plot(index,res2,'bo-.')
plt.xlabel("index",size=15)
plt.ylabel("residuals",size=15)
plt.title('Figure 8.4',size=20,c="#653700")
plt.axhline(y=0,ls="-",c="r",alpha=0.5)#添加水平直线
plt.show()
d=1.852>dL,acceptHo不存在自相关
Ex3
拟合模型
data=pd.read_csv("C:/Users/可乐怪/Desktop/csv/P212.csv")
data['constant']=1
data['index']=range(0,40)
model=sm.OLS(data['Sales'],data[['PDI','constant']]).fit()
print(model.summary())
outliers=model.get_influence()
res=outliers.resid_studentized_internal
data['res']=res
data2=data.loc[data['Season']==1]#通过行标签索引行数据
data3=data.loc[data['Season']==0]
plt.scatter(data2['index'],data2['res'],c='r',marker='+')
plt.scatter(data3['index'],data3['res'],c='b',marker='o')
plt.xlabel("index",size=15)
plt.ylabel("residuals",size=15)
plt.title("Figure8.5",size=20,c="#653700")
plt.legend(['1,4Q','2,3Q'])#
plt.show()
忽视了季节效应,引入指示变量去消除
data2=data.loc[data['Season']==1]#通过行标签索引行数据
data3=data.loc[data['Season']==0]
model2=sm.OLS(data2['Sales'],data2[['constant','PDI']]).fit()
model3=sm.OLS(data3['Sales'],data3[['constant','PDI']]).fit()
fit2=model2.fittedvalues
fit3=model3.fittedvalues
plt.plot(data2['PDI'],fit2,c='r',alpha=0.7)
plt.plot(data3['PDI'],fit3,c='b',alpha=0.7)
plt.yticks([])#取消坐标刻度
plt.xticks([])
plt.ylim(30)
plt.xlim(130,200)#plt.xlim(xmin,xmax)----xmin:x轴上的显示下限 xmax:x轴上的显示上限
plt.xlabel("PDI",size=15)
plt.ylabel("S",size=15)
plt.title("Figure8.6",size=20,c="#653700")
plt.text(146,45,"Slope=β1",rotation=19,size=15)#给图中的点加标签
plt.text(175,51,"Winter",rotation=19,size=15)
plt.text(146,40,"Slope=β1",rotation=19,size=15)
plt.text(175,45,"Summer",rotation=19,size=15)
plt.text(131,32,'β0',size=15)
plt.text(131,38,'β1',size=15)
plt.show()
model2=sm.OLS(data['Sales'],data[['constant','PDI','Season']]).fit()
print(model2.summary())
model2.conf_int()
outliers2=model2.get_influence()
res2=outliers2.resid_studentized_internal
data['res2']=res2
data2=data.loc[data['Season']==1]#通过行标签索引行数据
data3=data.loc[data['Season']==0]
plt.scatter(data2['index'],data2['res2'],c='r',marker='+')
plt.scatter(data3['index'],data3['res2'],c='b',marker='o')
plt.xlabel("index",size=15)
plt.ylabel("residuals",size=15)
plt.title("Figure8.7",size=20,c="#653700")
plt.legend(['1,4Q','2,3Q'])#
plt.show()
如图季节效应被消除了¶