Regression Chapter 8

最新推荐文章于 2024-09-09 00:00:00 发布

可乐怪C

最新推荐文章于 2024-09-09 00:00:00 发布

阅读量284

点赞数

分类专栏：回归文章标签： python 开发语言后端

本文链接：https://blog.csdn.net/weixin_48697939/article/details/121276919

版权

回归专栏收录该内容

8 篇文章 0 订阅

订阅专栏

本文探讨了使用Cochrane-Orcutt方法处理自相关问题，并通过实例分析住房启动数据，引入季节性变量以改进模型。首先，通过OLS回归处理Expenditure数据，发现自相关后采用该方法进行变量变换。接着，分析Housing Starts 数据时，加入季节性变量Dt以削弱自相关性。最后，展示了如何在销售数据中考虑季节效应，通过指示变量消除其影响。

摘要由CSDN通过智能技术生成

EX1

table8.2

使用Cochrane and Orcutt方法进行变量变换¶

Ex2 analysis of housing starts

EX1

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.stats.outliers_influence # 异常值检验

data=pd.read_csv("C:/Users/可乐怪/Desktop/csv/P199.csv")
data['constant']=1

table8.2

model=sm.OLS(data['Expenditure'],data[['constant','Stock']]).fit()
print(model.summary())
np.round(model.params,2)

                            OLS Regression Results                            
==============================================================================
Dep. Variable:            Expenditure   R-squared:                       0.957
Model:                            OLS   Adj. R-squared:                  0.955
Method:                 Least Squares   F-statistic:                     403.2
Date:                Thu, 11 Nov 2021   Prob (F-statistic):           8.99e-14
Time:                        14:13:22   Log-Likelihood:                -54.964
No. Observations:                  20   AIC:                             113.9
Df Residuals:                      18   BIC:                             115.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
constant    -154.7192     19.850     -7.794      0.000    -196.423    -113.016
Stock          2.3004      0.115     20.080      0.000       2.060       2.541
==============================================================================
Omnibus:                        2.018   Durbin-Watson:                   0.328
Prob(Omnibus):                  0.365   Jarque-Bera (JB):                1.446
Skew:                          -0.453   Prob(JB):                        0.485
Kurtosis:                       2.044   Cond. No.                     3.86e+03
============================================================================

constant   -154.72
Stock         2.30
dtype: float64

outliers=model.get_influence()
res=outliers.resid_studentized_internal
index=range(1,21,1)
plt.xlabel('Index',size=15)
plt.ylabel('Residuals',size=15)
plt.title('Figure 8.1',size=20,c="#653700")
plt.plot(index,res,'bo-.',alpha=0.7)
plt.axhline(y=0,ls="-",c="r",alpha=0.5)#添加水平直线
plt.show()

存在自相关

def runs(res):#游程检验
    n1=0
    n2=0
    for i in res:
        if i>0:
            n1+=1
        else:
            n2+=1
    μ=(2*n1*n2)/(n1+n2)+1
    σ=(((2*n1*n2*(2*n1*n2-n1-n2)))/((n1+n2)**2)/(n1+n2-1))**0.5
    return μ,σ

print("游程检验的均值和标准差为:\n",runs(res))

def DW1(y,x):
    model=sm.OLS(y,x).fit()
    res=y-model.fittedvalues
    sse=model.ssr#ols模型的残差平方和
    e=[res[i]-res[i-1] for i in range(1,len(y))]
    e=np.sum(np.power(e, 2))#e^2
    d=np.round(e/sse,3)
    ρ1=1-d/2
    e2=[res[i]*res[i-1] for i in range(1,len(y))]
    e2=np.sum(e2)
    ρ2 = np.round(e2/ sse,3)
    return d,ρ1,ρ2

print("d=%s,ρ1=%sρ2=%s"%(DW1(data['Expenditure'],data[['constant','Stock']])))
ρ2=DW1(data['Expenditure'],data[['constant','Stock']])[2]
global ρ2

d=0.328<dL,reject Ho存在自相关

使用Cochrane and Orcutt方法进行变量变换¶

y_new=[data['Expenditure'][i]-(ρ2)*data['Expenditure'][i-1] for i in range(1,len(data['Expenditure']))]
x_new=[data['Stock'][i]-(ρ2)*data['Stock'][i-1] for i in range(1,len(data['Expenditure']))]
data_new=pd.DataFrame({'y_new':y_new,'x_new':x_new,'constant':1})
model2=sm.OLS(data_new['y_new'],data_new[['constant','x_new']]).fit()
print(model2.summary())
np.round(model2.params,2)

model2.ssr

得到新的方程y_new=-53.64+x_new*2.64¶

β0=-53.64/(1-0.751)
np.round(β0,3)

原变量表示的方程：y=-215.422+x*2.64

outliers=model2.get_influence()
res=outliers.resid_studentized_internal
index=range(1,20,1)
plt.plot(index,res,'bo-.',alpha=0.7)
plt.xlabel('Index',size=15)
plt.ylabel('Residuals',size=15)
plt.title('Figure 8.2',size=20,c="#653700")
plt.axhline(y=0,ls="-",c="r",alpha=0.5)#添加水平直线
plt.show()
def DW2(y,x):
    model2=sm.OLS(y,x).fit()
    res=y-model2.fittedvalues
    sse=model2.ssr
    e=[res[i]-res[i-1] for i in range(1,len(y))]
    e=np.sum(np.power(e, 2))#e^2
    d=np.round(e/sse,3)
    ρ1=1-d/2
    e2=[res[i]*res[i-1] for i in range(1,len(y))]
    e2=np.sum(e2)
    ρ2 = np.round(e2/ sse,3)
    return d,ρ1,ρ2
print("d=%s,ρ1=%sρ2=%s"%(DW2(data_new['y_new'],data_new[['constant','x_new']])))

d=1.427,ρ1=0.2865 ρ2=0.24
d=1.427>dL,acceptHo不存在自相关

Ex2 analysis of housing starts

data=pd.read_csv("C:/Users/可乐怪/Desktop/csv/P207.csv")
data['constant']=1

model=sm.OLS(data['H'],data[['constant','P']]).fit()
print(model.summary())

outliers=model.get_influence()
res=outliers.resid_studentized_internal
index=range(1,26)
plt.plot(index,res,'bo-.')
plt.xlabel("index",size=15)
plt.ylabel("residuals",size=15)
plt.title('Figure 8.3',size=20,c="#653700")
plt.axhline(y=0,ls="-",c="r",alpha=0.5)#添加水平直线
plt.show()

存在强自相关，引入变量Dt

model2=sm.OLS(data['H'],data[['constant','P','D']]).fit()
print(model2.summary())

outliers=model3.get_influence()
res2=outliers.resid_studentized_internal
index=range(1,26)
plt.plot(index,res2,'bo-.')
plt.xlabel("index",size=15)
plt.ylabel("residuals",size=15)
plt.title('Figure 8.4',size=20,c="#653700")
plt.axhline(y=0,ls="-",c="r",alpha=0.5)#添加水平直线
plt.show()

d=1.852>dL,acceptHo不存在自相关

Ex3

拟合模型

data=pd.read_csv("C:/Users/可乐怪/Desktop/csv/P212.csv")
data['constant']=1
data['index']=range(0,40)

model=sm.OLS(data['Sales'],data[['PDI','constant']]).fit()
print(model.summary())

outliers=model.get_influence()
res=outliers.resid_studentized_internal
data['res']=res
data2=data.loc[data['Season']==1]#通过行标签索引行数据
data3=data.loc[data['Season']==0]
plt.scatter(data2['index'],data2['res'],c='r',marker='+')
plt.scatter(data3['index'],data3['res'],c='b',marker='o')
plt.xlabel("index",size=15)
plt.ylabel("residuals",size=15)
plt.title("Figure8.5",size=20,c="#653700")
plt.legend(['1,4Q','2,3Q'])#
plt.show()

忽视了季节效应，引入指示变量去消除

data2=data.loc[data['Season']==1]#通过行标签索引行数据
data3=data.loc[data['Season']==0]

model2=sm.OLS(data2['Sales'],data2[['constant','PDI']]).fit()
model3=sm.OLS(data3['Sales'],data3[['constant','PDI']]).fit()
fit2=model2.fittedvalues
fit3=model3.fittedvalues
plt.plot(data2['PDI'],fit2,c='r',alpha=0.7)
plt.plot(data3['PDI'],fit3,c='b',alpha=0.7)
plt.yticks([])#取消坐标刻度
plt.xticks([])
plt.ylim(30)
plt.xlim(130,200)#plt.xlim(xmin,xmax)----xmin：x轴上的显示下限 xmax：x轴上的显示上限
plt.xlabel("PDI",size=15)
plt.ylabel("S",size=15)
plt.title("Figure8.6",size=20,c="#653700")
plt.text(146,45,"Slope=β1",rotation=19,size=15)#给图中的点加标签
plt.text(175,51,"Winter",rotation=19,size=15)
plt.text(146,40,"Slope=β1",rotation=19,size=15)
plt.text(175,45,"Summer",rotation=19,size=15)
plt.text(131,32,'β0',size=15)
plt.text(131,38,'β1',size=15)
plt.show()

model2=sm.OLS(data['Sales'],data[['constant','PDI','Season']]).fit()
print(model2.summary())
model2.conf_int()

outliers2=model2.get_influence()
res2=outliers2.resid_studentized_internal
data['res2']=res2
data2=data.loc[data['Season']==1]#通过行标签索引行数据
data3=data.loc[data['Season']==0]
plt.scatter(data2['index'],data2['res2'],c='r',marker='+')
plt.scatter(data3['index'],data3['res2'],c='b',marker='o')
plt.xlabel("index",size=15)
plt.ylabel("residuals",size=15)
plt.title("Figure8.7",size=20,c="#653700")
plt.legend(['1,4Q','2,3Q'])#
plt.show()

如图季节效应被消除了¶

可乐怪C

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Regression Chapter 8

EX1import numpy as npimport pandas as pd import matplotlib.pyplot as pltimport statsmodels.api as smimport statsmodels.stats.outliers_influence # 异常值检验data=pd.read_csv("C:/Users/可乐怪/Desktop/csv/P199.csv")data['constant']=1table8.2model=sm.O
复制链接

扫一扫

专栏目录