CHAPTER 9 ANALYSIS OF COLLINEAR DATA

Ex1

table 9.3

import pandas as pd 
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.stats.outliers_influence 
from prettytable import PrettyTable
data['constant']=1
data['index']=range(1,71)
print(model.summary())

在这里插入图片描述

outliers=model.get_influence()
res=outliers.resid_studentized_internal
plt.xlabel("Predicted",size=15)
plt.ylabel("Residuals",size=15)
plt.title("Figure9.1",size=20,c="teal")
plt.axhline(y=0,ls="-.",c="r",alpha=0.5)
plt.axvline(x=0,ls="-.",c="r",alpha=0.5)
color=['r' if abs(i)>2.5 else 'b' for i in res]
for i in range(len(res)):
    if abs(res[i])>2.5:
        plt.annotate("(%s,%s)" % (data['index'][i],round(res[i],2)),xy=(predicted[i],res[i]),size=15)
plt.scatter(predicted,res,c=color)

在这里插入图片描述

f=f.ppf(0.99,3,66)
f

R^2=0.206;F= 5.717>4.093,
it is accepted that FAM, PEER, and SCHOOL are valid predictor variables. However, the individual t-valuesare all small. In total, the summary statistics say that the three predictors taken together are important but from the t-values, it follows that any one predictor may be deleted from the model provided the other two are retained.
extreme multicollinearity is present

Scatter plots of the three predictor variables

    x_mean=np.mean(x)
    y_mean=np.mean(y)
    fenzi=0
    for i in range(len(x)):
        fenzi+=(x[i]-x_mean)*(y[i]-y_mean)
    fenmu1=0
    for i in range(len(x)):
        fenmu1+=(x[i]-x_mean)**2
    fenmu2=0
    for i in range(len(x)):
        fenmu2+=(y[i]-y_mean)**2
    fenmu=(fenmu1*fenmu2)**0.5
    cor=round(fenzi/fenmu,3)
    return cor
plt.xticks([])
plt.yticks([])
plt.subplots_adjust(hspace=0,wspace=0)#wspace, hspace:子图之间的横向间距、纵向间距分别与子图平均宽度、平均高度的比值。
plt.text(0.3,0.4,'FAM',c='b',size=15)


plt.subplot(332)
plt.xticks([])
plt.yticks([])
plt.scatter(data['PEER'],data['FAM'],c='r',s=2)

plt.subplot(333)
plt.xticks([])
plt.yticks([])
plt.scatter(data['SCHOOL'],data['FAM'],s=3.5,c='r')

plt.subplot(334)
plt.xticks([])
plt.yticks([])
plt.text(0.3,0.4,cor(data['FAM'],data['PEER']),c='g',size=15)

plt.subplot(335)
plt.xticks([])
plt.yticks([])
plt.text(0.3,0.4,'PEER',c='b',size=15)

plt.subplot(336)
plt.xticks([])
plt.yticks([])
plt.scatter(data['SCHOOL'],data['PEER'],s=3.5,c='r')

plt.subplot(337)
plt.xticks([])
plt.yticks([])
plt.text(0.3,0.5,cor(data['FAM'],data['SCHOOL']),c='g',size=15)

plt.subplot(338)
plt.xticks([])
plt.yticks([])
plt.text(0.3,0.5,cor(data['PEER'],data['SCHOOL']),c='g',size=15)

plt.subplot(339)
plt.xticks([])
plt.yticks([])
plt.text(0.3,0.5,'SCHOOL',c='b',size=15)
plt.suptitle("Figure9.2",size=20,c="teal")

plt.show()

在这里插入图片描述

Ex2 aggregate data concerning import activity in the French economy

table9.6

1

data['constant']=1
data['index']=range(1,19)
y=data['import']
x=data.iloc[:,2:5]
x=x.values
y=y.values
    ones=np.ones(shape=((X.shape)[0],1))
    X=np.hstack([ones,X])
    beta=(np.linalg.inv((X.T).dot(X))).dot(X.T).dot(Y)
    y_yuce=X.dot(beta)
    y_mean=np.mean(Y)
    SSE=0
    SST=0
    length=np.shape(X)[0]
    for i in range(length):
        SSE+=((Y[i]-y_yuce[i])**2)
        SST+=((Y[i]-y_mean)**2)
    SST=np.round(SST,4)
    SSE=np.round(SSE,4)
    SSR=np.round(SST-SSE,4)
    df_SSR=np.shape(X)[1]-1
    df_SSE=np.shape(X)[0]-np.shape(X)[1]
    MSR=np.round(SSR/df_SSR,4)
    MSE=np.round(SSE/df_SSE,4)
    F=np.round(MSR/MSE,4)
    label=['source','sum of square','df','mean square']
    table=PrettyTable(label)
    table.add_row(['regression',SSR,df_SSR,MSR])
    table.add_row(['residuals',SSE,df_SSE,MSE])
    table.add_column('F',[F,'--'])
    print(table)
    return 
y=data['import']
x=data.iloc[:,2:5]
x=x.values
y=y.values
F(y,x)

在这里插入图片描述

2

print(model.summary())

在这里插入图片描述

res=outliers.resid_studentized_internal
plt.plot(data['index'],res,'bo-')
plt.xlabel("Index",size=15)
plt.ylabel("Residuals",size=15)
plt.title("Figure9.3",size=20,c='teal')
plt.axhline(y=0,ls="-.",c="r",alpha=0.5)
plt.legend(['Import data (1949-1966)'],loc=(0.3, 0.9))
plt.show()

在这里插入图片描述
1.图9.3出现特别模式,模型不适合.multicollinearity is present(虽然R^2=0.973t都很小
2.根据资料我们去改进model如何构造model2

table 9.71___9.72

1

model2=sm.OLS(data2['import'],data2[['constant','doprod','stock','consum']]).fit()
print(model2.summary())

在这里插入图片描述

2

x=data2.iloc[:,2:5]
x=x.values
y=y.values
F(y,x)

在这里插入图片描述

res2=outliers2.resid_studentized_internal
plt.plot(data2['index'],res2,'bo-')
plt.xlabel("Index",size=15)
plt.ylabel("Residuals",size=15)
plt.title("Figure9.4",size=20,c='teal')
plt.axhline(y=0,ls="-.",c="r",alpha=0.5)
plt.legend(['Import data (1949-1959)'],loc=(0.3, 0.9))
plt.show()

在这里插入图片描述
图9.4**没有特别模式了纯随机,R^2=0.9
但是doprod的系数是负的并且无显著性与假设矛盾
。推测multicollinearity is present

Regression Coefficients for All Possible Regressions

model2=sm.OLS(data2['import'],data2[['constant','stock']]).fit()
model3=sm.OLS(data2['import'],data2[['constant','consum']]).fit()
model4=sm.OLS(data2['import'],data2[['constant','doprod','stock']]).fit()
model5=sm.OLS(data2['import'],data2[['constant','doprod','consum']]).fit()
model6=sm.OLS(data2['import'],data2[['constant','stock','consum']]).fit()
model7=sm.OLS(data2['import'],data2[['constant','doprod','stock','consum']]).fit()
a1.insert(2,'--')
a1.insert(3,'--')
a1.insert(0,1)
a2=np.round(model2.params,3).values.tolist()
a2.insert(1,'--')
a2.insert(3,'--')
a2.insert(0,2)
a3=np.round(model3.params,3).values.tolist()
a3.insert(1,'--')
a3.insert(2,'--')
a3.insert(0,3)
a4=np.round(model4.params,3).values.tolist()
a4.insert(3,'--')
a4.insert(0,4)
a5=np.round(model5.params,3).values.tolist()
a5.insert(2,'--')
a5.insert(0,5)
a6=np.round(model6.params,3).values.tolist()
a6.insert(1,'--')
a6.insert(0,6)
a7=np.round(model7.params,3).values.tolist()
a7.insert(0,7)
table=PrettyTable(['Regression','Constant','DOPROD','STOCK','CONSUM'])
table.add_row(a1)
table.add_row(a2)
table.add_row(a3)
table.add_row(a4)
table.add_row(a5)
table.add_row(a6)
table.add_row(a7)
print('table 9.8')
print(table)

在这里插入图片描述

Ex3 Aggregate sales of a firm in period t

data['constant']=1
data['index']=range(1,23)
model=sm.OLS(data['s_t'],data[['constant','a_t','p_t','e_t','a_t1','p_t1']]).fit()
print(model.summary())

predicted=model.fittedvalues
outliers=model.get_influence()
res=outliers.resid_studentized_internal
plt.xlabel("Predicted",size=15)
plt.ylabel("Residuals",size=15)
plt.title("Figure9.5",size=20,c="teal")
plt.scatter(predicted,res,c='b')
plt.show()

在这里插入图片描述

res=outliers.resid_studentized_internal
plt.plot(data['index'],res,'bo-')
plt.xlabel("Index",size=15)
plt.ylabel("Residuals",size=15)
plt.title("Figure9.6",size=20,c='teal')
plt.axhline(y=0,ls="-.",c="r",alpha=0.5)
plt.show()

在这里插入图片描述
散点图和残差图没有任何模式随机分布

x=data.iloc[:,1:6]
x=x.values
y=y.values
F(y,x)

在这里插入图片描述

VIF

vif3=[]
for i in range(5):
    a=round(variance_inflation_factor(data.iloc[:,1:7].values,i),2)
    vif3.insert(i,a)
print(vif3)
table=pd.DataFrame({'vif1':vif1,'vif2':vif2,'vif3':vif3})
table

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值