5.5
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
(a)3个示性变量
# Load the fertilizer data; the columns used below are 'Yield' and 'Fertilizer'.
data = pd.read_csv('C:/Users/可乐怪/Desktop/csv/P147.csv')

# One-hot encode the fertilizer group. dtype=int pins the indicators to 0/1
# integers (pandas >= 2.0 returns booleans by default), so they can be summed
# arithmetically and used as numeric regressors.
dummy = pd.get_dummies(data['Fertilizer'], dtype=int)

# Indicators for groups 1-3 only; rows where all three are 0 form the
# baseline group.
data['F1'] = dummy[1]
data['F2'] = dummy[2]
data['F3'] = dummy[3]

# Column of ones for the intercept, inserted as the second column.
data.insert(1, 'constant', 1)

# F is 1 when any of the three indicators is 1, and 0 otherwise.
data['F'] = data['F1'] + data['F2'] + data['F3']
data
| | Yield | constant | Fertilizer | F1 | F2 | F3 | F |
---|---|---|---|---|---|---|---|
0 | 31 | 1 | 1 | 1 | 0 | 0 | 1 |
1 | 34 | 1 | 1 | 1 | 0 | 0 | 1 |
2 | 34 | 1 | 1 | 1 | 0 | 0 | 1 |
3 | 34 | 1 | 1 | 1 | 0 | 0 | 1 |
4 | 43 | 1 | 1 | 1 | 0 | 0 | 1 |
5 | 35 | 1 | 1 | 1 | 0 | 0 | 1 |
6 | 38 | 1 | 1 | 1 | 0 | 0 | 1 |
7 | 36 | 1 | 1 | 1 | 0 | 0 | 1 |
8 | 36 | 1 | 1 | 1 | 0 | 0 | 1 |
9 | 45 | 1 | 1 | 1 | 0 | 0 | 1 |
10 | 27 | 1 | 2 | 0 | 1 | 0 | 1 |
11 | 27 | 1 | 2 | 0 | 1 | 0 | 1 |
12 | 25 | 1 | 2 | 0 | 1 | 0 | 1 |
13 | 34 | 1 | 2 | 0 | 1 | 0 | 1 |
14 | 21 | 1 | 2 | 0 | 1 | 0 | 1 |
15 | 36 | 1 | 2 | 0 | 1 | 0 | 1 |
16 | 34 | 1 | 2 | 0 | 1 | 0 | 1 |
17 | 30 | 1 | 2 | 0 | 1 | 0 | 1 |
18 | 32 | 1 | 2 | 0 | 1 | 0 | 1 |
19 | 33 | 1 | 2 | 0 | 1 | 0 | 1 |
20 | 36 | 1 | 3 | 0 | 0 | 1 | 1 |
21 | 37 | 1 | 3 | 0 | 0 | 1 | 1 |
22 | 37 | 1 | 3 | 0 | 0 | 1 | 1 |
23 | 34 | 1 | 3 | 0 | 0 | 1 | 1 |
24 | 37 | 1 | 3 | 0 | 0 | 1 | 1 |
25 | 28 | 1 | 3 | 0 | 0 | 1 | 1 |
26 | 33 | 1 | 3 | 0 | 0 | 1 | 1 |
27 | 29 | 1 | 3 | 0 | 0 | 1 | 1 |
28 | 36 | 1 | 3 | 0 | 0 | 1 | 1 |
29 | 42 | 1 | 3 | 0 | 0 | 1 | 1 |
30 | 33 | 1 | 4 | 0 | 0 | 0 | 0 |
31 | 27 | 1 | 4 | 0 | 0 | 0 | 0 |
32 | 35 | 1 | 4 | 0 | 0 | 0 | 0 |
33 | 25 | 1 | 4 | 0 | 0 | 0 | 0 |
34 | 29 | 1 | 4 | 0 | 0 | 0 | 0 |
35 | 20 | 1 | 4 | 0 | 0 | 0 | 0 |
36 | 25 | 1 | 4 | 0 | 0 | 0 | 0 |
37 | 40 | 1 | 4 | 0 | 0 | 0 | 0 |
38 | 35 | 1 | 4 | 0 | 0 | 0 | 0 |
39 | 29 | 1 | 4 | 0 | 0 | 0 | 0 |
# Full model: intercept plus the three fertilizer indicators.
# Columns are selected by name rather than by position so that 'constant'
# (and not the 'Fertilizer' group code, which sits at position 2) enters the
# design matrix and the first coefficient really is the intercept.
predictors = data[['constant', 'F1', 'F2', 'F3']]
model = sm.OLS(data['Yield'], predictors).fit()
print(model.summary())
OLS Regression Results
==============================================================================
Dep. Variable: Yield R-squared: 0.300
Model: OLS Adj. R-squared: 0.242
Method: Least Squares F-statistic: 5.144
Date: Wed, 27 Oct 2021 Prob (F-statistic): 0.00460
Time: 18:39:36 Log-Likelihood: -117.79
No. Observations: 40 AIC: 243.6
Df Residuals: 36 BIC: 250.3
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Fertilizer 7.4500 0.383 19.442 0.000 6.673 8.227
F1 29.1500 1.580 18.450 0.000 25.946 32.354
F2 15.0000 1.714 8.753 0.000 11.524 18.476
F3 12.5500 1.916 6.550 0.000 8.664 16.436
==============================================================================
Omnibus: 0.139 Durbin-Watson: 2.049
Prob(Omnibus): 0.933 Jarque-Bera (JB): 0.348
Skew: 0.050 Prob(JB): 0.840
Kurtosis: 2.554 Cond. No. 7.62
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
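(b)拟合模型(以第4组为基准、含截距项):yij = 29.80 + 6.80Fi1 + 0.10Fi2 + 5.10Fi3 + εij。注意上表的第一个回归变量是 Fertilizer(取值1~4)而不是常数项,其系数 7.4500 不是截距;上表给出的各组拟合均值为 36.6、29.9、34.9、29.8,换算成含截距的示性变量模型即得上式,R² 与 F 统计量不变。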
(c)由上表可以得知F=5.144,对应的p值为0.00460<0.05,因此可以认为三种肥料对产量是有影响的
# Restricted model: a single common effect for all three fertilizers
# (F = F1 + F2 + F3), i.e. the hypothesis beta1 = beta2 = beta3.
model = sm.OLS(data['Yield'], data[['constant', 'F']]).fit()
print(model.summary())

# The overall F of the restricted model only compares fertilized plots with
# the baseline group. Equality of the three effects is a nested-model
# comparison, so refit the unrestricted model and run the partial F test
# against the restricted one.
full_model = sm.OLS(data['Yield'], data[['constant', 'F1', 'F2', 'F3']]).fit()
f_value, p_value, df_diff = full_model.compare_f_test(model)
print('F=' + str(f_value) + ', p=' + str(p_value) + ', df=' + str(df_diff))
OLS Regression Results
==============================================================================
Dep. Variable: Yield R-squared: 0.099
Model: OLS Adj. R-squared: 0.076
Method: Least Squares F-statistic: 4.190
Date: Wed, 27 Oct 2021 Prob (F-statistic): 0.0476
Time: 19:05:44 Log-Likelihood: -122.83
No. Observations: 40 AIC: 249.7
Df Residuals: 38 BIC: 253.0
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
constant 29.8000 1.692 17.608 0.000 26.374 33.226
F 4.0000 1.954 2.047 0.048 0.044 7.956
==============================================================================
Omnibus: 0.377 Durbin-Watson: 1.786
Prob(Omnibus): 0.828 Jarque-Bera (JB): 0.088
Skew: -0.114 Prob(JB): 0.957
Kurtosis: 3.038 Cond. No. 3.78
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
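(d)检验三种肥料效果相同(H0:β1=β2=β3)应比较约束模型与完整模型:约束模型(上表,只含F)SSE_R=1088.4,完整模型(含F1、F2、F3)SSE_F=845.8、自由度36,F=[(1088.4−845.8)/2]/(845.8/36)≈5.16>F0.05(2,36)≈3.26(p≈0.011),拒绝H0,因此可以认为三种肥料的影响是不同的。上表中的F=4.19(p=0.0476<0.05)检验的是施肥与不施肥之间有无差别,而不是三种肥料之间是否相同。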
(e)F1的回归系数最大(第1组平均产量36.6,比对照组的29.8高6.8),因此第1种肥料的影响最大
5.1
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats as ss
# Load the data set and add a column of ones (second position) for the
# intercept term of the regressions that follow.
data = pd.read_csv('C:/Users/可乐怪/Desktop/csv/P132.csv')
data.insert(1, 'constant', 1)

# Red scatter plot of JPERF against TEST with enlarged axis labels.
label_style = {'size': 20}
test_scores = data['TEST']
job_perf = data['JPERF']
plt.scatter(test_scores, job_perf, color='r')
plt.xlabel('test', **label_style)
plt.ylabel('jperf', **label_style)
plt.show()
![png](output_13_0.png)
(b)F=2.21<4.45,accept H0:γ=0
# Full model (with RACE) and reduced model (without it), both with intercept.
model_f = sm.OLS(data['JPERF'], data[['constant', 'TEST', 'RACE']]).fit()
model_r = sm.OLS(data['JPERF'], data[['constant', 'TEST']]).fit()

# Degrees of freedom are read from the fits instead of being typed in, so
# they stay correct if the data or either model changes.
df_num = model_r.df_resid - model_f.df_resid
df_den = model_f.df_resid

# 5% critical value of the F(df_num, df_den) distribution.
print(ss.f.ppf(0.95, df_num, df_den))

# Residuals and residual sums of squares of the two fits.
res_f = model_f.resid
res_r = model_r.resid
SSE_f = model_f.ssr
SSE_r = model_r.ssr

# Partial F statistic for H0: the RACE coefficient is zero.
F = ((SSE_r - SSE_f) / df_num) / (SSE_f / df_den)
print('F=' + str(F))
4.451321772468133
F=2.212086949825205
(c)t=1.48<2.11,accept H0:γ=0
# Two-sided 5% critical value of t, using the full model's residual degrees
# of freedom.
print(ss.t.ppf(0.975, model_f.df_resid))

# t statistic of RACE: coefficient over its standard error, taken from the
# fit itself rather than from rounded figures copied out of the summary.
print(model_f.params['RACE'] / model_f.bse['RACE'])
print(model_f.summary())
2.1098155778331806
1.4782608695652175
OLS Regression Results
==============================================================================
Dep. Variable: JPERF R-squared: 0.572
Model: OLS Adj. R-squared: 0.522
Method: Least Squares F-statistic: 11.38
Date: Wed, 27 Oct 2021 Prob (F-statistic): 0.000731
Time: 20:14:33 Log-Likelihood: -35.390
No. Observations: 20 AIC: 76.78
Df Residuals: 17 BIC: 79.77
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
constant 0.6120 0.887 0.690 0.500 -1.260 2.483
TEST 2.2988 0.522 4.400 0.000 1.197 3.401
RACE 1.0276 0.691 1.487 0.155 -0.430 2.485
==============================================================================
Omnibus: 0.251 Durbin-Watson: 3.028
Prob(Omnibus): 0.882 Jarque-Bera (JB): 0.437
Skew: -0.059 Prob(JB): 0.804
Kurtosis: 2.286 Cond. No. 5.72
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
(d)发现t统计量的平方等于F统计量(t²=F):1.48²≈2.19,与F=2.21的微小差别来自t值的舍入
# Square the RACE t statistic from the fitted full model (not a rounded copy)
# so it can be compared directly with the partial F statistic.
print(model_f.tvalues['RACE'] ** 2)
2.1904