CHAPTER 11 VARIABLE SELECTION PROCEDURES

最新推荐文章于 2022-07-16 12:57:30 发布

可乐怪C

最新推荐文章于 2022-07-16 12:57:30 发布

阅读量242

点赞数

分类专栏：回归文章标签： python 开发语言

本文链接：https://blog.csdn.net/weixin_48697939/article/details/121658742

版权

回归专栏收录该内容

8 篇文章 0 订阅

订阅专栏

11.10 A STUDY OF SUPERVISOR PERFORMANCE

import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor
import numpy as np
import statsmodels.api as sm
from prettytable import PrettyTable
import matplotlib.pyplot as plt

data=pd.read_csv('C:/Users/可乐怪/Desktop/csv/P043.csv')
data.insert(1,'constant',1)

vif and λ cor

vif=[]
for i in range(6):
    a=variance_inflation_factor(data.iloc[:,1:].values,i+1)
    vif.append(round(a,1))
vif

[2.7, 1.6, 2.3, 3.1, 1.2, 2.0]

cor=data.iloc[:,2:].corr()
λ=np.linalg.eig(cor.values)[0]
λ=sorted(np.round(λ,3),reverse = True)
λ

[3.169, 1.006, 0.763, 0.553, 0.317, 0.192]

round(cor,3)

	x1	x2	x3	x4	x5	x6
x1	1.000	0.558	0.597	0.669	0.188	0.225
x2	0.558	1.000	0.493	0.445	0.147	0.343
x3	0.597	0.493	1.000	0.640	0.116	0.532
x4	0.669	0.445	0.640	1.000	0.377	0.574
x5	0.188	0.147	0.116	0.377	1.000	0.283
x6	0.225	0.343	0.532	0.574	0.283	1.000

Table 11.2Forward-3Backward

variables1=[['constant','x1'],
           ['constant','x1','x3'],
           ['constant','x1','x3','x6'],
           ['constant','x1','x3','x6','x2'],
           ['constant','x1','x3','x6','x2','x4'],
           ['constant','x1','x3','x6','x2','x4','x5'],]

def sele(data,variables):
    t=[]
    rms=[]
    cp=[]
    aic=[]
    bic=[]
    for i in range(len(variables)):
        model=sm.OLS(data['y'],data[variables[i]]).fit()
        t.append(np.min(np.abs(model.tvalues[1:])))
        rms.append((model.ssr/(len(data)-len(variables[i])))**0.5)
        model2=sm.OLS(data['y'],data.iloc[:,1:]).fit()
        cp.append(model.ssr/(model2.ssr/23)+2*len(variables[i])-len(data))
        aic.append(2*len(variables[i])+len(data)*np.log(model.ssr/len(data)))
        bic.append(len(variables[i])*np.log(len(data))+len(data)*np.log(model.ssr/len(data)))
    table=PrettyTable()
    table.add_column('Variables',variables)
    table.add_column('min(|t|)',np.round(t,3))
    table.add_column('RMS',np.round(rms,3))
    table.add_column('Cp',np.round(cp,3))
    table.add_column('AIC',np.round(aic,3))
    table.add_column('BIC',np.round(bic,3))
    print(table)
    return 
sele(data,variables1)

+--------------------------------------------------+----------+-------+-------+---------+---------+
|                    Variables                     | min(|t|) |  RMS  |   Cp  |   AIC   |   BIC   |
+--------------------------------------------------+----------+-------+-------+---------+---------+
|                ['constant', 'x1']                |  7.737   | 6.993 | 1.411 | 118.628 |  121.43 |
|             ['constant', 'x1', 'x3']             |  1.571   | 6.817 | 1.115 | 118.002 | 122.206 |
|          ['constant', 'x1', 'x3', 'x6']          |  1.291   | 6.734 | 1.603 |  118.14 | 123.744 |
|       ['constant', 'x1', 'x3', 'x6', 'x2']       |  0.588   | 6.821 |  3.28 | 119.727 | 126.733 |
|    ['constant', 'x1', 'x3', 'x6', 'x2', 'x4']    |   0.47   | 6.929 | 5.068 | 121.452 | 129.859 |
| ['constant', 'x1', 'x3', 'x6', 'x2', 'x4', 'x5'] |  0.261   | 7.068 |  7.0  | 123.364 | 133.172 |
+--------------------------------------------------+----------+-------+-------+---------+---------+

variables2=[['constant','x1','x2','x3','x4','x5','x6'],
            ['constant','x1','x2','x3','x4','x6'],
            ['constant','x1','x2','x3','x6'],
            ['constant','x1','x3','x6'],
            ['constant','x1','x3'],
            ['constant','x1'],]
sele(data,variables2)

+--------------------------------------------------+----------+-------+-------+---------+---------+
|                    Variables                     | min(|t|) |  RMS  |   Cp  |   AIC   |   BIC   |
+--------------------------------------------------+----------+-------+-------+---------+---------+
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6'] |  0.261   | 7.068 |  7.0  | 123.364 | 133.172 |
|    ['constant', 'x1', 'x2', 'x3', 'x4', 'x6']    |   0.47   | 6.929 | 5.068 | 121.452 | 129.859 |
|       ['constant', 'x1', 'x2', 'x3', 'x6']       |  0.588   | 6.821 |  3.28 | 119.727 | 126.733 |
|          ['constant', 'x1', 'x3', 'x6']          |  1.291   | 6.734 | 1.603 |  118.14 | 123.744 |
|             ['constant', 'x1', 'x3']             |  1.571   | 6.817 | 1.115 | 118.002 | 122.206 |
|                ['constant', 'x1']                |  7.737   | 6.993 | 1.411 | 118.628 |  121.43 |
+--------------------------------------------------+----------+-------+-------+---------+---------+

Table 11.4 Values of Cp Statistic (All Possible Equations) and Table 11.5

def Cp(data,variables):
    cp=[]
    for i in range(len(variables)):
        model=sm.OLS(data['y'],data[variables[i]]).fit()
        model2=sm.OLS(data['y'],data.iloc[:,1:]).fit()
        cp.append(model.ssr/(model2.ssr/23)+2*len(variables[i])-len(data))
    table=PrettyTable()
    table.add_column('Variables',variables)
    table.add_column('Cp',np.round(cp,3))
    print(table)
    return

variables3=[['constant','x1'],['constant','x2'],['constant','x1','x2'],
            ['constant','x3'],['constant','x1','x3'],['constant','x2','x3'],
            ['constant','x1','x2','x3'],['constant','x4'],
            ['constant','x1','x4'],['constant','x2','x4'],
            ['constant','x1','x2','x4'],['constant','x3','x4'],
            ['constant','x1','x3','x4'],['constant','x2','x3','x4'],
            ['constant','x1','x2','x3','x4'],['constant','x5'],
            ['constant','x1','x5'],['constant','x2','x5'],
            ['constant','x1','x2','x5'],['constant','x3','x5'],
            ['constant','x1','x3','x5'],['constant','x2','x3','x5'],
            ['constant','x1','x2','x3','x5'],['constant','x4','x5'],
            ['constant','x1','x4','x5'],['constant','x2','x4','x5'],
            ['constant','x1','x2','x4','x5'],['constant','x3','x4','x5'],
            ['constant','x1','x3','x4','x5'],['constant','x2','x3','x4','x5'],
            ['constant','x1','x2','x3','x4','x5'],['constant','x6'],
            ['constant','x1','x6'],['constant','x2','x6'],
            ['constant','x1','x2','x6'],['constant','x3','x6'],
            ['constant','x1','x3','x6'],['constant','x2','x3','x6'],
            ['constant','x1','x2','x3','x6'],['constant','x4','x6'],
            ['constant','x1','x4','x6'],['constant','x2','x4','x6'],
            ['constant','x1','x2','x4','x6'],['constant','x3','x4','x6'],
            ['constant','x1','x3','x4','x6'],['constant','x2','x3','x4','x6'],
            ['constant','x1','x2','x3','x4','x6'],['constant','x5','x6'],
            ['constant','x1','x5','x6'],['constant','x2','x5','x6'],
            ['constant','x1','x2','x5','x6'],['constant','x3','x5','x6'],
            ['constant','x1','x3','x5','x6'],['constant','x2','x3','x5','x6'],
            ['constant','x1','x2','x3','x5','x6'],['constant','x4','x5'],
            ['constant','x1','x4','x5','x6'],['constant','x2','x4','x5','x6'],
            ['constant','x1','x2','x4','x5','x6'],['constant','x3','x4','x5','x6'],
            ['constant','x1','x3','x4','x5','x6'],['constant','x2','x3','x4','x5','x6'],
            ['constant','x1','x2','x3','x4','x5','x6'],]
Cp(data,variables3)

+--------------------------------------------------+--------+
|                    Variables                     |   Cp   |
+--------------------------------------------------+--------+
|                ['constant', 'x1']                | 1.411  |
|                ['constant', 'x2']                | 44.396 |
|             ['constant', 'x1', 'x2']             | 3.261  |
|                ['constant', 'x3']                | 26.557 |
|             ['constant', 'x1', 'x3']             | 1.115  |
|             ['constant', 'x2', 'x3']             | 26.962 |
|          ['constant', 'x1', 'x2', 'x3']          | 2.514  |
|                ['constant', 'x4']                | 30.058 |
|             ['constant', 'x1', 'x4']             | 3.189  |
|             ['constant', 'x2', 'x4']             |  29.2  |
|          ['constant', 'x1', 'x2', 'x4']          |  4.99  |
|             ['constant', 'x3', 'x4']             | 23.25  |
|          ['constant', 'x1', 'x3', 'x4']          | 3.091  |
|          ['constant', 'x2', 'x3', 'x4']          | 24.558 |
|       ['constant', 'x1', 'x2', 'x3', 'x4']       | 4.495  |
|                ['constant', 'x5']                | 57.909 |
|             ['constant', 'x1', 'x5']             | 3.411  |
|             ['constant', 'x2', 'x5']             | 45.624 |
|          ['constant', 'x1', 'x2', 'x5']          |  5.26  |
|             ['constant', 'x3', 'x5']             | 27.94  |
|          ['constant', 'x1', 'x3', 'x5']          | 3.115  |
|          ['constant', 'x2', 'x3', 'x5']          | 28.53  |
|       ['constant', 'x1', 'x2', 'x3', 'x5']       | 4.511  |
|             ['constant', 'x4', 'x5']             | 31.622 |
|          ['constant', 'x1', 'x4', 'x5']          | 5.164  |
|          ['constant', 'x2', 'x4', 'x5']          | 30.817 |
|       ['constant', 'x1', 'x2', 'x4', 'x5']       | 6.967  |
|          ['constant', 'x3', 'x4', 'x5']          | 25.231 |
|       ['constant', 'x1', 'x3', 'x4', 'x5']       | 5.086  |
|       ['constant', 'x2', 'x3', 'x4', 'x5']       | 26.531 |
|    ['constant', 'x1', 'x2', 'x3', 'x4', 'x5']    | 6.483  |
|                ['constant', 'x6']                | 57.945 |
|             ['constant', 'x1', 'x6']             | 3.328  |
|             ['constant', 'x2', 'x6']             | 46.388 |
|          ['constant', 'x1', 'x2', 'x6']          | 5.225  |
|             ['constant', 'x3', 'x6']             | 24.823 |
|          ['constant', 'x1', 'x3', 'x6']          | 1.603  |
|          ['constant', 'x2', 'x3', 'x6']          | 24.62  |
|       ['constant', 'x1', 'x2', 'x3', 'x6']       |  3.28  |
|             ['constant', 'x4', 'x6']             | 27.725 |
|          ['constant', 'x1', 'x4', 'x6']          | 4.705  |
|          ['constant', 'x2', 'x4', 'x6']          | 25.91  |
|       ['constant', 'x1', 'x2', 'x4', 'x6']       | 6.626  |
|          ['constant', 'x3', 'x4', 'x6']          | 16.502 |
|       ['constant', 'x1', 'x3', 'x4', 'x6']       | 3.352  |
|       ['constant', 'x2', 'x3', 'x4', 'x6']       | 17.575 |
|    ['constant', 'x1', 'x2', 'x3', 'x4', 'x6']    | 5.068  |
|             ['constant', 'x5', 'x6']             | 58.762 |
|          ['constant', 'x1', 'x5', 'x6']          |  5.32  |
|          ['constant', 'x2', 'x5', 'x6']          | 47.605 |
|       ['constant', 'x1', 'x2', 'x5', 'x6']       | 7.218  |
|          ['constant', 'x3', 'x5', 'x6']          | 25.022 |
|       ['constant', 'x1', 'x3', 'x5', 'x6']       | 3.459  |
|       ['constant', 'x2', 'x3', 'x5', 'x6']       | 25.108 |
|    ['constant', 'x1', 'x2', 'x3', 'x5', 'x6']    | 5.136  |
|             ['constant', 'x4', 'x5']             | 31.622 |
|       ['constant', 'x1', 'x4', 'x5', 'x6']       | 6.692  |
|       ['constant', 'x2', 'x4', 'x5', 'x6']       | 27.743 |
|    ['constant', 'x1', 'x2', 'x4', 'x5', 'x6']    | 8.613  |
|       ['constant', 'x3', 'x4', 'x5', 'x6']       | 18.423 |
|    ['constant', 'x1', 'x3', 'x4', 'x5', 'x6']    |  5.29  |
|    ['constant', 'x2', 'x3', 'x4', 'x5', 'x6']    | 19.509 |
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6'] |  7.0   |
+--------------------------------------------------+--------+

variables4=[['constant','x1'],
           ['constant','x1','x4'],
           ['constant','x1','x4','x6'],
           ['constant','x1','x3','x4','x5'],
           ['constant','x1','x2','x3','x4','x5'],
           ['constant','x1','x2','x3','x4','x5','x6'],]
sele(data,variables4)

+--------------------------------------------------+----------+-------+-------+---------+---------+
|                    Variables                     | min(|t|) |  RMS  |   Cp  |   AIC   |   BIC   |
+--------------------------------------------------+----------+-------+-------+---------+---------+
|                ['constant', 'x1']                |  7.737   | 6.993 | 1.411 | 118.628 |  121.43 |
|             ['constant', 'x1', 'x4']             |   0.47   | 7.093 | 3.189 | 120.383 | 124.587 |
|          ['constant', 'x1', 'x4', 'x6']          |  0.687   | 7.163 | 4.705 | 121.844 | 127.449 |
|       ['constant', 'x1', 'x3', 'x4', 'x5']       |  0.069   |  7.08 | 5.086 | 121.968 | 128.974 |
|    ['constant', 'x1', 'x2', 'x3', 'x4', 'x5']    |  0.105   | 7.139 | 6.483 | 123.239 | 131.646 |
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6'] |  0.261   | 7.068 |  7.0  | 123.364 | 133.172 |
+--------------------------------------------------+----------+-------+-------+---------+---------+

cp=[]
for i in range(len(variables3)):
    model=sm.OLS(data['y'],data[variables3[i]]).fit()
    model2=sm.OLS(data['y'],data.iloc[:,1:]).fit()
    cp.append(model.ssr/(model2.ssr/23)+2*len(variables3[i])-len(data))  

p=[]
cp_new=[]
for i in range(len(variables3)):
    if cp[i]<10:
        cp_new.append(cp[i])
        p.append(len(variables3[i]))
plt.scatter(p,cp_new,c='y')
plt.xlabel('p',size=15)
plt.ylabel('Cp',size=15)
plt.suptitle("Figure11.1",size=20,c="teal")

data2=pd.DataFrame({'cp_new':cp_new,'p':p})
model2=sm.formula.ols('cp_new~p',data=data2).fit()
f=np.poly1d([model2.params[1],model2.params[0]])
plt.plot(p,f(p),c='b')
plt.show()

在这里插入图片描述

11.11 VARIABLE SELECTION WITH COLLINEAR DATA

Table 11.9

data=pd.read_csv('C:/Users/可乐怪/Desktop/csv/P297.csv')
data_scale=pd.DataFrame()
from sklearn import preprocessing
data_scale['H'] =preprocessing.scale(data['H'])
data_scale['G'] =preprocessing.scale(data['G'])
data_scale['M'] =preprocessing.scale(data['M']) 
data_scale['W']=preprocessing.scale(data['W'])
data_scale.insert(1,'constant',1)
data_scale

	H	constant	G	M	W
0	-1.049821	1	-1.457688	-2.108863	1.721572
1	-1.030764	1	-1.353707	-1.592856	1.392519
2	-1.054902	1	-1.185088	-1.051780	1.075343
3	-1.031399	1	-0.996797	-0.431318	0.769588
4	-0.765878	1	-0.603353	0.408499	0.474861
5	-0.670595	1	-0.176185	0.945397	0.190755
6	-0.239282	1	0.048640	0.435657	-0.083108
7	0.184409	1	0.270655	0.845120	-0.347102
8	0.404194	1	0.689392	1.191910	-0.601588
9	0.778973	1	1.068784	0.268529	-0.846907
10	1.342412	1	1.178386	-0.159736	-0.903634
11	1.404663	1	1.268316	0.145273	-1.311291
12	1.727990	1	1.248644	1.104168	-1.531008

model=sm.OLS(data_scale['H'],data_scale.iloc[:,2:]).fit()
print(model.summary())

                                 OLS Regression Results                                
=======================================================================================
Dep. Variable:                      H   R-squared (uncentered):                   0.975
Model:                            OLS   Adj. R-squared (uncentered):              0.967
Method:                 Least Squares   F-statistic:                              127.8
Date:                Wed, 01 Dec 2021   Prob (F-statistic):                    2.84e-08
Time:                        12:57:54   Log-Likelihood:                          5.4242
No. Observations:                  13   AIC:                                     -4.848
Df Residuals:                      10   BIC:                                     -3.154
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
G              0.2354      0.328      0.719      0.489      -0.495       0.965
M             -0.4047      0.086     -4.714      0.001      -0.596      -0.213
W             -1.0246      0.359     -2.858      0.017      -1.823      -0.226
==============================================================================
Omnibus:                        0.594   Durbin-Watson:                   2.123
Prob(Omnibus):                  0.743   Jarque-Bera (JB):                0.602
Skew:                           0.242   Prob(JB):                        0.740
Kurtosis:                       2.063   Cond. No.                         15.6
==============================================================================

Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.


D:\Anaconda\lib\site-packages\scipy\stats\stats.py:1603: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=13
  warnings.warn("kurtosistest only valid for n>=20 ... continuing "

Table 11.10

variables1=[['G'],['M'],['W'],['G','M'],['G','W'],['G','M','W'],['M','W']]
def Regression(variables):
    model=sm.OLS(data_scale['H'],data_scale[variables]).fit()
    table=PrettyTable()
    table.add_column('variable',variables)
    table.add_column('params',model.params)
    table.add_column('t_value',model.tvalues)
    print(table)
for i in variables1:
    Regression(i)

+----------+--------------------+-------------------+
| variable |       params       |      t_value      |
+----------+--------------------+-------------------+
|    G     | 0.9580545597955183 | 11.58046751097006 |
+----------+--------------------+-------------------+
+----------+--------------------+------------------+
| variable |       params       |     t_value      |
+----------+--------------------+------------------+
|    M     | 0.5464226879330865 | 2.26011000844851 |
+----------+--------------------+------------------+
+----------+---------------------+---------------------+
| variable |        params       |       t_value       |
+----------+---------------------+---------------------+
|    W     | -0.9469212860765703 | -10.203992009328719 |
+----------+---------------------+---------------------+
+----------+---------------------+--------------------+
| variable |        params       |      t_value       |
+----------+---------------------+--------------------+
|    G     |  1.149115809506588  | 12.493938392491058 |
|    M     | -0.2691869793154155 | -2.92677683816018  |
+----------+---------------------+--------------------+
+----------+---------------------+---------------------+
| variable |        params       |       t_value       |
+----------+---------------------+---------------------+
|    G     |  0.8681543482821978 |  1.6972058194211332 |
|    W     | -0.0912071656578326 | -0.1783062339475414 |
+----------+---------------------+---------------------+
+----------+----------------------+--------------------+
| variable |        params        |      t_value       |
+----------+----------------------+--------------------+
|    G     |  0.2354139032865854  | 0.7185343125921468 |
|    M     | -0.40468225819531545 | -4.713958990831133 |
|    W     | -1.0245539532036643  | -2.857832016932103 |
+----------+----------------------+--------------------+
+----------+---------------------+--------------------+
| variable |        params       |      t_value       |
+----------+---------------------+--------------------+
|    M     |  -0.429953761442835 | -5.615087247526293 |
|    W     | -1.2759328902217022 | -16.66336044262241 |
+----------+---------------------+--------------------+

11.14 SELECTION OF VARIABLES IN AN AIR POLLUTION STUDY

λ and Table 11.13

data=pd.read_csv('C:/Users/可乐怪/Desktop/csv/P302.csv')
λ=np.linalg.eig(data.iloc[:,1:])[0]
λ=sorted(np.round(λ,3),reverse = True)
print(λ[:5])
print(λ[5:10])
print(λ[10:15])

[4.877, 2.766, 2.293, 1.352, 1.223]
[1.087, 0.661, 0.481, 0.407, 0.245]
[0.194, 0.156, 0.117, 0.089, 0.046]

y=data.loc[:14,['y']] #z.T@Y=Y
x=data.iloc[:15,1:16] #z.T@z=X

def output(y,x):
    variable=x.columns[0:]
    y=y.values
    x=x.values
    β=np.linalg.inv(x)@y
    c=np.linalg.inv(x)
    vif=np.diagonal(c)
    σ_2=(1-y.T.dot(c).dot(y))/(60-len(y)-1)
    se=((σ_2**0.5)*(np.array(vif)**0.5)).reshape((len(y)),1)
    t=β/se
    r2=y.T.dot(c).dot(y)
    r2a=1-(59/(60-len(y)-1))*(1-r2)
    table=PrettyTable()
    table.add_column("variable",variable)
    table.add_column("Coefficient",np.round((β.reshape(len(y),)),3))
    table.add_column("se",np.round((se.reshape((len(y)),)),3))
    table.add_column("t-value",np.round((t.reshape((len(y)),)),3))
    table.add_column("VIF",np.round((vif),3))
    
    table2=PrettyTable()
    table2.add_column("n",[60])
    table2.add_column("R2",np.round(r2.reshape(-1),3))
    table2.add_column("R2a",np.round(r2a.reshape(-1),3))
    table2.add_column("σ",np.round((σ_2**0.5).reshape(-1),3))
    table2.add_column("df",[(60-len(y)-1)])
    print(table)
    print(table2)

output(y,x)

+----------+-------------+-------+---------+--------+
| variable | Coefficient |   se  | t-value |  VIF   |
+----------+-------------+-------+---------+--------+
|    x1    |    0.306    | 0.148 |  2.063  | 4.112  |
|    x2    |    -0.318   | 0.181 |  -1.755 | 6.134  |
|    x3    |    -0.237   | 0.146 |  -1.627 | 3.968  |
|    x4    |    -0.213   |  0.2  |  -1.064 | 7.464  |
|    x5    |    -0.232   | 0.152 |  -1.527 | 4.306  |
|    x6    |    -0.233   | 0.161 |  -1.448 | 4.852  |
|    x7    |    -0.052   | 0.146 |  -0.356 | 3.968  |
|    x8    |    0.084    | 0.094 |   0.89  | 1.656  |
|    x9    |     0.64    |  0.19 |  3.359  | 6.778  |
|   x10    |    -0.014   | 0.123 |  -0.112 | 2.843  |
|   x11    |    -0.009   | 0.216 |  -0.042 | 8.722  |
|   x12    |    -0.979   | 0.724 |  -1.353 | 97.924 |
|   x13    |    0.983    | 0.747 |  1.316  | 104.22 |
|   x14    |     0.09    |  0.15 |  0.599  | 4.209  |
|   x15    |    0.009    | 0.101 |  0.093  | 1.907  |
+----------+-------------+-------+---------+--------+
+----+-------+-------+-------+----+
| n  |   R2  |  R2a  |   σ   | df |
+----+-------+-------+-------+----+
| 60 | 0.764 | 0.684 | 0.073 | 44 |
+----+-------+-------+-------+----+

Figure 11.2 -4

from scipy.linalg import  solve
#先求k=k0,15个解
def trace(k,Y,X):
    Y=Y.values
    X=X.values
    kkk=[]
    for i in range(len(Y)):
        kkk.append(k)
    X=np.diag(kkk)+ X
    θ = solve(X,Y)
    return θ

kk=np.linspace(0,0.5,100)
for i in range(5):
    θ=[]
    for ii in range(100):
        a=trace(kk[ii],y,x)[i]
        θ.append(a)
    plt.plot(kk,θ)
    plt.annotate(str(i+1),xy=(kk[50],θ[50]))
plt.suptitle("Figure11.2",size=20,c="teal")
plt.show()

在这里插入图片描述

for i in range(5):
    θ=[]
    for ii in range(100):
        a=trace(kk[ii],y,x)[i+5]
        θ.append(a)
    plt.plot(kk,θ)
    plt.annotate(str(i+6),xy=(kk[30],θ[30]))
plt.suptitle("Figure11.3",size=20,c="teal")
plt.show()

在这里插入图片描述

for i in range(5):
    θ=[]
    for ii in range(100):
        a=trace(kk[ii],y,x)[i+10]
        θ.append(a)
    plt.plot(kk,θ)
    plt.annotate(str(i+11),xy=(kk[1],θ[1]))
plt.suptitle("Figure11.4",size=20,c="teal")
plt.show()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-9e0gLc4n-1638347833603)(output_27_0.png)]

remaining variables: 1,2,3,4, 5,6,9, 12, 13, and 14

Table 11.14 Ten Predictor

x1=x.drop(labels=['x7','x8','x10','x11','x15'], axis=1)
x1=x1.drop([6,7,9,10,14])
y1=y.drop([6,7,9,10,14])
output(y1,x1)

+----------+-------------+-------+---------+--------+
| variable | Coefficient |   se  | t-value |  VIF   |
+----------+-------------+-------+---------+--------+
|    x1    |    0.306    | 0.135 |   2.26  | 3.747  |
|    x2    |    -0.345   | 0.119 |  -2.907 | 2.879  |
|    x3    |    -0.244   | 0.108 |  -2.256 | 2.391  |
|    x4    |    -0.222   | 0.175 |  -1.274 | 6.224  |
|    x5    |    -0.268   | 0.137 |  -1.959 | 3.811  |
|    x6    |    -0.292   | 0.103 |  -2.842 | 2.149  |
|    x9    |    0.664    |  0.14 |  4.748  |  3.99  |
|   x12    |    -1.001   | 0.658 |  -1.522 | 88.295 |
|   x13    |    1.001    | 0.673 |  1.488  | 92.398 |
|   x14    |    0.098    | 0.127 |  0.775  | 3.293  |
+----------+-------------+-------+---------+--------+
+----+------+-------+------+----+
| n  |  R2  |  R2a  |  σ   | df |
+----+------+-------+------+----+
| 60 | 0.76 | 0.711 | 0.07 | 49 |
+----+------+-------+------+----+

Figure 11.5-6

kk=np.linspace(0,0.5,100)
for i in range(5):
    θ=[]
    for ii in range(100):
        a=trace(kk[ii],y1,x1)[i]
        θ.append(a)
    plt.plot(kk,θ)
    plt.annotate(str(i+1),xy=(kk[50],θ[50]))
plt.suptitle("Figure11.5",size=20,c="teal")
plt.show()

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-0UA4RA5z-1638347833606)(output_31_0.png)]

kk=np.linspace(0,0.5,100)
for i in range(5):
    θ=[]
    for ii in range(100):
        a=trace(kk[ii],y1,x1)[i+5]
        θ.append(a)
    plt.plot(kk,θ)
    plt.annotate(str(i+6),xy=(kk[50],θ[50]))
plt.suptitle("Figure11.6",size=20,c="teal")
plt.show()

在这里插入图片描述

remaining variables: 1,2,3,4, 5,6,9 and 14

Table 11.15 Eight Predictor

x2=x1.drop(labels=['x12','x13'], axis=1)
x2=x2.drop([11,12])
y2=y1.drop([11,12])
output(y2,x2)

+----------+-------------+-------+---------+-------+
| variable | Coefficient |   se  | t-value |  VIF  |
+----------+-------------+-------+---------+-------+
|    x1    |    0.331    |  0.12 |  2.765  | 2.911 |
|    x2    |    -0.351   | 0.106 |  -3.313 | 2.279 |
|    x3    |    -0.217   | 0.104 |  -2.087 | 2.191 |
|    x4    |    -0.155   | 0.163 |  -0.946 | 5.419 |
|    x5    |    -0.221   | 0.134 |  -1.656 | 3.621 |
|    x6    |    -0.27    | 0.102 |  -2.654 | 2.097 |
|    x9    |    0.692    | 0.133 |  5.219  | 3.567 |
|   x14    |     0.23    | 0.083 |  2.767  | 1.405 |
+----------+-------------+-------+---------+-------+
+----+-------+-------+------+----+
| n  |   R2  |  R2a  |  σ   | df |
+----+-------+-------+------+----+
| 60 | 0.749 | 0.709 | 0.07 | 51 |
+----+-------+-------+------+----+

可乐怪C

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
CHAPTER 11 VARIABLE SELECTION PROCEDURES

11.10 A STUDY OF SUPERVISOR PERFORMANCEimport pandas as pdfrom statsmodels.stats.outliers_influence import variance_inflation_factorimport numpy as npimport statsmodels.api as smfrom prettytable import PrettyTableimport matplotlib.pyplot as pltdat
复制链接

扫一扫

专栏目录