11.10 A STUDY OF SUPERVISOR PERFORMANCE
import pandas as pd
from statsmodels. stats. outliers_influence import variance_inflation_factor
import numpy as np
import statsmodels. api as sm
from prettytable import PrettyTable
import matplotlib. pyplot as plt
data= pd. read_csv( 'C:/Users/可乐怪/Desktop/csv/P043.csv' )
data. insert( 1 , 'constant' , 1 )
vif and λ cor
vif= [ ]
for i in range ( 6 ) :
a= variance_inflation_factor( data. iloc[ : , 1 : ] . values, i+ 1 )
vif. append( round ( a, 1 ) )
vif
[2.7, 1.6, 2.3, 3.1, 1.2, 2.0]
cor= data. iloc[ : , 2 : ] . corr( )
λ= np. linalg. eig( cor. values) [ 0 ]
λ= sorted ( np. round ( λ, 3 ) , reverse = True )
λ
[3.169, 1.006, 0.763, 0.553, 0.317, 0.192]
round ( cor, 3 )
x1 x2 x3 x4 x5 x6 x1 1.000 0.558 0.597 0.669 0.188 0.225 x2 0.558 1.000 0.493 0.445 0.147 0.343 x3 0.597 0.493 1.000 0.640 0.116 0.532 x4 0.669 0.445 0.640 1.000 0.377 0.574 x5 0.188 0.147 0.116 0.377 1.000 0.283 x6 0.225 0.343 0.532 0.574 0.283 1.000
Table 11.2Forward-3Backward
variables1= [ [ 'constant' , 'x1' ] ,
[ 'constant' , 'x1' , 'x3' ] ,
[ 'constant' , 'x1' , 'x3' , 'x6' ] ,
[ 'constant' , 'x1' , 'x3' , 'x6' , 'x2' ] ,
[ 'constant' , 'x1' , 'x3' , 'x6' , 'x2' , 'x4' ] ,
[ 'constant' , 'x1' , 'x3' , 'x6' , 'x2' , 'x4' , 'x5' ] , ]
def sele ( data, variables) :
t= [ ]
rms= [ ]
cp= [ ]
aic= [ ]
bic= [ ]
for i in range ( len ( variables) ) :
model= sm. OLS( data[ 'y' ] , data[ variables[ i] ] ) . fit( )
t. append( np. min ( np. abs ( model. tvalues[ 1 : ] ) ) )
rms. append( ( model. ssr/ ( len ( data) - len ( variables[ i] ) ) ) ** 0.5 )
model2= sm. OLS( data[ 'y' ] , data. iloc[ : , 1 : ] ) . fit( )
cp. append( model. ssr/ ( model2. ssr/ 23 ) + 2 * len ( variables[ i] ) - len ( data) )
aic. append( 2 * len ( variables[ i] ) + len ( data) * np. log( model. ssr/ len ( data) ) )
bic. append( len ( variables[ i] ) * np. log( len ( data) ) + len ( data) * np. log( model. ssr/ len ( data) ) )
table= PrettyTable( )
table. add_column( 'Variables' , variables)
table. add_column( 'min(|t|)' , np. round ( t, 3 ) )
table. add_column( 'RMS' , np. round ( rms, 3 ) )
table. add_column( 'Cp' , np. round ( cp, 3 ) )
table. add_column( 'AIC' , np. round ( aic, 3 ) )
table. add_column( 'BIC' , np. round ( bic, 3 ) )
print ( table)
return
sele( data, variables1)
+--------------------------------------------------+----------+-------+-------+---------+---------+
| Variables | min(|t|) | RMS | Cp | AIC | BIC |
+--------------------------------------------------+----------+-------+-------+---------+---------+
| ['constant', 'x1'] | 7.737 | 6.993 | 1.411 | 118.628 | 121.43 |
| ['constant', 'x1', 'x3'] | 1.571 | 6.817 | 1.115 | 118.002 | 122.206 |
| ['constant', 'x1', 'x3', 'x6'] | 1.291 | 6.734 | 1.603 | 118.14 | 123.744 |
| ['constant', 'x1', 'x3', 'x6', 'x2'] | 0.588 | 6.821 | 3.28 | 119.727 | 126.733 |
| ['constant', 'x1', 'x3', 'x6', 'x2', 'x4'] | 0.47 | 6.929 | 5.068 | 121.452 | 129.859 |
| ['constant', 'x1', 'x3', 'x6', 'x2', 'x4', 'x5'] | 0.261 | 7.068 | 7.0 | 123.364 | 133.172 |
+--------------------------------------------------+----------+-------+-------+---------+---------+
variables2= [ [ 'constant' , 'x1' , 'x2' , 'x3' , 'x4' , 'x5' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x4' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x6' ] ,
[ 'constant' , 'x1' , 'x3' , 'x6' ] ,
[ 'constant' , 'x1' , 'x3' ] ,
[ 'constant' , 'x1' ] , ]
sele( data, variables2)
+--------------------------------------------------+----------+-------+-------+---------+---------+
| Variables | min(|t|) | RMS | Cp | AIC | BIC |
+--------------------------------------------------+----------+-------+-------+---------+---------+
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6'] | 0.261 | 7.068 | 7.0 | 123.364 | 133.172 |
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x6'] | 0.47 | 6.929 | 5.068 | 121.452 | 129.859 |
| ['constant', 'x1', 'x2', 'x3', 'x6'] | 0.588 | 6.821 | 3.28 | 119.727 | 126.733 |
| ['constant', 'x1', 'x3', 'x6'] | 1.291 | 6.734 | 1.603 | 118.14 | 123.744 |
| ['constant', 'x1', 'x3'] | 1.571 | 6.817 | 1.115 | 118.002 | 122.206 |
| ['constant', 'x1'] | 7.737 | 6.993 | 1.411 | 118.628 | 121.43 |
+--------------------------------------------------+----------+-------+-------+---------+---------+
Table 11.4 Values of Cp Statistic (All Possible Equations) and Table 11.5
def Cp ( data, variables) :
cp= [ ]
for i in range ( len ( variables) ) :
model= sm. OLS( data[ 'y' ] , data[ variables[ i] ] ) . fit( )
model2= sm. OLS( data[ 'y' ] , data. iloc[ : , 1 : ] ) . fit( )
cp. append( model. ssr/ ( model2. ssr/ 23 ) + 2 * len ( variables[ i] ) - len ( data) )
table= PrettyTable( )
table. add_column( 'Variables' , variables)
table. add_column( 'Cp' , np. round ( cp, 3 ) )
print ( table)
return
variables3= [ [ 'constant' , 'x1' ] , [ 'constant' , 'x2' ] , [ 'constant' , 'x1' , 'x2' ] ,
[ 'constant' , 'x3' ] , [ 'constant' , 'x1' , 'x3' ] , [ 'constant' , 'x2' , 'x3' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' ] , [ 'constant' , 'x4' ] ,
[ 'constant' , 'x1' , 'x4' ] , [ 'constant' , 'x2' , 'x4' ] ,
[ 'constant' , 'x1' , 'x2' , 'x4' ] , [ 'constant' , 'x3' , 'x4' ] ,
[ 'constant' , 'x1' , 'x3' , 'x4' ] , [ 'constant' , 'x2' , 'x3' , 'x4' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x4' ] , [ 'constant' , 'x5' ] ,
[ 'constant' , 'x1' , 'x5' ] , [ 'constant' , 'x2' , 'x5' ] ,
[ 'constant' , 'x1' , 'x2' , 'x5' ] , [ 'constant' , 'x3' , 'x5' ] ,
[ 'constant' , 'x1' , 'x3' , 'x5' ] , [ 'constant' , 'x2' , 'x3' , 'x5' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x5' ] , [ 'constant' , 'x4' , 'x5' ] ,
[ 'constant' , 'x1' , 'x4' , 'x5' ] , [ 'constant' , 'x2' , 'x4' , 'x5' ] ,
[ 'constant' , 'x1' , 'x2' , 'x4' , 'x5' ] , [ 'constant' , 'x3' , 'x4' , 'x5' ] ,
[ 'constant' , 'x1' , 'x3' , 'x4' , 'x5' ] , [ 'constant' , 'x2' , 'x3' , 'x4' , 'x5' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x4' , 'x5' ] , [ 'constant' , 'x6' ] ,
[ 'constant' , 'x1' , 'x6' ] , [ 'constant' , 'x2' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x6' ] , [ 'constant' , 'x3' , 'x6' ] ,
[ 'constant' , 'x1' , 'x3' , 'x6' ] , [ 'constant' , 'x2' , 'x3' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x6' ] , [ 'constant' , 'x4' , 'x6' ] ,
[ 'constant' , 'x1' , 'x4' , 'x6' ] , [ 'constant' , 'x2' , 'x4' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x4' , 'x6' ] , [ 'constant' , 'x3' , 'x4' , 'x6' ] ,
[ 'constant' , 'x1' , 'x3' , 'x4' , 'x6' ] , [ 'constant' , 'x2' , 'x3' , 'x4' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x4' , 'x6' ] , [ 'constant' , 'x5' , 'x6' ] ,
[ 'constant' , 'x1' , 'x5' , 'x6' ] , [ 'constant' , 'x2' , 'x5' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x5' , 'x6' ] , [ 'constant' , 'x3' , 'x5' , 'x6' ] ,
[ 'constant' , 'x1' , 'x3' , 'x5' , 'x6' ] , [ 'constant' , 'x2' , 'x3' , 'x5' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x5' , 'x6' ] , [ 'constant' , 'x4' , 'x5' ] ,
[ 'constant' , 'x1' , 'x4' , 'x5' , 'x6' ] , [ 'constant' , 'x2' , 'x4' , 'x5' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x4' , 'x5' , 'x6' ] , [ 'constant' , 'x3' , 'x4' , 'x5' , 'x6' ] ,
[ 'constant' , 'x1' , 'x3' , 'x4' , 'x5' , 'x6' ] , [ 'constant' , 'x2' , 'x3' , 'x4' , 'x5' , 'x6' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x4' , 'x5' , 'x6' ] , ]
Cp( data, variables3)
+--------------------------------------------------+--------+
| Variables | Cp |
+--------------------------------------------------+--------+
| ['constant', 'x1'] | 1.411 |
| ['constant', 'x2'] | 44.396 |
| ['constant', 'x1', 'x2'] | 3.261 |
| ['constant', 'x3'] | 26.557 |
| ['constant', 'x1', 'x3'] | 1.115 |
| ['constant', 'x2', 'x3'] | 26.962 |
| ['constant', 'x1', 'x2', 'x3'] | 2.514 |
| ['constant', 'x4'] | 30.058 |
| ['constant', 'x1', 'x4'] | 3.189 |
| ['constant', 'x2', 'x4'] | 29.2 |
| ['constant', 'x1', 'x2', 'x4'] | 4.99 |
| ['constant', 'x3', 'x4'] | 23.25 |
| ['constant', 'x1', 'x3', 'x4'] | 3.091 |
| ['constant', 'x2', 'x3', 'x4'] | 24.558 |
| ['constant', 'x1', 'x2', 'x3', 'x4'] | 4.495 |
| ['constant', 'x5'] | 57.909 |
| ['constant', 'x1', 'x5'] | 3.411 |
| ['constant', 'x2', 'x5'] | 45.624 |
| ['constant', 'x1', 'x2', 'x5'] | 5.26 |
| ['constant', 'x3', 'x5'] | 27.94 |
| ['constant', 'x1', 'x3', 'x5'] | 3.115 |
| ['constant', 'x2', 'x3', 'x5'] | 28.53 |
| ['constant', 'x1', 'x2', 'x3', 'x5'] | 4.511 |
| ['constant', 'x4', 'x5'] | 31.622 |
| ['constant', 'x1', 'x4', 'x5'] | 5.164 |
| ['constant', 'x2', 'x4', 'x5'] | 30.817 |
| ['constant', 'x1', 'x2', 'x4', 'x5'] | 6.967 |
| ['constant', 'x3', 'x4', 'x5'] | 25.231 |
| ['constant', 'x1', 'x3', 'x4', 'x5'] | 5.086 |
| ['constant', 'x2', 'x3', 'x4', 'x5'] | 26.531 |
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x5'] | 6.483 |
| ['constant', 'x6'] | 57.945 |
| ['constant', 'x1', 'x6'] | 3.328 |
| ['constant', 'x2', 'x6'] | 46.388 |
| ['constant', 'x1', 'x2', 'x6'] | 5.225 |
| ['constant', 'x3', 'x6'] | 24.823 |
| ['constant', 'x1', 'x3', 'x6'] | 1.603 |
| ['constant', 'x2', 'x3', 'x6'] | 24.62 |
| ['constant', 'x1', 'x2', 'x3', 'x6'] | 3.28 |
| ['constant', 'x4', 'x6'] | 27.725 |
| ['constant', 'x1', 'x4', 'x6'] | 4.705 |
| ['constant', 'x2', 'x4', 'x6'] | 25.91 |
| ['constant', 'x1', 'x2', 'x4', 'x6'] | 6.626 |
| ['constant', 'x3', 'x4', 'x6'] | 16.502 |
| ['constant', 'x1', 'x3', 'x4', 'x6'] | 3.352 |
| ['constant', 'x2', 'x3', 'x4', 'x6'] | 17.575 |
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x6'] | 5.068 |
| ['constant', 'x5', 'x6'] | 58.762 |
| ['constant', 'x1', 'x5', 'x6'] | 5.32 |
| ['constant', 'x2', 'x5', 'x6'] | 47.605 |
| ['constant', 'x1', 'x2', 'x5', 'x6'] | 7.218 |
| ['constant', 'x3', 'x5', 'x6'] | 25.022 |
| ['constant', 'x1', 'x3', 'x5', 'x6'] | 3.459 |
| ['constant', 'x2', 'x3', 'x5', 'x6'] | 25.108 |
| ['constant', 'x1', 'x2', 'x3', 'x5', 'x6'] | 5.136 |
| ['constant', 'x4', 'x5'] | 31.622 |
| ['constant', 'x1', 'x4', 'x5', 'x6'] | 6.692 |
| ['constant', 'x2', 'x4', 'x5', 'x6'] | 27.743 |
| ['constant', 'x1', 'x2', 'x4', 'x5', 'x6'] | 8.613 |
| ['constant', 'x3', 'x4', 'x5', 'x6'] | 18.423 |
| ['constant', 'x1', 'x3', 'x4', 'x5', 'x6'] | 5.29 |
| ['constant', 'x2', 'x3', 'x4', 'x5', 'x6'] | 19.509 |
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6'] | 7.0 |
+--------------------------------------------------+--------+
variables4= [ [ 'constant' , 'x1' ] ,
[ 'constant' , 'x1' , 'x4' ] ,
[ 'constant' , 'x1' , 'x4' , 'x6' ] ,
[ 'constant' , 'x1' , 'x3' , 'x4' , 'x5' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x4' , 'x5' ] ,
[ 'constant' , 'x1' , 'x2' , 'x3' , 'x4' , 'x5' , 'x6' ] , ]
sele( data, variables4)
+--------------------------------------------------+----------+-------+-------+---------+---------+
| Variables | min(|t|) | RMS | Cp | AIC | BIC |
+--------------------------------------------------+----------+-------+-------+---------+---------+
| ['constant', 'x1'] | 7.737 | 6.993 | 1.411 | 118.628 | 121.43 |
| ['constant', 'x1', 'x4'] | 0.47 | 7.093 | 3.189 | 120.383 | 124.587 |
| ['constant', 'x1', 'x4', 'x6'] | 0.687 | 7.163 | 4.705 | 121.844 | 127.449 |
| ['constant', 'x1', 'x3', 'x4', 'x5'] | 0.069 | 7.08 | 5.086 | 121.968 | 128.974 |
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x5'] | 0.105 | 7.139 | 6.483 | 123.239 | 131.646 |
| ['constant', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6'] | 0.261 | 7.068 | 7.0 | 123.364 | 133.172 |
+--------------------------------------------------+----------+-------+-------+---------+---------+
cp= [ ]
for i in range ( len ( variables3) ) :
model= sm. OLS( data[ 'y' ] , data[ variables3[ i] ] ) . fit( )
model2= sm. OLS( data[ 'y' ] , data. iloc[ : , 1 : ] ) . fit( )
cp. append( model. ssr/ ( model2. ssr/ 23 ) + 2 * len ( variables3[ i] ) - len ( data) )
p= [ ]
cp_new= [ ]
for i in range ( len ( variables3) ) :
if cp[ i] < 10 :
cp_new. append( cp[ i] )
p. append( len ( variables3[ i] ) )
plt. scatter( p, cp_new, c= 'y' )
plt. xlabel( 'p' , size= 15 )
plt. ylabel( 'Cp' , size= 15 )
plt. suptitle( "Figure11.1" , size= 20 , c= "teal" )
data2= pd. DataFrame( { 'cp_new' : cp_new, 'p' : p} )
model2= sm. formula. ols( 'cp_new~p' , data= data2) . fit( )
f= np. poly1d( [ model2. params[ 1 ] , model2. params[ 0 ] ] )
plt. plot( p, f( p) , c= 'b' )
plt. show( )
11.11 VARIABLE SELECTION WITH COLLINEAR DATA
Table 11.9
data= pd. read_csv( 'C:/Users/可乐怪/Desktop/csv/P297.csv' )
data_scale= pd. DataFrame( )
from sklearn import preprocessing
data_scale[ 'H' ] = preprocessing. scale( data[ 'H' ] )
data_scale[ 'G' ] = preprocessing. scale( data[ 'G' ] )
data_scale[ 'M' ] = preprocessing. scale( data[ 'M' ] )
data_scale[ 'W' ] = preprocessing. scale( data[ 'W' ] )
data_scale. insert( 1 , 'constant' , 1 )
data_scale
H constant G M W 0 -1.049821 1 -1.457688 -2.108863 1.721572 1 -1.030764 1 -1.353707 -1.592856 1.392519 2 -1.054902 1 -1.185088 -1.051780 1.075343 3 -1.031399 1 -0.996797 -0.431318 0.769588 4 -0.765878 1 -0.603353 0.408499 0.474861 5 -0.670595 1 -0.176185 0.945397 0.190755 6 -0.239282 1 0.048640 0.435657 -0.083108 7 0.184409 1 0.270655 0.845120 -0.347102 8 0.404194 1 0.689392 1.191910 -0.601588 9 0.778973 1 1.068784 0.268529 -0.846907 10 1.342412 1 1.178386 -0.159736 -0.903634 11 1.404663 1 1.268316 0.145273 -1.311291 12 1.727990 1 1.248644 1.104168 -1.531008
model= sm. OLS( data_scale[ 'H' ] , data_scale. iloc[ : , 2 : ] ) . fit( )
print ( model. summary( ) )
OLS Regression Results
=======================================================================================
Dep. Variable: H R-squared (uncentered): 0.975
Model: OLS Adj. R-squared (uncentered): 0.967
Method: Least Squares F-statistic: 127.8
Date: Wed, 01 Dec 2021 Prob (F-statistic): 2.84e-08
Time: 12:57:54 Log-Likelihood: 5.4242
No. Observations: 13 AIC: -4.848
Df Residuals: 10 BIC: -3.154
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
G 0.2354 0.328 0.719 0.489 -0.495 0.965
M -0.4047 0.086 -4.714 0.001 -0.596 -0.213
W -1.0246 0.359 -2.858 0.017 -1.823 -0.226
==============================================================================
Omnibus: 0.594 Durbin-Watson: 2.123
Prob(Omnibus): 0.743 Jarque-Bera (JB): 0.602
Skew: 0.242 Prob(JB): 0.740
Kurtosis: 2.063 Cond. No. 15.6
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
D:\Anaconda\lib\site-packages\scipy\stats\stats.py:1603: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=13
warnings.warn("kurtosistest only valid for n>=20 ... continuing "
Table 11.10
variables1= [ [ 'G' ] , [ 'M' ] , [ 'W' ] , [ 'G' , 'M' ] , [ 'G' , 'W' ] , [ 'G' , 'M' , 'W' ] , [ 'M' , 'W' ] ]
def Regression ( variables) :
model= sm. OLS( data_scale[ 'H' ] , data_scale[ variables] ) . fit( )
table= PrettyTable( )
table. add_column( 'variable' , variables)
table. add_column( 'params' , model. params)
table. add_column( 't_value' , model. tvalues)
print ( table)
for i in variables1:
Regression( i)
+----------+--------------------+-------------------+
| variable | params | t_value |
+----------+--------------------+-------------------+
| G | 0.9580545597955183 | 11.58046751097006 |
+----------+--------------------+-------------------+
+----------+--------------------+------------------+
| variable | params | t_value |
+----------+--------------------+------------------+
| M | 0.5464226879330865 | 2.26011000844851 |
+----------+--------------------+------------------+
+----------+---------------------+---------------------+
| variable | params | t_value |
+----------+---------------------+---------------------+
| W | -0.9469212860765703 | -10.203992009328719 |
+----------+---------------------+---------------------+
+----------+---------------------+--------------------+
| variable | params | t_value |
+----------+---------------------+--------------------+
| G | 1.149115809506588 | 12.493938392491058 |
| M | -0.2691869793154155 | -2.92677683816018 |
+----------+---------------------+--------------------+
+----------+---------------------+---------------------+
| variable | params | t_value |
+----------+---------------------+---------------------+
| G | 0.8681543482821978 | 1.6972058194211332 |
| W | -0.0912071656578326 | -0.1783062339475414 |
+----------+---------------------+---------------------+
+----------+----------------------+--------------------+
| variable | params | t_value |
+----------+----------------------+--------------------+
| G | 0.2354139032865854 | 0.7185343125921468 |
| M | -0.40468225819531545 | -4.713958990831133 |
| W | -1.0245539532036643 | -2.857832016932103 |
+----------+----------------------+--------------------+
+----------+---------------------+--------------------+
| variable | params | t_value |
+----------+---------------------+--------------------+
| M | -0.429953761442835 | -5.615087247526293 |
| W | -1.2759328902217022 | -16.66336044262241 |
+----------+---------------------+--------------------+
11.14 SELECTION OF VARIABLES IN AN AIR POLLUTION STUDY
λ and Table 11.13
data= pd. read_csv( 'C:/Users/可乐怪/Desktop/csv/P302.csv' )
λ= np. linalg. eig( data. iloc[ : , 1 : ] ) [ 0 ]
λ= sorted ( np. round ( λ, 3 ) , reverse = True )
print ( λ[ : 5 ] )
print ( λ[ 5 : 10 ] )
print ( λ[ 10 : 15 ] )
[4.877, 2.766, 2.293, 1.352, 1.223]
[1.087, 0.661, 0.481, 0.407, 0.245]
[0.194, 0.156, 0.117, 0.089, 0.046]
y= data. loc[ : 14 , [ 'y' ] ]
x= data. iloc[ : 15 , 1 : 16 ]
def output ( y, x) :
variable= x. columns[ 0 : ]
y= y. values
x= x. values
β= np. linalg. inv( x) @y
c= np. linalg. inv( x)
vif= np. diagonal( c)
σ_2= ( 1 - y. T. dot( c) . dot( y) ) / ( 60 - len ( y) - 1 )
se= ( ( σ_2** 0.5 ) * ( np. array( vif) ** 0.5 ) ) . reshape( ( len ( y) ) , 1 )
t= β/ se
r2= y. T. dot( c) . dot( y)
r2a= 1 - ( 59 / ( 60 - len ( y) - 1 ) ) * ( 1 - r2)
table= PrettyTable( )
table. add_column( "variable" , variable)
table. add_column( "Coefficient" , np. round ( ( β. reshape( len ( y) , ) ) , 3 ) )
table. add_column( "se" , np. round ( ( se. reshape( ( len ( y) ) , ) ) , 3 ) )
table. add_column( "t-value" , np. round ( ( t. reshape( ( len ( y) ) , ) ) , 3 ) )
table. add_column( "VIF" , np. round ( ( vif) , 3 ) )
table2= PrettyTable( )
table2. add_column( "n" , [ 60 ] )
table2. add_column( "R2" , np. round ( r2. reshape( - 1 ) , 3 ) )
table2. add_column( "R2a" , np. round ( r2a. reshape( - 1 ) , 3 ) )
table2. add_column( "σ" , np. round ( ( σ_2** 0.5 ) . reshape( - 1 ) , 3 ) )
table2. add_column( "df" , [ ( 60 - len ( y) - 1 ) ] )
print ( table)
print ( table2)
output( y, x)
+----------+-------------+-------+---------+--------+
| variable | Coefficient | se | t-value | VIF |
+----------+-------------+-------+---------+--------+
| x1 | 0.306 | 0.148 | 2.063 | 4.112 |
| x2 | -0.318 | 0.181 | -1.755 | 6.134 |
| x3 | -0.237 | 0.146 | -1.627 | 3.968 |
| x4 | -0.213 | 0.2 | -1.064 | 7.464 |
| x5 | -0.232 | 0.152 | -1.527 | 4.306 |
| x6 | -0.233 | 0.161 | -1.448 | 4.852 |
| x7 | -0.052 | 0.146 | -0.356 | 3.968 |
| x8 | 0.084 | 0.094 | 0.89 | 1.656 |
| x9 | 0.64 | 0.19 | 3.359 | 6.778 |
| x10 | -0.014 | 0.123 | -0.112 | 2.843 |
| x11 | -0.009 | 0.216 | -0.042 | 8.722 |
| x12 | -0.979 | 0.724 | -1.353 | 97.924 |
| x13 | 0.983 | 0.747 | 1.316 | 104.22 |
| x14 | 0.09 | 0.15 | 0.599 | 4.209 |
| x15 | 0.009 | 0.101 | 0.093 | 1.907 |
+----------+-------------+-------+---------+--------+
+----+-------+-------+-------+----+
| n | R2 | R2a | σ | df |
+----+-------+-------+-------+----+
| 60 | 0.764 | 0.684 | 0.073 | 44 |
+----+-------+-------+-------+----+
Figure 11.2 -4
from scipy. linalg import solve
def trace ( k, Y, X) :
Y= Y. values
X= X. values
kkk= [ ]
for i in range ( len ( Y) ) :
kkk. append( k)
X= np. diag( kkk) + X
θ = solve( X, Y)
return θ
kk= np. linspace( 0 , 0.5 , 100 )
for i in range ( 5 ) :
θ= [ ]
for ii in range ( 100 ) :
a= trace( kk[ ii] , y, x) [ i]
θ. append( a)
plt. plot( kk, θ)
plt. annotate( str ( i+ 1 ) , xy= ( kk[ 50 ] , θ[ 50 ] ) )
plt. suptitle( "Figure11.2" , size= 20 , c= "teal" )
plt. show( )
for i in range ( 5 ) :
θ= [ ]
for ii in range ( 100 ) :
a= trace( kk[ ii] , y, x) [ i+ 5 ]
θ. append( a)
plt. plot( kk, θ)
plt. annotate( str ( i+ 6 ) , xy= ( kk[ 30 ] , θ[ 30 ] ) )
plt. suptitle( "Figure11.3" , size= 20 , c= "teal" )
plt. show( )
for i in range ( 5 ) :
θ= [ ]
for ii in range ( 100 ) :
a= trace( kk[ ii] , y, x) [ i+ 10 ]
θ. append( a)
plt. plot( kk, θ)
plt. annotate( str ( i+ 11 ) , xy= ( kk[ 1 ] , θ[ 1 ] ) )
plt. suptitle( "Figure11.4" , size= 20 , c= "teal" )
plt. show( )
remaining variables: 1,2,3,4, 5,6,9, 12, 13, and 14
Table 11.14 Ten Predictor
x1= x. drop( labels= [ 'x7' , 'x8' , 'x10' , 'x11' , 'x15' ] , axis= 1 )
x1= x1. drop( [ 6 , 7 , 9 , 10 , 14 ] )
y1= y. drop( [ 6 , 7 , 9 , 10 , 14 ] )
output( y1, x1)
+----------+-------------+-------+---------+--------+
| variable | Coefficient | se | t-value | VIF |
+----------+-------------+-------+---------+--------+
| x1 | 0.306 | 0.135 | 2.26 | 3.747 |
| x2 | -0.345 | 0.119 | -2.907 | 2.879 |
| x3 | -0.244 | 0.108 | -2.256 | 2.391 |
| x4 | -0.222 | 0.175 | -1.274 | 6.224 |
| x5 | -0.268 | 0.137 | -1.959 | 3.811 |
| x6 | -0.292 | 0.103 | -2.842 | 2.149 |
| x9 | 0.664 | 0.14 | 4.748 | 3.99 |
| x12 | -1.001 | 0.658 | -1.522 | 88.295 |
| x13 | 1.001 | 0.673 | 1.488 | 92.398 |
| x14 | 0.098 | 0.127 | 0.775 | 3.293 |
+----------+-------------+-------+---------+--------+
+----+------+-------+------+----+
| n | R2 | R2a | σ | df |
+----+------+-------+------+----+
| 60 | 0.76 | 0.711 | 0.07 | 49 |
+----+------+-------+------+----+
Figure 11.5-6
kk= np. linspace( 0 , 0.5 , 100 )
for i in range ( 5 ) :
θ= [ ]
for ii in range ( 100 ) :
a= trace( kk[ ii] , y1, x1) [ i]
θ. append( a)
plt. plot( kk, θ)
plt. annotate( str ( i+ 1 ) , xy= ( kk[ 50 ] , θ[ 50 ] ) )
plt. suptitle( "Figure11.5" , size= 20 , c= "teal" )
plt. show( )
kk= np. linspace( 0 , 0.5 , 100 )
for i in range ( 5 ) :
θ= [ ]
for ii in range ( 100 ) :
a= trace( kk[ ii] , y1, x1) [ i+ 5 ]
θ. append( a)
plt. plot( kk, θ)
plt. annotate( str ( i+ 6 ) , xy= ( kk[ 50 ] , θ[ 50 ] ) )
plt. suptitle( "Figure11.6" , size= 20 , c= "teal" )
plt. show( )
remaining variables: 1,2,3,4, 5,6,9 and 14
Table 11.15 Eight Predictor
x2= x1. drop( labels= [ 'x12' , 'x13' ] , axis= 1 )
x2= x2. drop( [ 11 , 12 ] )
y2= y1. drop( [ 11 , 12 ] )
output( y2, x2)
+----------+-------------+-------+---------+-------+
| variable | Coefficient | se | t-value | VIF |
+----------+-------------+-------+---------+-------+
| x1 | 0.331 | 0.12 | 2.765 | 2.911 |
| x2 | -0.351 | 0.106 | -3.313 | 2.279 |
| x3 | -0.217 | 0.104 | -2.087 | 2.191 |
| x4 | -0.155 | 0.163 | -0.946 | 5.419 |
| x5 | -0.221 | 0.134 | -1.656 | 3.621 |
| x6 | -0.27 | 0.102 | -2.654 | 2.097 |
| x9 | 0.692 | 0.133 | 5.219 | 3.567 |
| x14 | 0.23 | 0.083 | 2.767 | 1.405 |
+----------+-------------+-------+---------+-------+
+----+-------+-------+------+----+
| n | R2 | R2a | σ | df |
+----+-------+-------+------+----+
| 60 | 0.749 | 0.709 | 0.07 | 51 |
+----+-------+-------+------+----+