前言
数据使用全国大学生数学建模竞赛2021年B题(乙醇偶合制备C4烯烃)附件二数据集,使用 Python 中的 statsmodels 包实现。参考资料:官方文档、参数解读。
导入包
import pandas as pd
import numpy as np
import statsmodels. api as sm
import matplotlib as mpl
import matplotlib. pyplot as plt
plt. rcParams[ 'font.sans-serif' ] = [ 'SimHei' ]
plt. rcParams[ 'axes.unicode_minus' ] = False
% matplotlib inline
% config InlineBackend. figure_format= 'retina'
df = pd. read_csv( "C:/Users/lenovo/Desktop/附件2.csv" , encoding= "gbk" )
df. head( )
一元线性
# Simple linear regression: ethanol conversion ("乙醇转化率") on time ("时间"),
# with an intercept column added to the design matrix.
X = sm.add_constant(df["时间"])
etoh_model = sm.OLS(endog=df["乙醇转化率"], exog=X)
etoh_reg = etoh_model.fit()
print(etoh_reg.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:             乙醇转化率   R-squared:                       0.934
Model:                            OLS   Adj. R-squared:                  0.921
Method:                 Least Squares   F-statistic:                     70.59
Date:                Sun, 03 Jul 2022   Prob (F-statistic):           0.000391
Time:                        08:09:10   Log-Likelihood:                -11.145
No. Observations:                   7   AIC:                             26.29
Df Residuals:                       5   BIC:                             26.18
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         42.6549      1.098     38.855      0.000      39.833      45.477
时间          -0.0526      0.006     -8.402      0.000      -0.069      -0.037
==============================================================================
Omnibus:                          nan   Durbin-Watson:                   1.548
Prob(Omnibus):                    nan   Jarque-Bera (JB):                0.783
Skew:                           0.564   Prob(JB):                        0.676
Kurtosis:                       1.812   Cond. No.                         362.
==============================================================================
# Simple linear regression: C4 olefin selectivity ("C4烯烃选择性") on time
# ("时间"), with an intercept column added to the design matrix.
X = sm.add_constant(df["时间"])
c4_model = sm.OLS(endog=df["C4烯烃选择性"], exog=X)
c4_reg = c4_model.fit()
print(c4_reg.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:           C4烯烃选择性   R-squared:                       0.046
Model:                            OLS   Adj. R-squared:                 -0.144
Method:                 Least Squares   F-statistic:                    0.2434
Date:                Sun, 03 Jul 2022   Prob (F-statistic):              0.643
Time:                        08:09:10   Log-Likelihood:                -10.336
No. Observations:                   7   AIC:                             24.67
Df Residuals:                       5   BIC:                             24.56
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         38.5808      0.978     39.451      0.000      36.067      41.095
时间           0.0028      0.006      0.493      0.643      -0.012       0.017
==============================================================================
Omnibus:                          nan   Durbin-Watson:                   2.156
Prob(Omnibus):                    nan   Jarque-Bera (JB):                0.773
Skew:                          -0.814   Prob(JB):                        0.679
Kurtosis:                       2.938   Cond. No.                         362.
==============================================================================
二次多项式
# Quadratic fit of ethanol conversion ("乙醇转化率"): the regressors are an
# intercept, time ("时间") and time squared ("时间二次项"), in that column order.
X = sm.add_constant(df["时间"])
X["时间二次项"] = df["时间"].pow(2)
etoh_ploy_model = sm.OLS(endog=df["乙醇转化率"], exog=X)
# The result name `etoh_ploy` (sic) is kept: the plotting code refers to it.
etoh_ploy = etoh_ploy_model.fit()
print(etoh_ploy.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:             乙醇转化率   R-squared:                       0.988
Model:                            OLS   Adj. R-squared:                  0.982
Method:                 Least Squares   F-statistic:                     163.2
Date:                Sun, 03 Jul 2022   Prob (F-statistic):           0.000146
Time:                        08:09:10   Log-Likelihood:                -5.2009
No. Observations:                   7   AIC:                             16.40
Df Residuals:                       4   BIC:                             16.24
Df Model:                           2
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         45.2058      0.800     56.506      0.000      42.985      47.427
时间          -0.1044      0.013     -8.279      0.001      -0.139      -0.069
时间二次项     0.0002   4.15e-05      4.226      0.013    6.02e-05       0.000
==============================================================================
Omnibus:                          nan   Durbin-Watson:                   3.379
Prob(Omnibus):                    nan   Jarque-Bera (JB):                0.323
Skew:                          -0.475   Prob(JB):                        0.851
Kurtosis:                       2.550   Cond. No.                     1.26e+05
==============================================================================
# Quadratic fit of C4 olefin selectivity ("C4烯烃选择性"): the regressors are an
# intercept, time ("时间") and time squared ("时间二次项"), in that column order.
X = sm.add_constant(df["时间"])
X["时间二次项"] = df["时间"].pow(2)
c4_ploy_model = sm.OLS(endog=df["C4烯烃选择性"], exog=X)
# The result name `c4_ploy` (sic) is kept: the plotting code refers to it.
c4_ploy = c4_ploy_model.fit()
print(c4_ploy.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:           C4烯烃选择性   R-squared:                       0.241
Model:                            OLS   Adj. R-squared:                 -0.139
Method:                 Least Squares   F-statistic:                    0.6335
Date:                Sun, 03 Jul 2022   Prob (F-statistic):              0.577
Time:                        08:09:18   Log-Likelihood:                -9.5392
No. Observations:                   7   AIC:                             25.08
Df Residuals:                       4   BIC:                             24.92
Df Model:                           2
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         39.7151      1.487     26.712      0.000      35.587      43.843
时间          -0.0203      0.023     -0.865      0.436      -0.085       0.045
时间二次项  7.798e-05   7.71e-05      1.011      0.369      -0.000       0.000
==============================================================================
Omnibus:                          nan   Durbin-Watson:                   2.491
Prob(Omnibus):                    nan   Jarque-Bera (JB):                0.686
Skew:                          -0.610   Prob(JB):                        0.710
Kurtosis:                       2.071   Cond. No.                     1.26e+05
==============================================================================
预测与绘图
# Integer grid of time values 20..299 and the matching [1, t, t^2] design
# rows, in the same column order as the quadratic ethanol-conversion fit.
time_grid = np.arange(20, 300)
etoh_x_pre = np.column_stack(
    (np.ones_like(time_grid), time_grid, time_grid ** 2)
)
# Observed points as markers, fitted quadratic as a line, on one figure.
fig, ax = plt.subplots(dpi=600, figsize=(6, 4))
ax.plot(df['时间'], df['乙醇转化率'], 'o')
ax.plot(time_grid, etoh_ploy.predict(etoh_x_pre))
ax.legend(['原数据', '预测线'])
# Integer grid of time values 20..299 and the matching [1, t, t^2] design
# rows, in the same column order as the quadratic C4-selectivity fit.
temp = np.arange(20, 300)
c4_x_pre = np.column_stack((np.ones_like(temp), temp, temp ** 2))
# Observed points as markers, fitted quadratic as a line; the figure is then
# written to 2.png in the current working directory.
fig, ax = plt.subplots(dpi=600, figsize=(6, 4))
ax.plot(df['时间'], df['C4烯烃选择性'], 'o')
ax.plot(temp, c4_ploy.predict(c4_x_pre))
ax.legend(['原数据', '预测线'])
fig.savefig("2.png")