pwd
'd:\\python\\exerise-df\\df-data-analysis'
from scipy import stats
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
import statsmodels.api as sm
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib.pyplot as plt
单变量回归分析
# Read the simple-regression CSV from the current working directory into a
# DataFrame bound to `dat`.
dat = pd.read_csv("simple-resgreesion.csv")
# Bare expression: in a notebook cell this displays the first rows of `dat`.
dat.head()
| | N | weight |
---|---|---|
0 | 58 | 115 |
1 | 59 | 117 |
2 | 60 | 120 |
3 | 61 | 123 |
4 | 62 | 126 |
# Ordinary least squares fit of `weight` on the single predictor `N`,
# using the rows currently held in `dat`.
model = ols(formula='weight ~ N', data=dat).fit()
# Print the fitted model's summary report.
print(model.summary())
OLS Regression Results
==============================================================================
Dep. Variable: weight R-squared: 0.991
Model: OLS Adj. R-squared: 0.990
Method: Least Squares F-statistic: 1433.
Date: Wed, 27 Sep 2017 Prob (F-statistic): 1.09e-14
Time: 14:49:40 Log-Likelihood: -26.541
No. Observations: 15 AIC: 57.08
Df Residuals: 13 BIC: 58.50
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept -87.5167 5.937 -14.741 0.000 -100.343 -74.691
N 3.4500 0.091 37.855 0.000 3.253 3.647
==============================================================================
Omnibus: 2.396 Durbin-Watson: 0.315
Prob(Omnibus): 0.302 Jarque-Bera (JB): 1.660
Skew: 0.789 Prob(JB): 0.436
Kurtosis: 2.596 Cond. No. 982.
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
多项式回归分析
# Read the polynomial-regression CSV from the current working directory into
# a separate DataFrame, `dat2`.
dat2 = pd.read_csv("duoxiangshi.csv")
# Bare expression: in a notebook cell this displays the first rows of `dat2`.
dat2.head()
| | N | weight |
---|---|---|
0 | 58 | 115 |
1 | 59 | 117 |
2 | 60 | 120 |
3 | 61 | 123 |
4 | 62 | 126 |
# Second-degree polynomial fit: `weight` regressed on `N` and on the term
# `I(N**2)`, using the rows held in `dat2`.
mod = ols(formula='weight ~ N + I(N**2)', data=dat2).fit()
# Print the fitted model's summary report.
print(mod.summary())
OLS Regression Results
==============================================================================
Dep. Variable: weight R-squared: 0.999
Model: OLS Adj. R-squared: 0.999
Method: Least Squares F-statistic: 1.139e+04
Date: Wed, 27 Sep 2017 Prob (F-statistic): 2.13e-20
Time: 14:59:57 Log-Likelihood: -5.2563
No. Observations: 15 AIC: 16.51
Df Residuals: 12 BIC: 18.64
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept 261.8782 25.197 10.393 0.000 206.979 316.777
N -7.3483 0.778 -9.449 0.000 -9.043 -5.654
I(N ** 2) 0.0831 0.006 13.891 0.000 0.070 0.096
==============================================================================
Omnibus: 2.449 Durbin-Watson: 1.144
Prob(Omnibus): 0.294 Jarque-Bera (JB): 1.033
Skew: 0.049 Prob(JB): 0.597
Kurtosis: 1.718 Cond. No. 1.09e+06
==============================================================================
多变量回归分析
# Read the multiple-regression CSV from the current working directory.
# NOTE: this rebinds `dat`, which earlier held the simple-regression data.
dat = pd.read_csv("mul-regression.csv")
# Bare expression: in a notebook cell this displays the first rows of `dat`.
dat.head()
| | x1 | x2 | x3 | x4 | y |
---|---|---|---|---|---|
0 | 30.8 | 33.0 | 50.0 | 90 | 520.8 |
1 | 23.6 | 33.6 | 28.0 | 64 | 195.0 |
2 | 31.5 | 34.0 | 36.6 | 82 | 424.0 |
3 | 19.8 | 32.0 | 36.0 | 70 | 213.5 |
4 | 27.7 | 26.0 | 47.2 | 74 | 403.3 |
# Multiple linear regression: `y` regressed on the four predictors
# `x1`..`x4`, using the rows currently held in `dat`.
# NOTE: this rebinds `mod`, replacing the polynomial fit stored under
# the same name.
mod = ols(formula='y ~ x1 + x2 + x3 + x4', data=dat).fit()
# Print the fitted model's summary report.
print(mod.summary())
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.894
Model: OLS Adj. R-squared: 0.866
Method: Least Squares F-statistic: 31.78
Date: Wed, 27 Sep 2017 Prob (F-statistic): 3.66e-07
Time: 14:52:33 Log-Likelihood: -97.454
No. Observations: 20 AIC: 204.9
Df Residuals: 15 BIC: 209.9
Df Model: 4
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept -625.3583 114.378 -5.467 0.000 -869.150 -381.566
x1 15.1962 2.127 7.146 0.000 10.663 19.729
x2 7.3785 1.889 3.907 0.001 3.353 11.404
x3 9.5034 1.342 7.082 0.000 6.643 12.364
x4 -0.8468 1.493 -0.567 0.579 -4.029 2.335
==============================================================================
Omnibus: 0.492 Durbin-Watson: 1.620
Prob(Omnibus): 0.782 Jarque-Bera (JB): 0.578
Skew: -0.294 Prob(JB): 0.749
Kurtosis: 2.409 Cond. No. 1.38e+03
==============================================================================