# -*- coding: utf-8 -*-
"""
Created on Tue Jul 9 11:28:45 2019
@author: User
"""
# 《Python数据分析基础》中国统计出版社
#import numpy as np
from scipy import stats
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from matplotlib.font_manager import FontProperties
# Microsoft YaHei font handle; not used in the statements below, presumably
# kept for rendering Chinese text in plots elsewhere -- TODO confirm.
# The backslash is escaped explicitly: the original '\m' only worked because
# it is not a recognised escape sequence, and it triggers an invalid-escape
# warning on newer Python versions. The resulting path string is unchanged.
myfont = FontProperties(fname='data\\msyh.ttc')

# The survey file is GBK-encoded.
house = pd.read_csv('data\\ch7\\house.csv', encoding="gbk")

# Human-readable labels for each coded survey column. Labels are applied
# positionally to the column's categories, which pandas keeps in sorted order
# after ``astype('category')``, so each list must have exactly as many entries
# as the column has distinct values.
# NOTE(review): assumes the raw columns hold ordered numeric codes -- confirm
# against the CSV.
CATEGORY_LABELS = {
    'education': ['初中及以下', '高中(中专)', '大学', '研究生及以上'],
    'unit': ['国营企业', '行政事业单位', '大专院校科研院所',
             '私营企业', '失业', '其他'],
    'income': ['10000元以下', '10000-25000元', '25000-50000元',
               '50000-75000元', '75000以上'],
    'type': ['一室一厅', '二室一厅', '二室二厅', '三室一厅',
             '三室二厅', '三室三厅', '四室二厅一卫', '四室二厅二卫',
             '四室三厅一卫', '四室三厅二卫', '更大户型'],
}

for column, labels in CATEGORY_LABELS.items():
    # ``Series.cat.categories = [...]`` was deprecated in pandas 1.5 and
    # removed in pandas 2.0; ``rename_categories`` performs the same
    # positional relabelling and works on both old and new versions.
    house[column] = (
        house[column].astype('category').cat.rename_categories(labels)
    )

print(house.head())

# Main-effects model: dwelling space explained by income band and housing
# type, both treated as categorical factors (no interaction term).
formula = 'space ~ C(income) + C(type)'
house_anova_est = smf.ols(formula, data=house).fit()  # fitted OLS results object
print(house_anova_est.summary2())
运行:
education unit income type space
0 初中及以下 大专院校科研院所 10000-25000元 二室一厅 75.0
1 初中及以下 失业 10000元以下 三室三厅 55.0
2 大学 国营企业 10000-25000元 四室二厅二卫 56.0
3 高中(中专) 其他 10000-25000元 三室一厅 51.0
4 高中(中专) 私营企业 10000元以下 三室二厅 60.0
Results: Ordinary least squares
============================================================================
Model: OLS Adj. R-squared: 0.146
Dependent Variable: space AIC: 4076.2755
Date: 2019-07-09 11:33 BIC: 4138.6302
No. Observations: 472 Log-Likelihood: -2023.1
Df Model: 14 F-statistic: 6.752
Df Residuals: 457 Prob (F-statistic): 1.38e-12
R-squared: 0.171 Scale: 319.59
----------------------------------------------------------------------------
Coef. Std.Err. t P>|t| [0.025 0.975]
----------------------------------------------------------------------------
Intercept 86.1464 9.0005 9.5713 0.0000 68.4589 103.8339
C(income)[T.10000-25000元] 4.2072 2.1086 1.9953 0.0466 0.0635 8.3509
C(income)[T.25000-50000元] 9.9601 2.5641 3.8844 0.0001 4.9212 14.9990
C(income)[T.50000-75000元] 16.1291 5.1202 3.1501 0.0017 6.0670 26.1911
C(income)[T.75000以上] 29.3518 5.9372 4.9437 0.0000 17.6842 41.0193
C(type)[T.二室一厅] -23.0808 9.1696 -2.5171 0.0122 -41.1005 -5.0610
C(type)[T.二室二厅] -22.5007 9.2538 -2.4315 0.0154 -40.6859 -4.3155
C(type)[T.三室一厅] -23.0634 9.1100 -2.5317 0.0117 -40.9660 -5.1608
C(type)[T.三室二厅] -19.4782 9.0933 -2.1420 0.0327 -37.3481 -1.6083
C(type)[T.三室三厅] -28.6264 15.5200 -1.8445 0.0658 -59.1259 1.8730
C(type)[T.四室二厅一卫] -7.2395 10.1864 -0.7107 0.4776 -27.2575 12.7784
C(type)[T.四室二厅二卫] -12.6134 9.5902 -1.3152 0.1891 -31.4598 6.2329
C(type)[T.四室三厅一卫] -38.3536 20.0149 -1.9162 0.0560 -77.6864 0.9791
C(type)[T.四室三厅二卫] -26.4948 12.0484 -2.1990 0.0284 -50.1719 -2.8176
C(type)[T.更大户型] -4.5995 10.5095 -0.4377 0.6618 -25.2524 16.0533
----------------------------------------------------------------------------
Omnibus: 88.385 Durbin-Watson: 1.372
Prob(Omnibus): 0.000 Jarque-Bera (JB): 157.668
Skew: 1.083 Prob(JB): 0.000
Kurtosis: 4.825 Condition No.: 47
============================================================================