# -*- coding: utf-8 -*-
"""
Created on Mon Jul 8 16:46:09 2019
@author: User
"""
# 《Python数据分析基础》中国统计出版社
#import numpy as np
from scipy import stats
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from matplotlib.font_manager import FontProperties
# Font file handle (not used in the lines below; presumably consumed by
# plotting code elsewhere -- TODO confirm).
# The backslash is written as '\\' because '\m' is an invalid escape sequence
# (a SyntaxWarning on recent Python versions); the resulting path is unchanged.
myfont = FontProperties(fname='data\\msyh.ttc')

# Load the sales data; the CSV file is GBK-encoded.
dc_sales = pd.read_csv('data\\ch7\\dc_sales.csv', encoding="gbk")

# Human-readable labels for the pixel groups, applied positionally to the
# categories of the 'pixel' column.
pixel_labels = [
    '500万像素及以下',
    '500-600万像素',
    '600-800万像素',
    '800-1000万像素',
    '1000万像素及以上',
]

# Convert 'pixel' to a categorical and relabel its categories.
# Assigning to `.cat.categories` directly is no longer supported by pandas
# (deprecated in 1.x, removed in 2.0); `rename_categories` does the same
# positional relabeling and returns a new Series, which is stored back.
dc_sales['pixel'] = (
    dc_sales['pixel'].astype('category').cat.rename_categories(pixel_labels)
)
print(dc_sales.head())

# OLS of sales on the categorical pixel factor; '-1' drops the intercept so
# that one coefficient is estimated per pixel category.
formula = 'sales ~ C(pixel)-1'
dc_sales_est = smf.ols(formula, dc_sales).fit()  # dc_sales_est is the fitted model object

print("\n dc_sales_est.fittedvalues:")
print(dc_sales_est.fittedvalues)
运行:
dc_sales_est.fittedvalues:
0 81.125
1 95.750
2 107.125
3 124.000
4 122.125
5 81.125
6 95.750
7 107.125
8 124.000
9 122.125
10 81.125
11 95.750
12 107.125
13 124.000
14 122.125
15 81.125
16 95.750
17 107.125
18 124.000
19 122.125
20 81.125
21 95.750
22 107.125
23 124.000
24 122.125
25 81.125
26 95.750
27 107.125
28 124.000
29 122.125
30 81.125
31 95.750
32 107.125
33 124.000
34 122.125
35 81.125
36 95.750
37 107.125
38 124.000
39 122.125
dtype: float64