# -*- coding: utf-8 -*-
"""
Created on Tue Jul 9 15:27:58 2019
@author: User
"""
# 《Python数据分析基础》中国统计出版社
import numpy as np
from scipy import stats
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.graphics.api import interaction_plot
from matplotlib.font_manager import FontProperties
# Font handle built from a local font file (presumably Microsoft YaHei, for
# CJK text in plots -- it is not used in the visible part of this script).
# A raw string keeps the backslash a literal Windows path separator instead
# of the invalid escape sequence "\m".
myfont = FontProperties(fname=r'data\msyh.ttc')

# Load the data set; the file is GBK-encoded and the path is Windows-style.
sale_points = pd.read_csv(r'data\ch7\sale_points.csv', encoding="gbk")

# Turn the two grouping columns into categoricals with readable labels.
# Assigning to ``Series.cat.categories`` is no longer supported by pandas
# (the setter was removed in 2.0), so the labels are applied with
# ``rename_categories`` and the result is written back to the frame.
# A list passed to ``rename_categories`` is matched positionally to the
# existing categories, so its length must equal the number of distinct values.
sale_points['market'] = (
    sale_points['market']
    .astype('category')
    .cat.rename_categories(['market 1', 'market 2', 'market 3'])
)
sale_points['warranty'] = (
    sale_points['warranty']
    .astype('category')
    .cat.rename_categories(['1 years', '3 years'])
)
print(sale_points.head())

# Model: sales explained by the numeric covariate ``points`` plus both
# categorical factors and their interaction.
formula = 'sales ~ points + C(market) * C(warranty)'

# Fitted OLS model object; the ANOVA table is computed from it using
# anova_lm's default settings.
sale_points_model = smf.ols(formula, data=sale_points).fit()
sale_points_anova_cov = sm.stats.anova_lm(sale_points_model)
print(sale_points_anova_cov)
# Output of running the script:
#
#      market warranty  sales  points
# 0  market 1  1 years   26.0     1.8
# 1  market 1  1 years   22.0     1.1
# 2  market 1  1 years   21.8     0.9
# 3  market 1  1 years   33.1     2.2
# 4  market 2  1 years   22.0     2.0
#
#                            df      sum_sq     mean_sq          F        PR(>F)
# C(market)                 2.0  593.160833  296.580417  56.984051  2.903413e-08
# C(warranty)               1.0  512.450417  512.450417  98.460650  1.734504e-08
# C(market):C(warranty)     2.0  167.155833   83.577917  16.058404  1.211601e-04
# points                    1.0  196.523934  196.523934  37.759505  1.079351e-05
# Residual                 17.0   88.478566    5.204622        NaN           NaN