import os
# Make the data folder the working directory so the relative CSV path below resolves.
os.chdir(r"E:\BaiduNetdiskDownload\6inference")
import pandas as pd
# Read the CSV, decoding it as GBK, into the DataFrame used by every later cell.
house_price_gr = pd.read_csv(r'house_price_gr.csv', encoding='gbk')
house_price_gr.head()
# Take a first look at the data (head() shows the first five rows).
dis_name | rate | |
---|---|---|
0 | 东城区甘南小区 | 0.169747 |
1 | 东城区察慈小区 | 0.165484 |
2 | 东城区胡家园小区 | 0.141358 |
3 | 东城区台基厂小区 | 0.063197 |
4 | 东城区青年湖小区 | 0.101528 |
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric rate column.
house_price_gr.describe()
rate | |
---|---|
count | 150.000000 |
mean | 0.110061 |
std | 0.041333 |
min | 0.029540 |
25% | 0.080027 |
50% | 0.104908 |
75% | 0.140066 |
max | 0.243743 |
# include='all' additionally summarises the non-numeric dis_name column (count/unique/top/freq).
house_price_gr.describe(include='all')
dis_name | rate | |
---|---|---|
count | 150 | 150.000000 |
unique | 150 | NaN |
top | 朝阳区小关北里24号院 | NaN |
freq | 1 | NaN |
mean | NaN | 0.110061 |
std | NaN | 0.041333 |
min | NaN | 0.029540 |
25% | NaN | 0.080027 |
50% | NaN | 0.104908 |
75% | NaN | 0.140066 |
max | NaN | 0.243743 |
# Render matplotlib figures inline in the notebook. run_line_magic() is the
# supported replacement for the deprecated InteractiveShell.magic() call.
get_ipython().run_line_magic('matplotlib', 'inline')
import seaborn as sns
from scipy import stats
# Histogram with a KDE curve; fit=stats.norm overlays a fitted normal density.
# NOTE(review): distplot is deprecated in recent seaborn releases - confirm the
# installed version still provides it before upgrading seaborn.
sns.distplot(house_price_gr.rate, kde=True, fit=stats.norm)
import statsmodels.api as sm
from matplotlib import pyplot as plt
# Q-Q plot of the growth rate; fit=True fits the reference distribution to the
# data first, and line='45' draws the 45-degree reference line.
fig = sm.qqplot(house_price_gr.rate, fit=True, line='45')
fig.show()  # display the figure
house_price_gr.plot(kind='box')  # box plot
>
# Confidence interval estimation
# Sample variance: s^2 = [(x1-xbar)^2 + (x2-xbar)^2 + ... + (xn-xbar)^2] / (n-1), where xbar is the sample mean
# (pandas' std() defaults to ddof=1, i.e. the n-1 denominator).
se = house_price_gr.rate.std() / len(house_price_gr) ** 0.5 # standard error = standard deviation / sqrt(sample size)
print(se)
LB = house_price_gr.rate.mean() - 1.96 * se # lower bound: mean minus 1.96 standard errors (95% interval, normal approximation)
UB = house_price_gr.rate.mean() + 1.96 * se # upper bound: mean plus 1.96 standard errors
(LB, UB) # 95% confidence interval: we are 95% confident the mean growth rate lies between LB and UB
0.003374832409178327
(0.10344632517993363, 0.11667566822391268)
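# Conclusion: 10% lies below the lower bound of the 95% interval (about 10.34%), so the chance of buying at a growth rate as low as 10% is put at under 2.5% and is treated as a practically impossible event.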
# 如果要求任意置信度下的置信区间的话,可以自己编一个函数
def confint(x, alpha=0.05):
    """Return the two-sided Student-t confidence interval for the mean of ``x``.

    Args:
        x: One-dimensional numeric sample that supports ``len()``, ``mean()``
            and ``std(ddof=...)`` (for example a pandas Series or a NumPy
            array).
        alpha: Significance level; the interval has confidence ``1 - alpha``.
            Must lie strictly between 0 and 1.

    Returns:
        A dict with the keys ``'Mean'``, ``'Degree of Freedom'``, ``'LB'``
        (lower bound) and ``'UB'`` (upper bound).

    Raises:
        ValueError: If ``alpha`` is not in the open interval (0, 1).
    """
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1")
    n = len(x)
    xb = x.mean()
    df = n - 1
    # ddof=1 requests the sample standard deviation explicitly, so NumPy arrays
    # (whose std() defaults to ddof=0) give the same interval as pandas Series.
    tmp = (x.std(ddof=1) / n ** 0.5) * stats.t.ppf(1 - alpha / 2, df)
    return {'Mean': xb, 'Degree of Freedom': df, 'LB': xb - tmp, 'UB': xb + tmp}
# t-based interval for the mean growth rate at alpha = 0.05 (95% confidence).
confint(house_price_gr.rate, 0.05)
{'Mean': 0.11006099670192315,
'Degree of Freedom': 149,
'LB': 0.10339228338892809,
'UB': 0.11672971001491822}
# The same t-interval calculation written out step by step.
x=house_price_gr.rate
n = len(x)
df = n-1
xb = x.mean()
# t.ppf(0.025, df) is the (negative) lower-tail quantile, so subtracting the
# product adds the margin to the mean and yields the UPPER bound.
xb-(x.std() / n ** 0.5) * stats.t.ppf(0.025, df)
0.11672971001491822
# Alternatively, use statsmodels' DescrStatsW to compute the confidence interval.
d1 = sm.stats.DescrStatsW(house_price_gr.rate)
d1.tconfint_mean(0.05)  # t interval for the mean at alpha = 0.05
(0.10339228338892814, 0.11672971001491828)