python假设检验

尝试解决以下问题:
• 人类体温均值真的是98.6F吗?
• 体温样本数据是否服从正态分布?
• 不正常的体温是多少?
• 男性和女性的正常体温有明显的区别吗?
• 体温和心率是否有相关性?
数据:https://pan.baidu.com/s/1t4SKF6U2yyjT365FaE692A

#假设检验
import pandas as pd
import numpy as np
from scipy import stats
# Load the body-temperature dataset from a local CSV file and show the
# per-column summary statistics (count, mean, std, quartiles, min/max).
path = 'E:\\test.csv'
data = pd.read_csv(path)
summary = data.describe()
print(summary)

# Temperature column, reused by the interval computation below.
tem = data["Temperature"]
def interval(data, confidence=0.95):
    """Return the central range of a normal distribution fitted to *data*.

    A normal distribution is parameterised with the sample mean and the
    population standard deviation (``np.std`` default, ``ddof=0``) of
    *data*, and the symmetric range that holds ``confidence`` of that
    distribution's probability mass is returned.

    NOTE: this is the range in which individual observations are expected
    to fall, not a confidence interval for the mean (that would use the
    standard error, ``std / sqrt(n)``).

    Args:
        data: Sequence, array or Series of numeric observations.
        confidence: Probability mass covered by the range, strictly between
            0 and 1. Defaults to 0.95.

    Returns:
        A ``(lower, upper)`` pair of floats.
    """
    mean = np.mean(data)
    std = np.std(data)
    # The level is passed positionally because SciPy renamed this argument
    # from ``alpha`` to ``confidence`` between releases.
    return stats.norm.interval(confidence, mean, std)
# Show the 95% range of the normal distribution fitted to the temperatures.
tem_interval = interval(tem)
print(tem_interval)
'''
       Temperature      Gender   HeartRate
count   130.000000  130.000000  130.000000
mean     98.249231    1.500000   73.761538
std       0.733183    0.501934    7.062077
min      96.300000    1.000000   57.000000
25%      97.800000    1.000000   69.000000
50%      98.300000    1.500000   74.000000
75%      98.700000    2.000000   79.000000
max     100.800000    2.000000   89.000000
'''
# Check whether the temperature sample is consistent with a normal distribution.

# Kolmogorov-Smirnov test. The bare name 'norm' means the standard normal
# N(0, 1), so the sample mean and standard deviation are passed as loc/scale;
# without them, data centred near 98 F is always rejected.
# NOTE: because the parameters are estimated from the same sample, the
# reported p-value is only approximate (it tends to be too large).
ks_loc = data['Temperature'].mean()
ks_scale = data['Temperature'].std()
ks_test = stats.kstest(data['Temperature'], 'norm', args=(ks_loc, ks_scale))
print(ks_test)

# Shapiro-Wilk test.
shapiro_test = stats.shapiro(data['Temperature'])
print(shapiro_test)

# scipy.stats.normaltest (based on sample skewness and kurtosis).
normal_test = stats.normaltest(data['Temperature'],axis=None)
print(normal_test)
'''
KstestResult(statistic=0.06472685044046644, pvalue=0.6450307317439967)
(0.9865769743919373, 0.2331680953502655)
NormaltestResult(statistic=2.703801433319236, pvalue=0.2587479863488212)
'''
#以上三种检验的p值均大于0.05,不能拒绝正态性假设,可以认为体温样本近似服从正态分布

# Box-plot outlier detection on the temperature column.
temperature_col = data['Temperature']

# Lower and upper quartiles.
Q1 = temperature_col.quantile(q=0.25)
Q3 = temperature_col.quantile(q=0.75)
print(Q1, Q3, sep='\n')

# Whisker limits: 1.5 times the interquartile range beyond each quartile.
whisker_reach = 1.5 * (Q3 - Q1)
low_quantile = Q1 - whisker_reach
high_quantile = Q3 + whisker_reach
print(low_quantile, high_quantile, sep='\n')

# Observations that fall outside the whiskers are reported as outliers.
below_low = temperature_col < low_quantile
above_high = temperature_col > high_quantile
value = temperature_col[above_high | below_low]
print("异常值:" ,value)
'''
97.8
98.7
96.44999999999999
100.05000000000001
0       96.3
65      96.4
129    100.8
'''
# Drop every outlier first. The box-plot limits (low_quantile / high_quantile)
# are reused instead of comparing against hard-coded float values, so the
# filter stays correct if the data changes and does not rely on exact float
# equality.
is_outlier = (data['Temperature'] > high_quantile) | (data['Temperature'] < low_quantile)
data2 = data.loc[~is_outlier]

# Mean temperature of the rows with Gender == 1 (treated as male here).
data3 = data2.loc[data2['Gender'] == 1]
man_mean_temperature = data3['Temperature'].mean()
print(man_mean_temperature)

# Mean temperature of the rows with Gender == 2 (treated as female here).
data4 = data2.loc[data2['Gender'] == 2]
woman_mean_temperature = data4['Temperature'].mean()
print(woman_mean_temperature)
'''
98.13281250000003
98.38730158730158
'''
# Descriptive statistics of temperature per gender code (full data set,
# outliers included).
miaoshu = data.groupby('Gender')["Temperature"].describe()
print(miaoshu)

# Elsewhere in this script Gender == 1 is treated as male and Gender == 2 as
# female (see man_mean_temperature / woman_mean_temperature), so the samples
# are selected by explicit code to keep the labels consistent with that.
male_tem = data.query("Gender == 1")["Temperature"]
fmale_tem = data.query("Gender == 2")["Temperature"]

# Levene test for equality of variances between the two groups.
# The Gender == 2 sample is passed first, as before, so the printed
# statistics keep the same values and sign.
print(stats.levene(fmale_tem, male_tem))
# With equal variances accepted, the independent two-sample t-test
# ttest_ind(a, b) can be used to compare the group means.
print(stats.ttest_ind(fmale_tem, male_tem))

'''
        count       mean       std   min   25%   50%   75%    max
Gender                                                           
1        65.0  98.104615  0.698756  96.3  97.6  98.1  98.6   99.5
2        65.0  98.393846  0.743488  96.4  98.0  98.4  98.8  100.8
LeveneResult(statistic=0.06354951292025163, pvalue=0.8013756068102883)
Ttest_indResult(statistic=2.2854345381654984, pvalue=0.02393188312240236)
'''
#从结果来看,Levene检验p=0.80>0.05,可以接受方差齐性;t检验p=0.024<0.05,拒绝均值相等的原假设,即男女体温均值存在显著差异。
# Pairwise correlation matrices of the outlier-free data, one per method.
# 'pearson' is the method DataFrame.corr uses when none is given.
for corr_method in ('pearson', 'kendall', 'spearman'):
    print(data2.corr(corr_method))
'''
             Temperature    Gender  HeartRate
Temperature     1.000000  0.192293   0.243285
Gender          0.192293  1.000000   0.054193
HeartRate       0.243285  0.054193   1.000000
             Temperature    Gender  HeartRate
Temperature     1.000000  0.159488   0.176732
Gender          0.159488  1.000000   0.064551
HeartRate       0.176732  0.064551   1.000000
             Temperature    Gender  HeartRate
Temperature     1.000000  0.190609   0.265460
Gender          0.190609  1.000000   0.077409
HeartRate       0.265460  0.077409   1.000000
'''
#从上述三种系数来看,心率与体温呈现弱相关性

参考资料
• 《统计学(第七版)》贾俊平
• https://mp.weixin.qq.com/s/HhXtojmo9QBBwpE0yC0Q7A

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值