Coursera-week 4

Explanatory variable: x = frequency of drinking beer

Response variable: y = alcohol dependence

 

import pandas

import numpy

import seaborn
import matplotlib.pyplot as plt

# Load the survey extract. low_memory=False makes pandas infer each column's
# type from the whole file instead of chunk by chunk.
data = pandas.read_csv('nesarc_pds.csv', low_memory=False)

# Print every column and row, and show floats in fixed-point notation.
pandas.set_option('display.max_columns', None)
pandas.set_option('display.max_rows', None)
pandas.set_option('display.float_format', lambda x: '%f' % x)

# Coerce the three variables used below to numeric; entries that cannot be
# parsed become NaN. (Series.convert_objects no longer exists in pandas;
# pandas.to_numeric is its replacement.)
for column in ('S2BQ1A2', 'S2AQ5B', 'S2AQ5D'):
    data[column] = pandas.to_numeric(data[column], errors='coerce')

#1
# Mark code 9 as missing. A real NaN (not the string 'NaN') keeps the column
# numeric and lets groupby/plotting skip those rows.
data['S2BQ1A2'] = data['S2BQ1A2'].replace(9, numpy.nan)

# The column is deliberately left numeric rather than cast to 'category':
# countplot treats x as categorical anyway, and the bar plot at the end of
# the script needs numeric values to average.
seaborn.countplot(x='S2BQ1A2', data=data)
plt.xlabel('people ever wanted to drink more')
plt.title('Estimated alcohol dependence')

print('counts for S2BQ1A2_with 9 set to NAN')
cw1 = data.groupby('S2BQ1A2').size()
print(cw1)

#2
# Mark code 99 as missing.
data['S2AQ5B'] = data['S2AQ5B'].replace(99, numpy.nan)

print('counts for S2AQ5B_with 99 set to NAN')
cw2 = data.groupby('S2AQ5B').size()
print(cw2)

#3
# Mark code 99 as missing.
data['S2AQ5D'] = data['S2AQ5D'].replace(99, numpy.nan)

print('counts for S2AQ5D_with 99 set to NAN')
cw3 = data.groupby('S2AQ5D').size()
print(cw3)

# Bar plot of the mean response for each level of the explanatory variable.
# seaborn.factorplot was renamed to catplot in seaborn 0.9 and later removed.
seaborn.catplot(x='S2AQ5B', y='S2BQ1A2', data=data, kind='bar', ci=None)
plt.xlabel('frequency of drinking beer')
plt.ylabel('alcohol dependence')


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值