# Explanatory variable: x = frequency of drinking beer
# Response variable:    y = alcohol dependence
import pandas
import numpy
import seaborn
import matplotlib.pyplot as plt
# Load the NESARC extract, recode the "unknown" survey codes as missing,
# print a frequency table for each variable of interest, and plot the
# response (S2BQ1A2) against beer-drinking frequency (S2AQ5B).
data = pandas.read_csv('nesarc_pds.csv', low_memory=False)

# Lift pandas' display limits so printed tables are not truncated.
pandas.set_option('display.max_columns', None)
pandas.set_option('display.max_rows', None)
pandas.set_option('display.float_format', lambda x: '%f' % x)

# Coerce the variables of interest to numbers; entries that cannot be parsed
# become NaN.  Series.convert_objects is not available in current pandas, so
# pandas.to_numeric(errors='coerce') is used for the same conversion.
for column in ('S2BQ1A2', 'S2AQ5B', 'S2AQ5D'):
    data[column] = pandas.to_numeric(data[column], errors='coerce')

# 1. S2BQ1A2 (response): turn code 9 into a real missing value.
# Replacing with the string 'NaN' would leave a bogus extra category in the
# counts.  The column stays numeric because the bar plot at the end of the
# script averages it, which a categorical column does not support.
data['S2BQ1A2'] = data['S2BQ1A2'].replace(9, numpy.nan)
seaborn.countplot(x='S2BQ1A2', data=data)
plt.xlabel('people ever wanted to drink more')
plt.title('Estimated alcohol dependence')
print('counts for S2BQ1A2_with 9 set to NAN')
cw1 = data.groupby('S2BQ1A2').size()  # groupby leaves NaN rows out
print(cw1)

# 2. S2AQ5B (explanatory): turn code 99 into a missing value.
data['S2AQ5B'] = data['S2AQ5B'].replace(99, numpy.nan)
print('counts for S2AQ5B_with 99 set to NAN')
cw2 = data.groupby('S2AQ5B').size()
print(cw2)

# 3. S2AQ5D: turn code 99 into a missing value.
data['S2AQ5D'] = data['S2AQ5D'].replace(99, numpy.nan)
print('counts for S2AQ5D_with 99 set to NAN')
cw3 = data.groupby('S2AQ5D').size()
print(cw3)

# Bar chart of the mean response within each beer-frequency level.
# seaborn renamed factorplot to catplot in 0.9 and later dropped the old
# name, so use whichever this installation provides.
# NOTE(review): the bar height is the mean of the raw S2BQ1A2 codes; if the
# variable is coded 1/2 rather than 1/0, recode it before reading the bars as
# a proportion -- confirm against the codebook.
categorical_plot = getattr(seaborn, 'catplot', None) or seaborn.factorplot
categorical_plot(x='S2AQ5B', y='S2BQ1A2', data=data, kind='bar', ci=None)
plt.xlabel('frequency of drinking beer')
plt.ylabel('alcohol dependence')