# 3、对于泰坦尼克号的数据集，试分析幸存与否与独立登船是否相关（alone 数据列）；进一步地，还可以分析幸存与否与年龄段（age 数据列）是否相关。
# 引入库
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
# Work around Chinese text rendering in embedded figures: select the SimHei
# font for sans-serif text and turn off the Unicode minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# The seaborn style call is given its own sans-serif font list as an rc
# override (presumably so applying 'whitegrid' keeps a CJK-capable font).
sns.set_style(style='whitegrid', rc={'font.sans-serif': ['simhei', 'Arial']})
# Load the passenger table from the CSV file in the working directory.
titanic = pd.read_csv('titanic.csv')

# Count passengers for every (alone, survived) pair, then pivot the
# 'survived' level into columns so each row is one value of 'alone'.
pair_counts = titanic.groupby(['alone', 'survived']).size()
survived = pair_counts.unstack(level='survived')

# Row total of the 0 and 1 columns, and the share of column 1 in that total
# (per the printed legend below, 0 = died and 1 = survived).
survived['sum'] = survived[0].add(survived[1])
survived['生还率'] = survived[1].div(survived['sum'])

# Emit the two header lines and the table, one item per line.
print(
    '幸存者按照是否独立登船统计的生还者、遇难者:',
    '0:遇难,1:生还',
    survived,
    sep='\n',
)

# Correlation between the 'survived' and 'alone' columns, using the default
# method of Series.corr.
survival_flag = titanic['survived']
print('幸存与否与独立登船的相关系数:')
print(survival_flag.corr(titanic['alone']))
mm = titanic['survived'