# Preprocessing step one is done; next, plot each feature against survival.
# Start with the overall survival distribution. Titles are written in English
# because (per the original note) matplotlib does not handle Chinese text well.
plt.figure()  # fresh figure so this chart is not drawn over an earlier one
train_df.Survived.value_counts().plot(kind="bar")
plt.title("distribution of survival,(1 = survived, 0 = did not survived)")
# Number of passengers in each ticket class (Pclass).
plt.figure()  # fresh figure so this chart is not drawn over an earlier one
train_df.Pclass.value_counts().plot(kind="bar")
plt.title("Number of passengers per class")
# Number of passengers who boarded at each embarkation port.
plt.figure()  # fresh figure so this chart is not drawn over an earlier one
train_df.Embarked.value_counts().plot(kind='bar')
plt.title("Passengers per embarked location")
# Passenger counts grouped by the number of siblings/spouses aboard (SibSp).
plt.figure()  # fresh figure so this chart is not drawn over an earlier one
train_df.SibSp.value_counts().plot(kind='bar')
plt.title("Passengers with siblings or spouse")
# Relationship between age and survival.
# A low alpha makes overlapping points show up as darker regions.
plt.figure()  # fresh figure so the scatter is not drawn over an earlier chart
plt.scatter(train_df.Age, train_df.Survived, alpha=0.1)
plt.title("Age distribution v/s survived")
# Now combine features: how does sex relate to survival?
# First, the survival counts for male passengers.
plt.figure()  # fresh figure so this chart is not drawn over an earlier one
male_survival = train_df.Survived[train_df.Sex == 'male'].value_counts()
# sort_index() fixes the bar order by the 0/1 label instead of by count, so
# this chart lines up with the other per-group survival charts in this file.
male_survival.sort_index().plot(kind='bar')
plt.title("Analyzing male passengers: survived and not survived")
# Survival counts for female passengers.
plt.figure()  # fresh figure so this chart is not drawn over an earlier one
female_survival = train_df.Survived[train_df.Sex == 'female'].value_counts()
# sort_index() orders the bars by the 0/1 label rather than by count.
female_survival.sort_index().plot(kind='bar', color='pink')
plt.title("Analyzing female passengers: survived and not survived")
# Survival is clearly related to sex. Next, compare survival across classes.
# The original note says 3rd class is almost twice the size of 1st/2nd, so
# 1st and 2nd class are pooled together as the "high class" group.
plt.figure()  # fresh figure so this chart is not drawn over an earlier one
high_class_survival = train_df.Survived[train_df.Pclass != 3].value_counts()
# sort_index() orders the bars by the 0/1 label rather than by count.
high_class_survival.sort_index().plot(kind='bar', color='green')
plt.title("Analyzing high class passengers: not survived and survived")
# Survival counts for the low-class (3rd class) passengers.
plt.figure()  # fresh figure so this chart is not drawn over an earlier one
low_class_survival = train_df.Survived[train_df.Pclass == 3].value_counts()
# sort_index() orders the bars by the 0/1 label rather than by count.
low_class_survival.sort_index().plot(kind='bar', color='green')
plt.title("Analyzing low class passengers: not survived and survived")