%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset from result.csv (path is relative to the working directory).
text = pd.read_csv("result.csv")
# Preview the first five rows.
text.head(n=5)
| Unnamed: 0 | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
---|
0 | 0 | 1.0 | 0.0 | 3.0 | Braund, Mr. Owen Harris | male | 22.0 | 1.0 | 0.0 | A/5 21171 | 7.2500 | NaN | S |
---|
1 | 1 | 2.0 | 1.0 | 1.0 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1.0 | 0.0 | PC 17599 | 71.2833 | C85 | C |
---|
2 | 2 | 3.0 | 1.0 | 3.0 | Heikkinen, Miss. Laina | female | 26.0 | 0.0 | 0.0 | STON/O2. 3101282 | 7.9250 | NaN | S |
---|
3 | 3 | 4.0 | 1.0 | 1.0 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1.0 | 0.0 | 113803 | 53.1000 | C123 | S |
---|
4 | 4 | 5.0 | 0.0 | 3.0 | Allen, Mr. William Henry | male | 35.0 | 0.0 | 0.0 | 373450 | 8.0500 | NaN | S |
---|
如何让人一眼就能看懂你的数据
可视化展示泰坦尼克号数据集中男女中生存人数分布情况(用柱状图试试)。
# Sum the Survived column per sex; the rows shown above hold 0.0/1.0 values,
# so the sum is presumably the number of survivors in each group.
sex = text['Survived'].groupby(text['Sex']).sum()
sex.plot.bar()
plt.title('survived_count')
# show must be *called*: a bare `plt.show` only evaluates to the function object.
plt.show()
<function matplotlib.pyplot.show(*args, **kw)>

# Same per-sex Survived total as above, written via Series.groupby.
sex = text['Survived'].groupby(text['Sex']).sum()
sex.plot(kind='bar')
plt.title('Survived_count')
plt.show()

可视化展示泰坦尼克号数据集中男女中生存人数与死亡人数的比例图(用柱状图试试)。
# Count rows per (Sex, Survived) pair, then pivot Survived into columns so each
# sex gets one bar made of a segment per Survived value.
# stacked must be a real bool: a non-empty string such as 'False' is truthy and
# would stack the bars anyway, which hides the actual behaviour from the reader.
text.groupby(['Sex', 'Survived'])['Survived'].count().unstack().plot(kind='bar', stacked=True)
plt.title('survived_count')
plt.ylabel('count')
Text(0, 0.5, 'count')

可视化展示泰坦尼克号数据集中不同票价的人生存和死亡人数分布情况。(用折线图试试)(横轴是不同票价,纵轴是存活人数)
# Row counts per (Fare, Survived) pair, ordered from the largest count down.
fare_sta = (
    text['Survived']
    .groupby(text['Fare'])
    .value_counts()
    .sort_values(ascending=False)
)
fare_sta
# Open an 8x5 figure first so the Series plot below is drawn onto it.
fig = plt.figure(figsize=(8, 5))
fare_sta.plot(grid=True)
plt.legend()
plt.show()

# Row counts per (Fare, Survived) pair, left in group order (no sorting by count).
fare_sta = text.groupby(by=['Fare'])['Survived'].value_counts()
# Open an 8x6 figure first so the Series plot below is drawn onto it.
fig = plt.figure(figsize=(8, 6))
fare_sta.plot(kind='line', grid=True)
plt.legend()
plt.show()

任务五:可视化展示泰坦尼克号数据集中不同舱位等级的人生存和死亡人员的分布情况。(用柱状图试试)
# Row counts per (Pclass, Survived) pair.
pclass_sta = text.groupby(['Pclass'])['Survived'].value_counts()
# Open an 8x6 figure first so the Series plot below is drawn onto it.
fig = plt.figure(figsize=(8, 6))
# The (Pclass, Survived) pairs are discrete categories, so draw one bar per pair
# rather than a line joining unrelated groups.
pclass_sta.plot(kind='bar', grid=True)
plt.legend()
plt.show()

import seaborn as sns
# One group of bars per Survived value, split into a bar per Pclass via hue.
sns.countplot(data=text, x='Survived', hue='Pclass')
<matplotlib.axes._subplots.AxesSubplot at 0x151be985988>

任务六:可视化展示泰坦尼克号数据集中不同年龄的人生存与死亡人数分布情况。(不限表达方式)
# One wide panel (aspect=3) with a separate Age density curve per Survived value.
facet = sns.FacetGrid(text, hue="Survived", aspect=3)
# `fill` replaces the deprecated `shade` alias of kdeplot (seaborn >= 0.11).
facet.map(sns.kdeplot, 'Age', fill=True)
# Clip the x-axis to the observed age range, starting at 0.
facet.set(xlim=(0, text['Age'].max()))
facet.add_legend()
<seaborn.axisgrid.FacetGrid at 0x151bea0f608>

任务七:可视化展示泰坦尼克号数据集中不同舱位等级的人年龄分布情况。(用折线图试试)
# Overlay one Age density curve per Pclass value on the same axes.
text.loc[text['Pclass'] == 1, 'Age'].plot.kde()
text.loc[text['Pclass'] == 2, 'Age'].plot.kde()
text.loc[text['Pclass'] == 3, 'Age'].plot.kde()
plt.xlabel('age')
# Legend labels are assigned in the order the curves were drawn above.
plt.legend((1, 2, 3), loc='best')
<matplotlib.legend.Legend at 0x151beade4c8>
