泰坦尼克号简单数据分析

import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import warnings
# Silence all warnings for the rest of the script.
warnings.filterwarnings('ignore')

# Matplotlib text configuration: use the SimHei font so Chinese text can be
# displayed, and disable the Unicode minus so minus signs display correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Load the Titanic passenger table and preview its first ten rows.
data = pd.read_csv('../data/titanic.csv')
data.head(10)

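|   | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
| 5 | 6 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
| 6 | 7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
| 7 | 8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
| 8 | 9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27.0 | 0 | 2 | 347742 | 11.1333 | NaN | S |
| 9 | 10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.0 | 1 | 0 | 237736 | 30.0708 | NaN | C |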

# Column reference for the dataset:
#   PassengerId: passenger ID
#   Survived:    survival flag (0 = did not survive, 1 = survived)
#   Pclass:      cabin class (1 = first, 2 = second, 3 = third)
#   Name:        passenger name
#   Sex:         sex
#   Age:         age (has missing values)
#   SibSp:       number of siblings/spouses aboard with the passenger
#   Parch:       number of parents/children aboard with the passenger
#   Ticket:      ticket number
#   Fare:        ticket fare
#   Cabin:       cabin number (has missing values)
#   Embarked:    port of embarkation (C = Cherbourg, Q = Queenstown,
#                S = Southampton) (has missing values)

# Group the Survived flag by cabin class once and reuse the grouping.
# Summing the 0/1 flag yields the survivor count; counting it yields the
# number of passengers with a non-missing flag in that class.
pclass_survival = data.groupby('Pclass')['Survived']
survived_by_pclass = pclass_survival.sum()
total_by_pclass = pclass_survival.count()

print(survived_by_pclass)
print(total_by_pclass)

Pclass
1    136
2     87
3    119
Name: Survived, dtype: int64
Pclass
1    216
2    184
3    491
Name: Survived, dtype: int64

# Survival rate per cabin class: survivors divided by passengers in the class.
survival_rate_by_pclass = survived_by_pclass / total_by_pclass

# Stacked bar chart of counts: survivors at the bottom, non-survivors
# (total minus survivors) stacked on top, one bar per cabin class.
plt.bar(survived_by_pclass.index, survived_by_pclass, label='Survived')
plt.bar(total_by_pclass.index, total_by_pclass - survived_by_pclass, bottom=survived_by_pclass, label='Not Survived')
# Pclass is numeric, so the default locator may place ticks at fractional
# positions; pin the ticks to the actual class values instead.
plt.xticks(survived_by_pclass.index)
plt.xlabel('Pclass')
plt.ylabel('Count')
plt.title('Survival Count by Pclass')
plt.legend()
plt.show()

# Bar chart of the survival rate for each cabin class.
plt.bar(survival_rate_by_pclass.index, survival_rate_by_pclass)
plt.xticks(survival_rate_by_pclass.index)  # same integer ticks as above
plt.xlabel('Pclass')
plt.ylabel('Survival Rate')
plt.title('Survival Rate by Pclass')
plt.show()

 

# Count survivors (sum of the 0/1 Survived flag) and passengers for each sex.
survived_by_gender = data.groupby('Sex')['Survived'].sum()
total_by_gender = data.groupby('Sex')['Survived'].count()

# Survival rate per sex: survivors divided by passengers of that sex.
survival_rate_by_gender = survived_by_gender / total_by_gender

# Stacked bar chart of counts: survivors at the bottom, non-survivors
# (total minus survivors) stacked on top, one bar per sex.
plt.bar(survived_by_gender.index, survived_by_gender, label='Survived')
plt.bar(total_by_gender.index, total_by_gender - survived_by_gender, bottom=survived_by_gender, label='Not Survived')
plt.xlabel('Gender')
plt.ylabel('Count')
plt.title('Survival Count by Gender')
plt.legend()
plt.show()

# Pie chart of survivor counts: shows how the survivors split between the
# sexes. This is a share of survivors, not a survival rate, so the title
# says so.
plt.pie(survived_by_gender, labels=survived_by_gender.index, autopct='%1.1f%%')
plt.title('Share of Survivors by Gender')
plt.show()

# Bar chart of the actual survival rate for each sex. Rates of separate
# groups do not add up to a whole, so a bar chart is used rather than a pie.
plt.bar(survival_rate_by_gender.index, survival_rate_by_gender)
plt.xlabel('Gender')
plt.ylabel('Survival Rate')
plt.title('Survival Rate by Gender')
plt.show()

 

# Bucket ages into 5-year bins with edges 0, 5, ..., 80. Rows with a missing
# Age get a missing AgeGroup and are left out of the grouped statistics.
data['AgeGroup'] = pd.cut(data['Age'], bins=list(range(0, 85, 5)))

# Count survivors and passengers in each age bin. AgeGroup is categorical;
# observed=False is passed explicitly so every bin is kept, including empty
# ones, independent of the pandas version's default.
survived_by_agegroup = data.groupby('AgeGroup', observed=False)['Survived'].sum()
total_by_agegroup = data.groupby('AgeGroup', observed=False)['Survived'].count()

# Survival rate per age bin (NaN for a bin with no passengers).
survival_rate_by_agegroup = survived_by_agegroup / total_by_agegroup

# Stacked bar chart of counts: survivors at the bottom, non-survivors
# (total minus survivors) stacked on top, one bar per age bin.
plt.figure(figsize=(10, 6))  # 10 wide by 6 high
plt.bar(survived_by_agegroup.index.astype(str), survived_by_agegroup, label='Survived')
plt.bar(total_by_agegroup.index.astype(str), total_by_agegroup - survived_by_agegroup, bottom=survived_by_agegroup, label='Not Survived')
plt.xlabel('Age Group')
plt.ylabel('Count')
plt.title('Survival Count by Age Group')
plt.legend()
# Rotate the tick labels and fit the layout only after everything is drawn,
# so the layout pass can account for the rotated labels and axis titles.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Line chart of the survival rate across age bins.
plt.figure(figsize=(10, 6))  # 10 wide by 6 high
plt.plot(survival_rate_by_agegroup.index.astype(str), survival_rate_by_agegroup, marker='o')
plt.xlabel('Age Group')
plt.ylabel('Survival Rate')
plt.title('Survival Rate by Age Group')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

 

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值