数据分析实战⑤——泰坦尼克号生存分析

'''泰坦尼克号乘客生还数据分析实战'''
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

"""性别,年龄,乘客等级,性别和仓位,年级和性别,年龄和等级"""

# Chinese-language setup: use the SimHei font for sans-serif text in figures.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Load the data set and print a quick overview of it.
data = pd.read_csv('train.csv')
print(data.head(5))
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the report.
data.info()
print(data.describe())

# Data cleaning.
# Missing values can be handled either by dropping them (dropna()) or by
# filling them (fillna()) with a statistic such as the mean or the median.
# Here the missing ages are replaced with the mean age.
age_mean = data['Age'].mean()
data['Age'] = data['Age'].fillna(age_mean)
# info() prints directly and returns None; calling it bare avoids the extra
# "None" line that print(data.info()) would produce.
data.info()

# Convert a sex label into a numeric code.
def sex_value(Sex):
    """Return the integer code for a sex label.

    Args:
        Sex: The label to encode.

    Returns:
        1 when ``Sex`` equals "male", 0 when it equals "female", and
        None for any other value.
    """
    return 1 if Sex == "male" else (0 if Sex == "female" else None)

# Encode the Sex column as integers (male -> 1, female -> 0) exactly once.
# Series.map with a {"male": 1, "female": 0} dict is an equivalent way to do
# this, but it must be used instead of - not after - this call: once the
# column holds 0/1 the string keys no longer match anything and map() would
# replace every value with NaN, emptying every later chart that groups by Sex.
data['Sex'] = data['Sex'].apply(sex_value)
print(data.head(5))

# Rows for passengers who survived (Survived == 1).
survived = data.loc[data['Survived'] == 1]
print(survived.head(5))

# Rows for passengers who did not survive (Survived == 0).
no_survived = data.loc[data['Survived'] == 0]
print(no_survived.head(5))

# Sex column of each group. A bare expression such as survived["Sex"] is only
# echoed by an interactive notebook; in a script its result is discarded, so
# the selections are bound to names and printed like the other steps.
survived_sex = survived["Sex"]
print(survived_sex.head(5))

no_survived_sex = no_survived["Sex"]
print(no_survived_sex.head(5))

# Count plot of passengers by sex, with one bar per survival outcome.
plt.figure(figsize=(10, 6))
# countplot() draws the per-category counts; hue selects the column used to
# split each category into separate bars. It returns the Axes it drew on.
ax = sns.countplot(x='Sex', hue='Survived', data=data, palette="pastel")
ax.set_title('Gender Distribution of Survived and Not Survived Passengers')
ax.set_xlabel('Sex')
ax.set_ylabel('Count')
# Tick positions 0 and 1 are relabelled to match the 0 = female / 1 = male
# coding produced by sex_value (assumes Sex holds those codes at this point).
ax.set_xticks([0, 1])
ax.set_xticklabels(['Female', 'Male'])
ax.legend(title='Survived', labels=['Not Survived', 'Survived'])
plt.show()


# Stacked bar chart of survival counts per passenger class.
# size() counts the rows for each (Pclass, Survived) pair; unstack() moves
# the Survived level into columns so that each class becomes one stacked bar.
grouped_data_pclass = (
    data.groupby(['Pclass', 'Survived'])
    .size()
    .unstack()
)
ax = grouped_data_pclass.plot.bar(
    stacked=True,
    figsize=(10, 6),
    color=["#FF9999", "#66B2FF"],
)
ax.set_title('Distribution of Survived and Not Survived Passengers by Passenger Class (Stacked Bar Chart)')
ax.set_xlabel('Passenger Class (Pclass)')
ax.set_ylabel('Count')
# Hard-coded labels: assumes exactly three classes, plotted in ascending order.
ax.set_xticklabels(['1st Class', '2nd Class', '3rd Class'], rotation=0)
ax.legend(title='Survived', labels=['Not Survived', 'Survived'])
ax.figure.tight_layout()
plt.show()

# Stacked bar chart of survival counts per age group.
# With right=False every bin is half-open, [lower, upper), so each upper edge
# must be one past the last age named in its label:
#   [0, 13) -> 0-12, [13, 19) -> 13-18, [19, 60) -> 19-59, [60, inf) -> 60+.
# Edges of 12 / 18 / 59 would push ages 12, 18 and 59 into the next group up,
# contradicting the labels. The open-ended last bin also covers any age >= 100.
bins = [0, 13, 19, 60, np.inf]
labels = ['Child (0-12)', 'Teenager (13-18)', 'Adult (19-59)', 'Elderly (60+)']
data['Age Group'] = pd.cut(data['Age'], bins=bins, labels=labels, right=False)
# 'Age Group' is categorical; observed=False keeps every age group in the
# table (with a zero count) even if no passenger falls into it, and makes the
# behaviour explicit instead of relying on the pandas default.
grouped_data_age = (
    data.groupby(['Age Group', 'Survived'], observed=False)
    .size()
    .unstack()
)
ax = grouped_data_age.plot(kind='bar', stacked=True, figsize=(12, 7), color=["#FF9999", "#66B2FF"])
plt.title('Distribution of Survived and Not Survived Passengers by Age Group (Stacked Bar Chart)')
plt.xlabel('Age Group')
plt.xticks(rotation=0)
plt.ylabel('Count')
plt.legend(title='Survived', labels=['Not Survived', 'Survived'])
plt.tight_layout()
plt.show()

# Stacked bar chart of survival counts for each (sex, passenger class) pair.
# Rows are counted per (Sex, Pclass, Survived); unstack() pivots the last
# level (Survived) into columns, leaving one bar per (Sex, Pclass) pair.
grouped_data_sex_pclass = (
    data.groupby(['Sex', 'Pclass', 'Survived'])
    .size()
    .unstack()
)
ax = grouped_data_sex_pclass.plot.bar(
    stacked=True,
    figsize=(12, 7),
    color=["#FF9999", "#66B2FF"],
)
ax.set_title('Distribution of Survived and Not Survived Passengers by Sex and Passenger Class (Stacked Bar Chart)')
ax.set_xlabel('Sex, Passenger Class (Pclass)')
ax.set_ylabel('Count')
# Hard-coded labels: assumes six bars ordered by Sex code (0 = female first,
# then 1 = male) and, within each sex, by ascending passenger class.
ax.set_xticklabels(
    [
        'Female, 1st Class',
        'Female, 2nd Class',
        'Female, 3rd Class',
        'Male, 1st Class',
        'Male, 2nd Class',
        'Male, 3rd Class',
    ],
    rotation=45,
)
ax.legend(title='Survived', labels=['Not Survived', 'Survived'])
ax.figure.tight_layout()
plt.show()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

数字生命Allen

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值