# 过程都在代码里 (the whole workflow is shown in the code below):
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Read the training and test splits from their CSV files (training file
# first, then the test file) and bind them to `train` and `test`.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (r'...\train.csv', r'...\test.csv')
)
# Print pandas' summary of the training frame as a first look at the data.
train.info()
#----------------------------know about the data----------------------------------------------#
# Exploratory plots of the training set.
# Each plot is given its own figure: without an explicit plt.figure() every
# seaborn call below draws onto the same current axes, so the charts would
# be painted on top of one another.
sns.set_style('whitegrid')

# Heatmap of the null mask to reveal the missing values.
# With the "Blues" colormap the missing cells (True) are drawn dark blue,
# while the cells that hold a value (False) stay near-white.
plt.figure()
sns.heatmap(train.isnull(), yticklabels=False, cbar=False, cmap="Blues")

# How many survived vs. how many did not, split by gender.
plt.figure()
sns.countplot(x='Survived', hue='Sex', data=train, palette='RdBu_r')

# How many survived vs. how many did not, split by passenger class.
plt.figure()
sns.countplot(x='Survived', hue='Pclass', data=train, palette='rainbow')

# Distribution of age (rows with a missing age are dropped first).
# sns.distplot is deprecated in recent seaborn releases in favour of
# sns.histplot; use histplot when it is available and fall back to distplot
# on older installations. stat='density' keeps the normalised histogram that
# distplot draws when a KDE curve is requested.
known_ages = train['Age'].dropna()
plt.figure()
if hasattr(sns, 'histplot'):
    sns.histplot(known_ages, kde=True, stat='density', color='blue', bins=30)
else:
    sns.distplot(known_ages, kde=True, color='blue', bins=30)
# Checking the age groups of the people within each class.
# Grouped into classes
plt.figure(figsize = (12, 7))
sns.boxplot(x = "Pclass", y = <