import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import Image
%matplotlib inline
# Plot defaults applied in one call.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used so Chinese labels display correctly
    'axes.unicode_minus': False,    # needed so the minus sign displays correctly
    'figure.figsize': (10, 6),      # default size of output figures
})
载入我们提供的清洗之后的数据（clear_data.csv），同时也将原始数据载入（train.csv），说说它们有什么不同
# Read the cleaned dataset and the original training data from the working directory.
data = pd.read_csv(filepath_or_buffer='clear_data.csv')
train = pd.read_csv(filepath_or_buffer='train.csv')

# Compare the dimensions of the two frames, then peek at the first five rows of each.
(data.shape, train.shape)
data.head(n=5)
train.head(n=5)
任务一:切割训练集和测试集
# Features are the whole cleaned frame; the target is the 'Survived' column
# taken from the original training frame.
x, y = data, train['Survived']
from sklearn.model_selection import train_test_split
# IPython-only syntax: a trailing `?` displays the help text for train_test_split.
# NOTE(review): this line is not valid plain Python; it only works in IPython/Jupyter.
train_test_split?
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.33, stratify = y, random_state