import pandas as pd
import numpy as np
# Directory that holds the Titanic CSV files.  A raw string keeps the
# backslash literal instead of relying on an unrecognised escape sequence.
file = r'D:\新建文件夹 (6)'

# Load the training and test sets once (both files carry the .csv extension).
data_train = pd.read_csv(file + '/titanic_train.csv')
data_test = pd.read_csv(file + '/titanic_test.csv')

# Print per-column information for the training set; this shows the total
# number of rows and whether any column has missing values.
data_train.info()
# Example output recorded from a previous run:
#
#   <class 'pandas.core.frame.DataFrame'>
#   RangeIndex: 891 entries, 0 to 890
#   Data columns (total 12 columns):
#   PassengerId    891 non-null int64
#   Survived       891 non-null int64
#   Pclass         891 non-null int64
#   Name           891 non-null object
#   Sex            891 non-null object
#   Age            714 non-null float64
#   SibSp          891 non-null int64
#   Parch          891 non-null int64
#   Ticket         891 non-null object
#   Fare           891 non-null float64
#   Cabin          204 non-null object
#   Embarked       889 non-null object
#   dtypes: float64(2), int64(5), object(5)
#   memory usage: 66.2+ KB
#
# In that run Age, Cabin and Embarked have fewer than 891 non-null entries,
# i.e. they contain missing values.

# Age has missing values: fill them with the median age of the training set.
data_train['Age'] = data_train['Age'].fillna(data_train['Age'].median())
predictors = ["Pclass","Age","SibSp","