import urllib.request
import os
import numpy as np
import pandas as pd
from sklearn import preprocessing
from keras.models import Sequential
from keras.layers import Dense, Dropout
# Download the dataset, skipping the download when a local copy already exists.
url = "http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3.xls"
filepath = "data/titanic3.xls"
if not os.path.isfile(filepath):
    # urlretrieve does not create missing parent directories, so make sure
    # the target folder exists before writing into it.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    result = urllib.request.urlretrieve(url, filepath)
    # `result` is only bound when a download happened, so report it here.
    print('downloaded:', result)
# Columns of interest to keep from the spreadsheet.
cols = [
    'survived', 'name', 'pclass', 'sex', 'age',
    'sibsp', 'parch', 'fare', 'embarked',
]
# Load the spreadsheet and restrict it to the selected columns.
all_df = pd.read_excel(filepath)[cols]
# Split into training and test sets: each row draws a uniform random number
# and goes to the training set when the draw is below 0.8.
msk = np.random.rand(len(all_df)) < 0.8
train_df, test_df = all_df[msk], all_df[~msk]
# Report the size of each set.
print('total:', len(all_df), 'train:', len(train_df), 'test:', len(test_df))
#函数:数据处理
def PreprocessData(raw_df):
#去掉name字段
df = raw
Keras多层感知机预测泰坦尼克号旅客生存概率
最新推荐文章于 2024-06-03 23:32:23 发布