import urllib.request
import os

# NOTE(review): the original Vanderbilt URL (http://biostat.mc.vanderbilt.edu/...)
# is no longer served; the Hmisc/biostat datasets now live on hbiostat.org.
data_url = "https://hbiostat.org/data/repo/titanic3.xls"
data_file_path = "data/titanic3.xls"

if not os.path.isfile(data_file_path):
    # urlretrieve raises FileNotFoundError if the target directory is missing,
    # so make sure "data/" exists before downloading.
    os.makedirs(os.path.dirname(data_file_path), exist_ok=True)
    result = urllib.request.urlretrieve(data_url, data_file_path)
    print("downloaded:", result)
else:
    print(data_file_path, "data file already exists.")
import numpy
import pandas as pd

# Load the raw spreadsheet downloaded above.
df_data = pd.read_excel(data_file_path)
df_data.describe()

# Keep only the columns used below; 'survived' is the label, the rest are
# candidate features ('name' is kept temporarily for inspection).
selected_cols = ['survived', 'name', 'pclass', 'sex', 'age',
                 'sibsp', 'parch', 'fare', 'embarked']
# .copy() makes an independent frame: the original code assigned into a view
# of df_data, which triggers pandas' SettingWithCopyWarning on every
# fillna/map assignment below.
selected_df_data = df_data[selected_cols].copy()

# Shuffle all rows; reset the index so positional order and index labels agree.
selected_df_data = selected_df_data.sample(frac=1).reset_index(drop=True)
selected_df_data.isnull().sum()

# Impute missing numeric values with the column mean.
age_mean_value = selected_df_data['age'].mean()
selected_df_data['age'] = selected_df_data['age'].fillna(age_mean_value)
fare_mean_value = selected_df_data['fare'].mean()
selected_df_data['fare'] = selected_df_data['fare'].fillna(fare_mean_value)
# Missing embarkation port defaults to 'S' (the most frequent value).
selected_df_data['embarked'] = selected_df_data['embarked'].fillna('S')

# Encode the categorical columns as small integers.
selected_df_data['sex'] = selected_df_data['sex'].map({'female': 0, 'male': 1}).astype(int)
selected_df_data['embarked'] = selected_df_data['embarked'].map({'C': 0, 'Q': 1, 'S': 2}).astype(int)

# 'name' is not a model input; drop it before building the arrays.
selected_df_data = selected_df_data.drop(['name'], axis=1)
selected_df_data[:3]

ndarray_data = selected_df_data.values
features = ndarray_data[:, 1:]  # every column after 'survived'
label = ndarray_data[:, 0]      # the 'survived' column
from sklearn import preprocessing

# 80/20 train/test split; the frame was already shuffled above.
train_size = int(len(features) * 0.8)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows. The original fit on the full dataset, leaking test-set
# min/max statistics into the normalization.
minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
x_train = minmax_scale.fit_transform(features[:train_size])
x_test = minmax_scale.transform(features[train_size:])
y_train = label[:train_size]
y_test = label[train_size:]
import tensorflow as tf

# Binary classifier over the 7 input features:
# 7 -> 64(relu) -> 32(relu) -> 16(sigmoid) -> 1(sigmoid survival probability).
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=64,
                          input_dim=7,
                          use_bias=True,
                          kernel_initializer='uniform',
                          bias_initializer='zeros',
                          activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=16, activation='sigmoid'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
model.summary()

model.compile(optimizer=tf.keras.optimizers.Adam(0.003),
              loss='binary_crossentropy',
              metrics=['accuracy'])
logdir = os.path.join("logs")
checkpoint_path = './checkpoint/Titanic.{epoch:02d}-{val_loss:.2f}.ckpt'

# ModelCheckpoint's `period` argument is deprecated (and removed in newer
# Keras); `save_freq` counts *batches*, so convert "every 5 epochs" into a
# batch count. With validation_split=0.2, Keras trains on the first
# int(n * 0.8) samples.
_train_samples = int(len(x_train) * 0.8)
_steps_per_epoch = -(-_train_samples // 40)  # ceil division at batch_size=40

callbacks = [
    tf.keras.callbacks.TensorBoard(log_dir=logdir,
                                   histogram_freq=2),
    tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                       save_weights_only=True,
                                       verbose=1,
                                       # every 5 epochs, as `period=5` did
                                       save_freq=5 * _steps_per_epoch),
]

train_history = model.fit(x=x_train,
                          y=y_train,
                          validation_split=0.2,
                          epochs=100,
                          batch_size=40,
                          callbacks=callbacks,
                          verbose=2)
# Notebook-style inspection left in the script: evaluates the recorded metric
# names (loss/accuracy and their val_ variants) but discards the result when
# run as a plain .py file.
train_history. history. keys( )
import matplotlib. pyplot as plt
def visu_tain_history(train_history, train_metric, validation_metric):
    """Plot a training metric and its validation counterpart per epoch.

    train_history: the History object returned by model.fit.
    train_metric / validation_metric: keys into train_history.history,
    e.g. 'accuracy' and 'val_accuracy'.
    """
    history = train_history.history
    # Plot train first, then validation, so the legend order matches.
    for series_key in (train_metric, validation_metric):
        plt.plot(history[series_key])
    plt.title('Train History')
    plt.ylabel(train_metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'])
    plt.show()
# Learning curves for accuracy and loss.
visu_tain_history(train_history, 'accuracy', 'val_accuracy')
visu_tain_history(train_history, 'loss', 'val_loss')

# Score the held-out split; with the compile() above this is [loss, accuracy].
evaluate_result = model.evaluate(x=x_test, y=y_test)
evaluate_result
model.metrics_names
"""
#保存时的路径
logdir='./logs'
checkpoint_path='./checkpoint/Titanic.{epoch:02d}-{val_loss:.2f}.ckpt'
checkpoint_dir=os.path.dirname(checkpoint_path)
latest=tf.train.latest_checkpoint(checkpoint_dir)
model.load_weights(latest)
#之后相当于训练完成的模型
#模型评估
loss,acc=model.evaluate(x_test,y_test)
.....
"""