读取数据和预处理
import keras
from keras import layers
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
% matplotlib inline
Using TensorFlow backend.
# Load the Titanic training set and build the feature matrix `x` and the
# label vector `y` (the Survived column).
data = pd.read_csv("./data/tt_train.csv")
x = data[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp',
          'Parch', 'Fare', 'Embarked']].copy()

# One-hot encode the port of embarkation; rows with a missing or unknown
# port get 0 in all three indicator columns.
for port in ('S', 'C', 'Q'):
    x['Embarked_' + port] = (x['Embarked'] == port).astype('int')
x = x.drop('Embarked', axis=1)

# One indicator column per distinct value of Sex, appended after the
# Embarked indicators.
sex_dummies = pd.get_dummies(x['Sex'])
x = x.join(sex_dummies).drop('Sex', axis=1)

# Replace missing ages with the mean of the observed ages.
x['Age'] = x['Age'].fillna(x['Age'].mean())

# One-hot encode the passenger class (values 1, 2, 3).
for pclass in (1, 2, 3):
    x['P%d' % pclass] = (x['Pclass'] == pclass).astype('int')
x = x.drop('Pclass', axis=1)

# Split off the target and remove it from the features.
y = data['Survived']
x = x.drop('Survived', axis=1)
x.shape, y.shape
((891, 12), (891,))
建立和训练模型
# Fully connected binary classifier: 12 input features -> 32 -> 32 -> 1.
# The single sigmoid output is later thresholded at 0.5.
model = keras.Sequential([
    layers.Dense(32, input_dim=12, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
WARNING:tensorflow:From E:\MyProgram\Anaconda\envs\krs\lib\site-packages\tensorflow\python\framework\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
# Print a per-layer table of output shapes and parameter counts.
model. summary( )
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 32) 416
_________________________________________________________________
dense_2 (Dense) (None, 32) 1056
_________________________________________________________________
dense_3 (Dense) (None, 1) 33
=================================================================
Total params: 1,505
Trainable params: 1,505
Non-trainable params: 0
_________________________________________________________________
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy tracked under the history key 'acc'.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

# Train for 300 epochs with no progress output; the returned History object
# keeps the per-epoch metrics used for plotting.
history = model.fit(x, y, verbose=0, epochs=300)
WARNING:tensorflow:From E:\MyProgram\Anaconda\envs\krs\lib\site-packages\tensorflow\python\ops\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
绘制训练过程
# Plot the training loss per epoch. The x-axis length is taken from the
# recorded history rather than a hard-coded 300, so the plot stays valid if
# the number of epochs passed to fit() changes.
loss_curve = history.history['loss']
plt.plot(range(len(loss_curve)), loss_curve)
[<matplotlib.lines.Line2D at 0x1622de48>]
# Plot the training accuracy per epoch. The x-axis length is taken from the
# recorded history rather than a hard-coded 300, so the plot stays valid if
# the number of epochs passed to fit() changes.
acc_curve = history.history['acc']
plt.plot(range(len(acc_curve)), acc_curve)
[<matplotlib.lines.Line2D at 0x162bd9b0>]
导出预测值以提交到Kaggle
# Load the Kaggle test set and build the feature matrix `xt` with the same
# steps (and therefore the same column order) as the training features.
data = pd.read_csv("./data/tt_test.csv")
xt = data[['Pclass', 'Sex', 'Age', 'SibSp',
           'Parch', 'Fare', 'Embarked']]
xt = xt.copy()

# One-hot encode the port of embarkation.
xt.loc[:, 'Embarked_S'] = (xt.Embarked == 'S').astype('int')
xt.loc[:, 'Embarked_C'] = (xt.Embarked == 'C').astype('int')
xt.loc[:, 'Embarked_Q'] = (xt.Embarked == 'Q').astype('int')
del xt['Embarked']

# One indicator column per distinct value of Sex.
xt = xt.join(pd.get_dummies(xt.Sex))
del xt['Sex']

# Fix: write the imputed ages back to `xt`. The previous code assigned them
# to the training frame `x`, which left NaN ages in `xt` (so the model
# produced NaN predictions) and overwrote the training Age column.
xt['Age'] = xt.Age.fillna(xt.Age.mean())

# Defensive: any missing Fare would also propagate NaN through the network.
# NOTE(review): confirm whether tt_test.csv actually has missing fares; this
# is a no-op when it does not.
xt['Fare'] = xt.Fare.fillna(xt.Fare.mean())

# One-hot encode the passenger class (values 1, 2, 3).
xt.loc[:, 'P1'] = (xt.Pclass == 1).astype('int')
xt.loc[:, 'P2'] = (xt.Pclass == 2).astype('int')
xt.loc[:, 'P3'] = (xt.Pclass == 3).astype('int')
del xt['Pclass']
xt.shape
(418, 12)
# Score the test features and write the Kaggle submission file.
predictions = model.predict(xt)

# Threshold the sigmoid outputs at 0.5 to get 0/1 labels. A NaN prediction
# compares False against 0.5 and is therefore written out as 0.
survived_flags = (predictions.flatten() > 0.5).astype('int')
submission = pd.DataFrame({
    "PassengerId": data["PassengerId"],
    "Survived": survived_flags,
})
submission.to_csv("./data/tt_upload.csv", index=False)
E:\MyProgram\Anaconda\envs\krs\lib\site-packages\ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in greater