# Solve the Titanic survival-prediction task with TensorFlow (v1).
import pandas as pd
from tensorflow import keras
import matplotlib.pyplot as plt
import os
# Directory that holds the train.csv / test.csv files.
PATH = './titanic'

# Resolve both file locations first, then load them.
train_csv = os.path.join(PATH, 'train.csv')
test_csv = os.path.join(PATH, 'test.csv')
train_data = pd.read_csv(train_csv)
test_data = pd.read_csv(test_csv)

# Print a column/dtype/non-null summary for each table.
for heading, frame in (('训练集信息:', train_data), ('测试集信息:', test_data)):
    print(heading)
    frame.info()

# Keep the test ids aside; arrange() drops PassengerId from the features.
predId = test_data['PassengerId']
def arrange(df):
    """Convert a raw passenger table into an all-numeric feature table.

    The input frame is not modified; a new frame is returned.

    Steps:
      * drop ``Name``, ``PassengerId`` and ``Ticket``;
      * fill missing ``Age`` with the column mean, missing ``Fare`` with the
        column median, missing ``Embarked`` with ``'S'`` and missing
        ``Cabin`` with ``'U'``;
      * encode ``Sex`` as 1 for ``'male'`` and 0 otherwise;
      * one-hot encode ``Embarked`` (C/Q/S) and ``Pclass`` (1/2/3);
      * encode ``Cabin`` as the offset of its first letter from ``'A'``.

    Args:
        df: DataFrame with at least the columns ``PassengerId``, ``Name``,
            ``Sex``, ``Age``, ``Ticket``, ``Fare``, ``Cabin``, ``Embarked``
            and ``Pclass``. Any other columns (e.g. ``Survived``) are passed
            through unchanged.

    Returns:
        A new DataFrame whose one-hot columns are always
        ``Embarked_C/Q/S`` and ``Pclass_1/2/3``, regardless of which
        categories actually occur in ``df``.
    """
    df = df.drop('Name', axis=1)

    # Impute missing values. Fare was previously left as-is, so a single
    # missing fare would feed NaN into the network.
    df['Age'] = df['Age'].fillna(df['Age'].mean())
    df['Fare'] = df['Fare'].fillna(df['Fare'].median())
    df['Embarked'] = df['Embarked'].fillna('S')
    df['Cabin'] = df['Cabin'].fillna('U')

    df['Sex'] = df['Sex'].apply(lambda x: 1 if x == 'male' else 0)

    # One-hot encode against a fixed category list so that every call yields
    # the same columns in the same order, even if a category is absent.
    # dtype='uint8' keeps the indicators numeric; newer pandas defaults to
    # bool, which turns the mixed frame into an object array on conversion.
    for column, levels in (('Embarked', ['C', 'Q', 'S']), ('Pclass', [1, 2, 3])):
        as_category = df[column].astype(pd.CategoricalDtype(categories=levels))
        indicators = pd.get_dummies(as_category, prefix=column, dtype='uint8')
        df = pd.concat([df, indicators], axis=1).drop(column, axis=1)

    # Deck letter -> integer offset from 'A' (the 'U' placeholder maps to 20).
    df['Cabin'] = df['Cabin'].apply(lambda x: ord(x[0]) - ord('A'))

    df = df.drop('PassengerId', axis=1).drop('Ticket', axis=1)
    return df
# Preprocess the training table, then separate the label from the features.
train_data = arrange(train_data)
train_y = train_data['Survived']
train_x = train_data.drop(columns='Survived')

# Quick look at the prepared features.
print('整理后训练集:')
print(train_x.head())
print(train_x.shape)

# The test table goes through the same preprocessing.
test_data = arrange(test_data)
# Fully connected binary classifier: three hidden ReLU layers of 20 units
# followed by a single sigmoid output unit.
model = keras.Sequential()
# The shape must be a tuple; the previous `(12)` was just the integer 12.
# Taking the width from train_x keeps the model in step with the features.
model.add(keras.layers.InputLayer(input_shape=(train_x.shape[1],)))
for _ in range(3):
    model.add(keras.layers.Dense(20, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss=keras.losses.binary_crossentropy, metrics=['accuracy'])
model.summary()
# Train with the last 20% of the training rows held out for validation.
epochs = 100
history = model.fit(
    x=train_x,
    y=train_y,
    batch_size=32,
    epochs=epochs,
    validation_split=0.2,
)

# Per-epoch metric curves recorded by fit().
curves = history.history
loss, val_loss = curves['loss'], curves['val_loss']
acc, val_acc = curves['accuracy'], curves['val_accuracy']

# Side-by-side panels: accuracy on the left, loss on the right.
epoch_axis = range(epochs)
panels = (
    ('Train and Val accuracy', (('Train acc', acc), ('Val acc', val_acc))),
    ('Train and Val loss', (('Train loss', loss), ('Val loss', val_loss))),
)
plt.figure(figsize=(16, 8))
for position, (panel_title, series) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for curve_label, values in series:
        plt.plot(epoch_axis, values, label=curve_label)
    plt.legend(loc='lower right')
    plt.title(panel_title)
plt.show()
# Persist the trained model in HDF5 format.
model.save('model_v1.h5')

# Sequential.predict_classes is no longer available in current TensorFlow.
# For a single sigmoid output, thresholding the predicted probability at 0.5
# yields the same class labels.
pred_prob = model.predict(test_data)
pred_y = pd.Series((pred_prob > 0.5).astype(int).ravel())

predDf = pd.DataFrame(
    {'PassengerId': predId,
     'Survived': pred_y})
print('测试结果预览:', predDf.head())
# index=False: write only PassengerId and Survived, without the extra
# unnamed index column.
predDf.to_csv('pred_v1.csv', index=False)