TensorFlow2.0 Keras泰坦尼克数据集预测

最新推荐文章于 2024-04-07 10:58:04 发布

_Zephyrus_

最新推荐文章于 2024-04-07 10:58:04 发布

阅读量1.3k

点赞数 1

分类专栏： Deep Learning 文章标签： titanic

本文链接：https://blog.csdn.net/wangxw1803/article/details/100014459

版权

Deep Learning 专栏收录该内容

34 篇文章 1 订阅

订阅专栏

import urllib.request
import os

# Source spreadsheet and the local path it is cached at.
url = 'http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3.xls'
filepath = './data/titanic3.xls'
# Download only when no local copy exists, so re-running the cell is cheap.
if not os.path.isfile(filepath):
    # urlretrieve does not create missing parent directories; make sure
    # ./data exists before writing into it.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    result = urllib.request.urlretrieve(url, filepath)
    print('downloaded:', result)

downloaded: ('./data/titanic3.xls', <http.client.HTTPMessage object at 0x00000214763EAEB8>)

import numpy as np
import pandas as pd

# Columns kept from the spreadsheet. 'survived' comes first because later
# cells slice column 0 off as the label.
cols = ['survived', 'name', 'pclass', 'sex', 'age',
        'sibsp', 'parch', 'fare', 'embarked']

# Read the cached spreadsheet and keep only the selected columns.
all_df = pd.read_excel('./data/titanic3.xls')[cols]

all_df[:2]

	survived	name	pclass	sex	age	sibsp	parch	fare	embarked
0	1	Allen, Miss. Elisabeth Walton	1	female	29.0000	0	0	211.3375	S
1	1	Allison, Master. Hudson Trevor	1	male	0.9167	1	2	151.5500	S

# Work on a copy without the free-text 'name' column; all_df is left intact.
df = all_df.drop(columns=['name'])

all_df.isnull().sum()

survived      0
name          0
pclass        0
sex           0
age         263
sibsp         0
parch         0
fare          1
embarked      2
dtype: int64

# Replace missing ages and fares with the mean of their own column.
age_mean = df['age'].mean()
fare_mean = df['fare'].mean()
df = df.assign(age=df['age'].fillna(age_mean),
               fare=df['fare'].fillna(fare_mean))

# Encode sex numerically: female -> 0, male -> 1.
df['sex'] = df['sex'].map({'female': 0, 'male': 1}).astype(int)

# Expand 'embarked' into one indicator column per distinct value.
x_Onehot_df = pd.get_dummies(df, columns=['embarked'])

x_Onehot_df[:2]

	survived	pclass	sex	age	sibsp	parch	fare	embarked_C	embarked_Q	embarked_S
0	1	1	0	29.0000	0	0	211.3375	0	0	1
1	1	1	1	0.9167	1	2	151.5500	0	0	1

# Convert the encoded frame into a plain NumPy array (one row per passenger).
ndarray = x_Onehot_df.values

ndarray.shape

(1309, 10)

ndarray[:2]

array([[  1.    ,   1.    ,   0.    ,  29.    ,   0.    ,   0.    ,
        211.3375,   0.    ,   0.    ,   1.    ],
       [  1.    ,   1.    ,   1.    ,   0.9167,   1.    ,   2.    ,
        151.55  ,   0.    ,   0.    ,   1.    ]])

# Column 0 holds 'survived' (the label); every remaining column is a feature.
Label, Features = ndarray[:, 0], ndarray[:, 1:]

Label[:2]

array([1., 1.])

Features[:2]

array([[  1.    ,   0.    ,  29.    ,   0.    ,   0.    , 211.3375,
          0.    ,   0.    ,   1.    ],
       [  1.    ,   1.    ,   0.9167,   1.    ,   2.    , 151.55  ,
          0.    ,   0.    ,   1.    ]])

from sklearn import preprocessing

# Rescale every feature column into the [0, 1] range.
minmax_Scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
scaledFeatures = minmax_Scale.fit(Features).transform(Features)

scaledFeatures[:2]

array([[0.        , 0.        , 0.36116884, 0.        , 0.        ,
        0.41250333, 0.        , 0.        , 1.        ],
       [0.        , 1.        , 0.00939458, 0.125     , 0.22222222,
        0.2958059 , 0.        , 0.        , 1.        ]])

# Draw one uniform random number per row; rows below 0.8 (about 80%) form
# the training set and the remainder the test set.
msk = np.random.rand(len(all_df)) < 0.8
train_df, test_df = all_df[msk], all_df[~msk]

print('total:', len(all_df), 'train:', len(train_df), 'test:', len(test_df))

total: 1309 train: 1071 test: 238

def PreprocessData(raw_df):
    """Turn a raw passenger frame into scaled features and labels.

    Steps: drop the 'name' column, fill missing 'age' and 'fare' with the
    mean of their own column, encode 'sex' as 0 (female) / 1 (male),
    one-hot encode 'embarked', then min-max scale every feature to [0, 1].

    Args:
        raw_df: DataFrame containing at least the columns 'name', 'age',
            'fare', 'sex' and 'embarked'. The first column remaining after
            'name' is dropped is used as the label (callers in this file
            pass 'survived' first).

    Returns:
        A tuple ``(scaledFeatures, Label)``: the scaled feature array and
        the array of values from the label column.

    NOTE(review): the scaler is fitted on whatever frame is passed in, so
    separate calls (e.g. train vs. test) use different scaling ranges, and
    the number of one-hot columns depends on which 'embarked' values are
    present in ``raw_df`` — confirm this is acceptable for the caller.
    """
    df = raw_df.drop(['name'], axis=1)

    # Impute missing numeric values with the mean of the same column.
    age_mean = df['age'].mean()
    df['age'] = df['age'].fillna(age_mean)
    fare_mean = df['fare'].mean()
    # Fixed: missing fares were previously filled with age_mean.
    df['fare'] = df['fare'].fillna(fare_mean)

    df['sex'] = df['sex'].map({'female': 0, 'male': 1}).astype(int)
    x_Onehot_df = pd.get_dummies(data=df, columns=['embarked'])

    # Column 0 is the label; everything after it is a feature.
    ndarray = x_Onehot_df.values
    Features = ndarray[:, 1:]
    Label = ndarray[:, 0]

    minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
    scaledFeatures = minmax_scale.fit_transform(Features)

    return scaledFeatures, Label

# Preprocess each split independently. PreprocessData fits its own
# MinMaxScaler and computes its own fill-in means on the frame it is given,
# so the train and test sets are imputed and scaled separately.
train_Features, train_Label = PreprocessData(train_df)
test_Features, test_Label = PreprocessData(test_df)

train_Features[:2]

array([[0.        , 0.        , 0.0229641 , 0.125     , 0.22222222,
        0.2958059 , 0.        , 0.        , 1.        ],
       [0.        , 1.        , 0.37369494, 0.125     , 0.22222222,
        0.2958059 , 0.        , 0.        , 1.        ]])

test_Label[:2]

array([1., 1.])

# Build the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Fully connected binary classifier: 9 input features -> 40 -> 30 -> 1.
# The single sigmoid output unit yields a value in (0, 1).
model = Sequential([
    Dense(units=40, input_dim=9, kernel_initializer='uniform',
          activation='relu'),
    Dense(units=30, kernel_initializer='uniform', activation='relu'),
    Dense(units=1, kernel_initializer='uniform', activation='sigmoid'),
])

# Binary cross-entropy loss, Adam optimizer; accuracy is tracked as a metric.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

W0819 11:23:43.940761  6100 deprecation_wrapper.py:119] From E:\Anaconda3\envs\ml\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0819 11:23:43.970712  6100 deprecation_wrapper.py:119] From E:\Anaconda3\envs\ml\lib\site-packages\keras\backend\tensorflow_backend.py:3376: The name tf.log is deprecated. Please use tf.math.log instead.

W0819 11:23:43.976665  6100 deprecation.py:323] From E:\Anaconda3\envs\ml\lib\site-packages\tensorflow\python\ops\nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

# Train for 30 epochs in mini-batches of 30 rows. validation_split=0.1 holds
# out 10% of the training rows for validation; verbose=2 prints one summary
# line per epoch. The returned history object is kept in train_history.
train_history = model.fit(x=train_Features,
                          y=train_Label,
                          validation_split=0.1,
                          batch_size=30,
                          epochs=30,
                          verbose=2)

Train on 963 samples, validate on 108 samples
Epoch 1/30
 - 0s - loss: 0.6645 - acc: 0.6023 - val_loss: 0.5840 - val_acc: 0.7685
Epoch 2/30
 - 0s - loss: 0.6062 - acc: 0.6594 - val_loss: 0.4936 - val_acc: 0.7870
Epoch 3/30
 - 0s - loss: 0.5513 - acc: 0.7487 - val_loss: 0.4564 - val_acc: 0.7870
Epoch 4/30
 - 0s - loss: 0.5151 - acc: 0.7747 - val_loss: 0.4487 - val_acc: 0.8056
Epoch 5/30
 - 0s - loss: 0.4968 - acc: 0.7757 - val_loss: 0.4538 - val_acc: 0.8056
Epoch 6/30
 - 0s - loss: 0.4882 - acc: 0.7736 - val_loss: 0.4354 - val_acc: 0.8056
Epoch 7/30
 - 0s - loss: 0.4839 - acc: 0.7695 - val_loss: 0.4277 - val_acc: 0.8148
Epoch 8/30
 - 0s - loss: 0.4818 - acc: 0.7788 - val_loss: 0.4254 - val_acc: 0.8148
Epoch 9/30
 - 0s - loss: 0.4796 - acc: 0.7840 - val_loss: 0.4231 - val_acc: 0.8333
Epoch 10/30
 - 0s - loss: 0.4766 - acc: 0.7819 - val_loss: 0.4247 - val_acc: 0.8148
Epoch 11/30
 - 0s - loss: 0.4733 - acc: 0.7830 - val_loss: 0.4240 - val_acc: 0.8148
Epoch 12/30
 - 0s - loss: 0.4714 - acc: 0.7840 - val_loss: 0.4174 - val_acc: 0.8333
Epoch 13/30
 - 0s - loss: 0.4684 - acc: 0.7871 - val_loss: 0.4181 - val_acc: 0.8426
Epoch 14/30
 - 0s - loss: 0.4666 - acc: 0.7871 - val_loss: 0.4169 - val_acc: 0.8426
Epoch 15/30
 - 0s - loss: 0.4643 - acc: 0.7892 - val_loss: 0.4151 - val_acc: 0.8519
Epoch 16/30
 - 0s - loss: 0.4632 - acc: 0.7892 - val_loss: 0.4134 - val_acc: 0.8426
Epoch 17/30
 - 0s - loss: 0.4618 - acc: 0.7902 - val_loss: 0.4133 - val_acc: 0.8426
Epoch 18/30
 - 0s - loss: 0.4618 - acc: 0.7913 - val_loss: 0.4145 - val_acc: 0.8056
Epoch 19/30
 - 0s - loss: 0.4606 - acc: 0.7944 - val_loss: 0.4160 - val_acc: 0.8426
Epoch 20/30
 - 0s - loss: 0.4606 - acc: 0.7934 - val_loss: 0.4155 - val_acc: 0.8148
Epoch 21/30
 - 0s - loss: 0.4588 - acc: 0.7944 - val_loss: 0.4124 - val_acc: 0.8426
Epoch 22/30
 - 0s - loss: 0.4568 - acc: 0.7954 - val_loss: 0.4136 - val_acc: 0.8426
Epoch 23/30
 - 0s - loss: 0.4571 - acc: 0.7985 - val_loss: 0.4152 - val_acc: 0.8333
Epoch 24/30
 - 0s - loss: 0.4585 - acc: 0.7923 - val_loss: 0.4190 - val_acc: 0.8056
Epoch 25/30
 - 0s - loss: 0.4577 - acc: 0.7923 - val_loss: 0.4162 - val_acc: 0.8426
Epoch 26/30
 - 0s - loss: 0.4610 - acc: 0.7882 - val_loss: 0.4192 - val_acc: 0.8426
Epoch 27/30
 - 0s - loss: 0.4553 - acc: 0.8006 - val_loss: 0.4156 - val_acc: 0.8333
Epoch 28/30
 - 0s - loss: 0.4580 - acc: 0.7902 - val_loss: 0.4186 - val_acc: 0.7963
Epoch 29/30
 - 0s - loss: 0.4590 - acc: 0.7975 - val_loss: 0.4145 - val_acc: 0.8426
Epoch 30/30
 - 0s - loss: 0.4550 - acc: 0.7934 - val_loss: 0.4165 - val_acc: 0.8241

# Evaluate on the held-out test split. With metrics=['accuracy'] set at
# compile time, scores[1] is the test accuracy (scores[0] presumably the loss).
scores = model.evaluate(x=test_Features, y= test_Label)

238/238 [==============================] - 0s 21us/step

scores[1]

0.8025210089042407

# Two hand-made passenger records, with values in the same order as the
# columns of all_df: survived, name, pclass, sex, age, sibsp, parch, fare,
# embarked.
Jack = pd.Series([0, 'Jack', 3, 'male', 23, 1, 0, 5.000, 'S'])
Rose = pd.Series([1, 'Rose', 1, 'female', 20, 1, 0, 100.000, 'S'])

# Stack both records into a two-row frame with the standard column names.
JR_df = pd.DataFrame(
    [person.tolist() for person in (Jack, Rose)],
    columns=['survived', 'name', 'pclass', 'sex', 'age',
             'sibsp', 'parch', 'fare', 'embarked'],
)

# Append the two extra rows to the full dataset. The original index labels
# are kept, so the appended rows carry labels 0 and 1, which duplicate the
# labels of the first two existing rows.
all_df = pd.concat([all_df, JR_df])

all_df[~2:]

	survived	name	pclass	sex	age	sibsp	fare	embarked
1308	0	Zimmerman, Mr. Leo	3	male	29.0	0	7.875	S
0	0	Jack	3	male	23.0	1	5.000	S
1	1	Rose	1	female	20.0	1	100.000	S

# Preprocess the full dataset (including the appended rows). This rebinds
# the global Label and refits the scaler on all rows inside PreprocessData.
all_Features, Label = PreprocessData(all_df)

# One predicted value per row, taken from the model's sigmoid output.
all_probability = model.predict(all_Features)

all_probability[:10]

array([[0.97387624],
       [0.36760893],
       [0.9653297 ],
       [0.29578814],
       [0.96136355],
       [0.26288155],
       [0.93404984],
       [0.27685004],
       [0.92254674],
       [0.30783302]], dtype=float32)

# NOTE(review): this rebinds the name `pd` — until now the pandas module
# alias — to the DataFrame itself. Any later pandas call written as `pd.xxx`
# in the same session will fail; the cells that follow index `pd` as a frame.
# `pd` and `all_df` refer to the same object, so the insert below also adds
# the column to all_df.
pd = all_df
# Append the predictions as a new last column named 'probability'.
pd.insert(len(all_df.columns),
         'probability', all_probability)

pd[~2:]

	survived	name	pclass	sex	age	sibsp	fare	embarked	probability
1308	0	Zimmerman, Mr. Leo	3	male	29.0	0	7.875	S	0.132631
0	0	Jack	3	male	23.0	1	5.000	S	0.130663
1	1	Rose	1	female	20.0	1	100.000	S	0.963028

pd[(pd['survived'] == 0) ]

	survived	name	pclass	sex	age	sibsp	parch	fare	embarked	probability
2	0	Allison, Miss. Helen Loraine	1	female	2.0	1	2	151.5500	S	0.965330
3	0	Allison, Mr. Hudson Joshua Creighton	1	male	30.0	1	2	151.5500	S	0.295788
4	0	Allison, Mrs. Hudson J C (Bessie Waldo Daniels)	1	female	25.0	1	2	151.5500	S	0.961364
7	0	Andrews, Mr. Thomas Jr	1	male	39.0	0	0	0.0000	S	0.276850
9	0	Artagaveytia, Mr. Ramon	1	male	71.0	0	0	49.5042	C	0.307833
10	0	Astor, Col. John Jacob	1	male	47.0	1	0	227.5250	C	0.382211
15	0	Baumann, Mr. John D	1	male	NaN	0	0	25.9250	S	0.303370
16	0	Baxter, Mr. Quigg Edmond	1	male	24.0	0	1	247.5208	C	0.568902
19	0	Beattie, Mr. Thomson	1	male	36.0	0	0	75.2417	C	0.418435
25	0	Birnbaum, Mr. Jakob	1	male	25.0	0	0	26.0000	C	0.446399
30	0	Blackwell, Mr. Stephen Weart	1	male	45.0	0	0	35.5000	S	0.271255
34	0	Borebank, Mr. John James	1	male	42.0	0	0	26.5500	S	0.275931
38	0	Brady, Mr. John Bertram	1	male	41.0	0	0	30.5000	S	0.278998
39	0	Brandeis, Mr. Emil	1	male	48.0	0	0	50.4958	C	0.364540
40	0	Brewe, Dr. Arthur Jackson	1	male	NaN	0	0	39.6000	C	0.429713
45	0	Butt, Major. Archibald Willingham	1	male	45.0	0	0	26.5500	S	0.269356
46	0	Cairns, Mr. Alexander	1	male	NaN	0	0	31.0000	S	0.304525
51	0	Carlsson, Mr. Frans Olof	1	male	33.0	0	0	5.0000	S	0.291429
52	0	Carrau, Mr. Francisco M	1	male	28.0	0	0	47.1000	S	0.312617
53	0	Carrau, Mr. Jose Pedro	1	male	17.0	0	0	47.1000	S	0.339315
58	0	Case, Mr. Howard Brown	1	male	49.0	0	0	26.0000	S	0.260632
60	0	Cavendish, Mr. Tyrell William	1	male	36.0	1	0	78.8500	S	0.271148
62	0	Chaffee, Mr. Herbert Fuller	1	male	46.0	1	0	61.1750	S	0.246321
70	0	Chisholm, Mr. Roderick Robert Crispin	1	male	NaN	0	0	0.0000	S	0.297509
71	0	Clark, Mr. Walter Miller	1	male	27.0	1	0	136.7792	C	0.426140
74	0	Clifford, Mr. George Quincy	1	male	NaN	0	0	52.0000	S	0.309330
75	0	Colley, Mr. Edward Pomeroy	1	male	47.0	0	0	25.5875	S	0.264827
77	0	Compton, Mr. Alexander Taylor Jr	1	male	37.0	1	1	83.1583	C	0.365659
80	0	Crafton, Mr. John Bertram	1	male	NaN	0	0	26.5500	S	0.303512
81	0	Crosby, Capt. Edward Gifford	1	male	70.0	1	1	71.0000	S	0.200277
...	...	...	...	...	...	...	...	...	...	...
1276	0	Vander Planke, Mrs. Julius (Emelia Maria Vande...	3	female	31.0	1	0	18.0000	S	0.390119
1278	0	Vendel, Mr. Olof Edvin	3	male	20.0	0	0	7.8542	S	0.144151
1279	0	Vestrom, Miss. Hulda Amanda Adolfina	3	female	14.0	0	0	7.8542	S	0.541694
1280	0	Vovk, Mr. Janko	3	male	22.0	0	0	7.8958	S	0.141521
1281	0	Waelens, Mr. Achille	3	male	22.0	0	0	9.0000	S	0.141519
1282	0	Ware, Mr. Frederick	3	male	NaN	0	0	8.0500	S	0.131565
1283	0	Warren, Mr. Charles William	3	male	NaN	0	0	7.5500	S	0.131566
1284	0	Webber, Mr. James	3	male	NaN	0	0	8.0500	S	0.131565
1285	0	Wenzel, Mr. Linhart	3	male	32.5	0	0	9.5000	S	0.128364
1287	0	Widegren, Mr. Carl/Charles Peter	3	male	51.0	0	0	7.7500	S	0.107727
1288	0	Wiklund, Mr. Jakob Alfred	3	male	18.0	1	0	6.4958	S	0.136882
1289	0	Wiklund, Mr. Karl Johan	3	male	21.0	1	0	6.4958	S	0.133120
1291	0	Willer, Mr. Aaron ("Abi Weller")	3	male	NaN	0	0	8.7125	S	0.131564
1292	0	Willey, Mr. Edward	3	male	NaN	0	0	7.5500	S	0.131566
1293	0	Williams, Mr. Howard Hugh "Harry"	3	male	NaN	0	0	8.0500	S	0.131565
1294	0	Williams, Mr. Leslie	3	male	28.5	0	0	16.1000	S	0.133237
1295	0	Windelov, Mr. Einar	3	male	21.0	0	0	7.2500	S	0.142832
1296	0	Wirz, Mr. Albert	3	male	27.0	0	0	8.6625	S	0.135120
1297	0	Wiseman, Mr. Phillippe	3	male	NaN	0	0	7.2500	S	0.131567
1298	0	Wittevrongel, Mr. Camille	3	male	36.0	0	0	9.5000	S	0.124216
1299	0	Yasbeck, Mr. Antoni	3	male	27.0	1	0	14.4542	C	0.161984
1301	0	Youseff, Mr. Gerious	3	male	45.5	0	0	7.2250	C	0.147109
1302	0	Yousif, Mr. Wazli	3	male	NaN	0	0	7.2250	C	0.169266
1303	0	Yousseff, Mr. Gerious	3	male	NaN	0	0	14.4583	C	0.169295
1304	0	Zabour, Miss. Hileni	3	female	14.5	1	0	14.4542	C	0.674486
1305	0	Zabour, Miss. Thamine	3	female	NaN	1	0	14.4542	C	0.603369
1306	0	Zakarian, Mr. Mapriededer	3	male	26.5	0	0	7.2250	C	0.174369
1307	0	Zakarian, Mr. Ortin	3	male	27.0	0	0	7.2250	C	0.173603
1308	0	Zimmerman, Mr. Leo	3	male	29.0	0	0	7.8750	S	0.132631
0	0	Jack	3	male	23.0	1	0	5.0000	S	0.130663

810 rows × 10 columns

pd[(pd['survived'] == 0) & (pd['probability'] > 0.9)]

	name	pclass	sex	age	sibsp	parch	fare	embarked	probability
2	Allison, Miss. Helen Loraine	1	female	2.0	1	2	151.5500	S	0.965330
4	Allison, Mrs. Hudson J C (Bessie Waldo Daniels)	1	female	25.0	1	2	151.5500	S	0.961364
105	Evans, Miss. Edith Corse	1	female	36.0	0	0	31.6792	C	0.973539
169	Isham, Miss. Ann Elizabeth	1	female	50.0	0	0	28.7125	C	0.971705
286	Straus, Mrs. Isidor (Rosalie Ida Blun)	1	female	63.0	1	0	221.7792	S	0.954021

pd[:5]

	survived	name	pclass	sex	age	sibsp	parch	fare	embarked	probability
0	1	Allen, Miss. Elisabeth Walton	1	female	29.0000	0	0	211.3375	S	0.973876
1	1	Allison, Master. Hudson Trevor	1	male	0.9167	1	2	151.5500	S	0.367609
2	0	Allison, Miss. Helen Loraine	1	female	2.0000	1	2	151.5500	S	0.965330
3	0	Allison, Mr. Hudson Joshua Creighton	1	male	30.0000	1	2	151.5500	S	0.295788
4	0	Allison, Mrs. Hudson J C (Bessie Waldo Daniels)	1	female	25.0000	1	2	151.5500	S	0.961364