导入相关库
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import urllib.request
import os
%matplotlib inline
# Report the installed TensorFlow version.
print("Tensorflow版本是:",tf.__version__)
下载泰坦尼克号上旅客的数据集
下载旅客数据集
# Location of the Titanic passenger spreadsheet (titanic3.xls).
data_url = "http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3.xls"
# Raw string so the Windows backslashes are kept literally instead of being
# parsed as (invalid) escape sequences; the resulting path is unchanged.
data_file_path = r"E:\软件\微信/titanic3.xls"

# Download only when there is no local copy yet.
if not os.path.isfile(data_file_path):
    result = urllib.request.urlretrieve(data_url, data_file_path)
    print('downloaded;', result)
else:
    print(data_file_path, 'data file already exists.')
查看数据集
import numpy
import pandas as pd
# Read the data file; the result is a DataFrame.
df_data = pd.read_excel(data_file_path)
# Summary statistics of the loaded frame (rendered only when this is the
# last expression of an interactive cell).
df_data.describe()
# Display the loaded frame itself.
df_data
筛选提取字段
# Columns kept for the rest of the analysis.
selected_cols = ['survived', 'name', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
# Take an explicit copy: this frame has columns reassigned further down, and
# doing that on a selection of df_data triggers pandas'
# SettingWithCopyWarning. A copy makes the frame independent of df_data.
selected_df_data = df_data[selected_cols].copy()
selected_df_data
找出有null值的字段
# Boolean frame of the same shape: True wherever a cell is null.
selected_df_data.isnull()
# One boolean per column: True if that column contains at least one null.
selected_df_data.isnull().any()
**找出有null值的字段:统计各字段null值的个数,并列出含有null值的行**
# Number of null cells in each column.
selected_df_data.isnull().sum()
# Rows that contain at least one null value. The mask is one boolean per
# row, so each matching row is listed exactly once.
selected_df_data[selected_df_data.isnull().any(axis=1)]
填充null值
# Replacement values: column means for the numeric fields, 'S' for the
# embarkation port.
age_mean_value = selected_df_data['age'].mean()
fare_mean_value = selected_df_data['fare'].mean()

# Fill the nulls of each affected column with its replacement value.
for column, fill_value in (
    ('age', age_mean_value),
    ('fare', fare_mean_value),
    ('embarked', 'S'),
):
    selected_df_data[column] = selected_df_data[column].fillna(fill_value)
将文本字段转换为数值编码(sex、embarked)
# Integer codes for the two text columns.
sex_codes = {'female': 0, 'male': 1}
embarked_codes = {'C': 0, 'Q': 1, 'S': 2}

# Replace each text value by its code and store the column as int.
selected_df_data['sex'] = selected_df_data['sex'].map(sex_codes).astype(int)
selected_df_data['embarked'] = (
    selected_df_data['embarked'].map(embarked_codes).astype(int)
)
# Preview the first three rows.
selected_df_data.head(3)
删除name字段
# Remove the 'name' column from the working frame.
selected_df_data = selected_df_data.drop(columns=['name'])
# Preview the first three rows.
selected_df_data.head(3)
分离特征值和标签值
# Convert the frame to a NumPy array, then split it column-wise:
# column 0 becomes the label, every remaining column a feature.
ndarray_data = selected_df_data.to_numpy()
label = ndarray_data[:, 0]
features = ndarray_data[:, 1:]
# Preview the first three feature rows and labels.
features[:3]
label[:3]
特征值标准化处理(Min-Max归一化,缩放到[0,1]区间)
from sklearn import preprocessing
# Rescale every feature column to the [0, 1] range; the scaler is fitted on
# the same array that it transforms.
minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
norm_features = minmax_scale.fit(features).transform(features)
# Preview the first three scaled rows.
norm_features[:3]
定义数据预处理函数
def prepare_data(df_data):
    """Turn a passenger DataFrame into scaled features and a label vector.

    The 'name' column is dropped, nulls in 'age' and 'fare' are replaced by
    the column mean, nulls in 'embarked' by 'S', and 'sex' / 'embarked' are
    mapped to integer codes. The first remaining column is returned as the
    label; all other columns are min-max scaled to [0, 1] with a scaler
    fitted on this same frame.

    Args:
        df_data: DataFrame containing at least the columns 'name', 'age',
            'fare', 'sex' and 'embarked'. Its first column is used as the
            label (presumably 'survived' -- confirm against the caller).

    Returns:
        A ``(norm_features, label)`` tuple of NumPy arrays.
    """
    sex_codes = {'female': 0, 'male': 1}
    port_codes = {'C': 0, 'Q': 1, 'S': 2}

    # drop() returns a new frame, so the caller's DataFrame is not modified.
    df = df_data.drop(columns=['name'])

    # Numeric gaps are filled with the mean of their own column.
    for column in ('age', 'fare'):
        df[column] = df[column].fillna(df[column].mean())

    df['sex'] = df['sex'].map(sex_codes).astype(int)
    # Missing ports default to 'S' before being encoded.
    df['embarked'] = df['embarked'].fillna('S').map(port_codes).astype(int)

    matrix = df.values
    scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    return scaler.fit_transform(matrix[:, 1:]), matrix[:, 0]