TensorFlow Basics (Training a Multilayer Perceptron)

Train a simple model to predict Titanic survivors.

Training a model involves:

  1. Obtaining the data files
  2. Preprocessing the data
    1. Handling non-numeric data (mapping strings to numbers, handling missing values)
    2. Handling numeric data (normalization)
  3. Building the model
  4. Training the model and visualizing the training process
  5. Making predictions

Data preprocessing code

So that the data-processing module can handle multiple kinds of data sources, a base class DataProcessBase is defined, and each concrete processor class implements its own process method.
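
The base class is not listed here; a minimal version, assumed only to require that subclasses override process, might look like this:

# DataProcess/DataProcessBase.py (assumed minimal version; the real base class may hold more shared logic)
class DataProcessBase:
    """Base class for data processors; each subclass handles one kind of data source."""

    def process(self, *args, **kwargs):
        # Concrete processors (CSV, database, ...) must override this
        raise NotImplementedError('subclasses must implement process()')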

from DataProcess.DataProcessBase import DataProcessBase
import pandas as pd
import tensorflow as tf
from collections import OrderedDict
from functools import partial


class CsvDataProcess(DataProcessBase):
    def __init__(self, train_csv, eval_csv, mode='train'):
        self.train_csv = train_csv
        self.eval_csv = eval_csv
        self.mode = mode
        self.train_data = pd.read_csv(self.train_csv)
        self.columns, self.label_name = self.data_field_process(label_index=0)

    def data_field_process(self, label_index):
        # Get the list of column names
        column_names = self.train_data.columns.tolist()
        # The label name (the column at label_index; here the first column)
        label_name = column_names[label_index]
        return column_names, label_name

    def get_dataset(self, batch_size=12, num_epochs=1):
        train_dataset = tf.data.experimental.make_csv_dataset(self.train_csv, batch_size=batch_size,
                                                              label_name=self.label_name, na_value='?',
                                                              num_epochs=num_epochs, ignore_errors=True)
        eval_dataset = tf.data.experimental.make_csv_dataset(self.eval_csv, batch_size=batch_size,
                                                             label_name=self.label_name, na_value='?',
                                                             num_epochs=num_epochs, ignore_errors=True)
        return train_dataset, eval_dataset

    # Scale a continuous value by 1 / (2 * mean), so typical values land near 0.5
    def continous_data_mean(self, mean, data):
        data = tf.cast(data, tf.float32) * 1 / (2 * mean)
        return tf.reshape(data, [-1, 1])

    # Compute each continuous column's mean and build numeric feature columns with that normalization
    def process_continous_data(self, continous_fields=['age', 'n_siblings_spouses', 'parch', 'fare']):
        means = OrderedDict()
        numerical_columns = []
        for field in continous_fields:
            means[field] = self.train_data[field].mean()
        for feature in means.keys():
            num_col = tf.feature_column.numeric_column(feature, normalizer_fn=partial(self.continous_data_mean,
                                                                                      means[feature]))
            numerical_columns.append(num_col)
        return numerical_columns

    # Map categorical string fields to one-hot indicator feature columns
    def process_discrete_data(self, discrete_fields=['sex', 'class', 'deck', 'embark_town', 'alone']):
        discrete_map = OrderedDict()
        categorical_columns = []
        for field in discrete_fields:
            discrete_value = list(filter(lambda x: x != 'unknown', sorted(set(self.train_data[field]))))
            discrete_map[field] = discrete_value
        # discrete_map['deck'] = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
        # print("Class:{}".format(discrete_map['deck']))
        for feature, vocab in discrete_map.items():
            cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
                key=feature, vocabulary_list=vocab)
            categorical_columns.append(tf.feature_column.indicator_column(cat_col))
        return categorical_columns

    def process(self, continous_fields, descrete_fields):
        numerical_columns = self.process_continous_data(continous_fields)
        discrete_columns = self.process_discrete_data(descrete_fields)
        data_columns = discrete_columns + numerical_columns
        return data_columns
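
To see what these feature columns actually produce, one can take a small batch from the training dataset and run it through a DenseFeatures layer. A hypothetical check, assuming the same CSV paths used in the next section:

cdp = CsvDataProcess('datas/train.csv', eval_csv='datas/eval.csv')
columns = cdp.process(continous_fields=['age', 'n_siblings_spouses', 'parch', 'fare'],
                      descrete_fields=['sex', 'class', 'deck', 'embark_town', 'alone'])
train_ds, _ = cdp.get_dataset(batch_size=5)
example_batch, example_labels = next(iter(train_ds))
# DenseFeatures concatenates the one-hot categorical columns with the scaled numeric columns
dense_features = tf.keras.layers.DenseFeatures(columns)
print(dense_features(example_batch).numpy())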


Defining the model


from os import pardir          # assumed imports; in the original, join/pardir likely come from os.path / os
from os.path import join

train_csv = join(pardir, 'datas/train.csv')
eval_csv = join(pardir, 'datas/eval.csv')
cdp = CsvDataProcess(train_csv, eval_csv=eval_csv)
descret_fields = ['sex', 'class', 'deck', 'embark_town', 'alone']
continuous_fields = ['age', 'n_siblings_spouses', 'parch', 'fare']
result = cdp.process(continous_fields=continuous_fields, descrete_fields=descret_fields)
# DenseFeatures turns the feature columns into the network's input layer
preprocessing_layer = tf.keras.layers.DenseFeatures(result)
model = tf.keras.Sequential([
    preprocessing_layer,
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])
raw_train_dataset, raw_eval_dataset = cdp.get_dataset()
# train_dataset = raw_train_dataset.batch(500)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='/tmp/train_model', histogram_freq=1)
model.fit(raw_train_dataset, epochs=20, callbacks=[tensorboard_callback])

The model is a multilayer perceptron: the DenseFeatures preprocessing layer feeds two 128-unit ReLU hidden layers and a single sigmoid output unit for binary classification (survived or not).
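
The last step in the list at the top is prediction. A minimal sketch of evaluating the trained model and predicting survival probabilities on the evaluation split, reusing the model and raw_eval_dataset created above:

eval_loss, eval_accuracy = model.evaluate(raw_eval_dataset)
print('Eval loss: {:.3f}, eval accuracy: {:.3f}'.format(eval_loss, eval_accuracy))

# predict() ignores the labels in the (features, label) batches and returns sigmoid probabilities
predictions = model.predict(raw_eval_dataset)
for prob in predictions[:10]:
    print('Predicted probability of survival: {:.2%}'.format(prob[0]))

The training curves written by the TensorBoard callback can be viewed with tensorboard --logdir /tmp/train_model.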
