TensorFlow 2.0: Building Neural Networks with Keras (Part 2)

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import os
import sys
import time
import sklearn
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.0.0
sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)
matplotlib 3.0.3
numpy 1.16.2
pandas 0.24.2
sklearn 0.20.3
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf

2.4 Classification in Practice: Building the Model

fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]
# Standardize the data: fit the scaler on the training set only; reshape to a
# single column so all pixels share one mean/std, then reshape back to 28x28
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(
    x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_valid_scaled = scaler.transform(
    x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_test_scaled = scaler.transform(
    x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_train_scaled.shape, y_train.shape

((55000, 28, 28), (55000,))

def show_imgs(n_rows, n_cols, x_data, y_data, class_names):
    # Show the first n_rows * n_cols images, each titled with its class name
    fig = plt.figure(figsize=(1.4 * n_cols, 1.5 * n_rows))
    for row in range(n_rows):
        for col in range(n_cols):
            index = row * n_cols + col
            plt.subplot(n_rows, n_cols, index + 1)
            plt.imshow(x_data[index], cmap='binary', interpolation='nearest')
            plt.axis('off')
            plt.title(class_names[y_data[index]])
    plt.show()

class_names = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
show_imgs(3, 5, x_train_scaled, y_train, class_names)

[Figure: a 3x5 grid of sample Fashion-MNIST images with their class labels]

# Build the model (equivalent layer-by-layer variant, kept for reference)
'''
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation='relu'))
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
'''
# Build the model
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])
# relu: y = max(0, x)
# softmax: turns a vector into a probability distribution; for x = [x1, x2, x3],
#          y = [e^x1/sum, e^x2/sum, e^x3/sum], where sum = e^x1 + e^x2 + e^x3
# Why 'sparse': our labels y are plain integer indices; use
# sparse_categorical_crossentropy for integer labels, and
# categorical_crossentropy when y is already a one-hot vector
model.compile(loss='sparse_categorical_crossentropy',  # loss: cross-entropy
              optimizer='sgd',
              metrics=['accuracy'])
# Inspect the model's layers
model.layers

[<tensorflow.python.keras.layers.core.Flatten at 0x1a5a76ee10>,
<tensorflow.python.keras.layers.core.Dense at 0x1a5a76e048>,
<tensorflow.python.keras.layers.core.Dense at 0x1a5a76e198>,
<tensorflow.python.keras.layers.core.Dense at 0x1a5a7b62b0>]
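To make the softmax and "sparse" comments above concrete, here is a minimal NumPy sketch (the softmax helper is mine, for illustration) showing that the sparse loss on an integer label equals the one-hot cross-entropy:

def softmax(x):
    e = np.exp(x - x.max())  # subtract the max for numerical stability
    return e / e.sum()

logits = np.array([2.0, 1.0, 0.1])
probs = softmax(logits)
print(probs, probs.sum())  # a probability distribution summing to 1

# sparse_categorical_crossentropy takes an integer label;
# categorical_crossentropy takes the equivalent one-hot vector:
y_sparse = 0                           # class index
y_onehot = np.array([1.0, 0.0, 0.0])   # same label, one-hot encoded
print(-np.log(probs[y_sparse]))            # sparse form
print(-(y_onehot * np.log(probs)).sum())   # one-hot form, same value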

model.summary()
# input layer -> first hidden layer: w: weights (784, 300), b: bias (300,)
# 235500 = 784 * 300 + 300
# [n_samples, 784] @ w + b -> [n_samples, 300]

[Figure: model.summary() output]
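To double-check the parameter arithmetic in the comment above, a small sketch that walks the model's weights:

# Each Dense layer holds a kernel of shape (in, out) plus a bias of shape (out,)
for layer in model.layers:
    shapes = [tuple(w.shape) for w in layer.weights]
    n_params = sum(int(np.prod(s)) for s in shapes)
    print(layer.name, shapes, n_params)
# First Dense layer: (784, 300) kernel + (300,) bias -> 784 * 300 + 300 = 235500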

history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid))  # 10 epochs; after each one, accuracy is measured on the validation set

[Figure: per-epoch training log]

def plot_learning_curving(history):
    # history.history maps each recorded metric to its per-epoch values
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 1)
    plt.show()

plot_learning_curving(history)

[Figure: learning curves (loss and accuracy)]

# Evaluate on the test set
model.evaluate(x_test_scaled, y_test)
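After evaluation, the trained classifier can be used for prediction; a short usage sketch:

# model.predict returns an (n_samples, 10) matrix of class probabilities
proba = model.predict(x_test_scaled[:5])
pred = np.argmax(proba, axis=1)  # most likely class index per sample
print('predicted:', [class_names[i] for i in pred])
print('actual:   ', [class_names[i] for i in y_test[:5]])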

2.6 Using Callbacks

# Build the model
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
logdir = './callbacks'
if not os.path.isdir(logdir):
    os.mkdir(logdir)
output_model_file = os.path.join(logdir, 'fashion_mnist_model.h5')
callbacks = [
    keras.callbacks.TensorBoard(logdir),  # log training metrics for TensorBoard
    keras.callbacks.ModelCheckpoint(output_model_file, save_best_only=True),  # save the model, keeping only the best so far
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3)  # min_delta: the smallest loss improvement that counts;
                                                               # patience: stop after this many epochs without one
]
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)  # 10 epochs; validation after each one
# From the parent directory of callbacks/, run `tensorboard --logdir callbacks`
# to view the accuracy and loss curves
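Since ModelCheckpoint was given save_best_only=True, the saved file holds the weights with the best validation loss; a sketch of restoring and re-evaluating it:

# Restore the best checkpoint saved during training and score it on the test set
best_model = keras.models.load_model(output_model_file)
best_model.evaluate(x_test_scaled, y_test)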

2.9 Building a Deep Neural Network

# Build the model: 20 hidden layers of 100 units each
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
for _ in range(20):
    model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
logdir = './dnn-callbacks'
if not os.path.isdir(logdir):
    os.mkdir(logdir)
output_model_file = os.path.join(logdir, 'fashion_mnist_model.h5')
callbacks = [
    keras.callbacks.TensorBoard(logdir),  # log training metrics for TensorBoard
    keras.callbacks.ModelCheckpoint(output_model_file, save_best_only=True),  # save the model, keeping only the best so far
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3)  # stop after `patience` epochs without an improvement of at least min_delta
]
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)
# From the parent directory of dnn-callbacks/, run `tensorboard --logdir dnn-callbacks`
plot_learning_curving(history)
# In the video, the loss curve stays flat for the first ~3 epochs (mine trains
# normally), mainly for two reasons:
# 1. Many parameters, so early training is insufficient
# 2. Vanishing gradients, common in deep networks: backpropagation applies the
#    chain rule (differentiating a composition of functions), and multiplying
#    many small factors shrinks the gradient toward zero
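A back-of-the-envelope sketch of point 2: with sigmoid activations each chain-rule factor is at most 0.25, so across 20 layers the gradient can shrink geometrically. relu, used above, keeps a derivative of 1 for positive activations, which is one reason it suffers less:

# The sigmoid derivative peaks at 0.25, so 20 chained layers can scale a
# gradient by as little as 0.25**20 -- effectively zero
print(0.25 ** 20)  # ~9.09e-13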

[Figure: learning curves for the 20-layer DNN]

2-11 The Wide & Deep Model

  1. Sparse features

    • Discrete-valued features
    • One-hot representation
      • e.g., major = [computer science, art]; the one-hot vector for art is [0, 1]
      • With only 2 values this hardly looks sparse, so a larger example: vocabulary = [artificial intelligence, Sun Wukong, age, …]; the one-hot vector for "age" is [0, 0, 1, …]
    • Cross product = {(computer science, artificial intelligence), (computer science, Sun Wukong), …}
      • Crossing sparse features lets the model memorize a sample: if an object is described entirely by discrete features, the cross products enumerate all of its information, so any new sample with the same crosses is predicted correctly (see the sketch after this list)
  2. Pros and cons of sparse features

    • Pros
      • Effective; widely used in industry
    • Cons
      • Needs manual design (only a few hand-picked features can be crossed; crossing every pair of discrete features would blow up the feature space and overfit by memorizing the training samples)
      • Poor generalization: a combination never seen during training has no effect
  3. Dense features

    • Vector representation (carries semantics)
      • Using the same vocabulary = [artificial intelligence, Sun Wukong, age, …], "age" becomes an n-dimensional vector such as [0.3, 0.2, 0.4, …]
    • The Word2vec tool
      • king - man + woman ≈ queen: because the vectors carry semantics, the offset from man to woman matches the offset from king to queen, and such relations can be read directly out of the embedding space
  4. Pros and cons of dense features

    • Pros:
      • Carry semantic information; the distance between two vectors measures their relatedness, so similar samples are predicted correctly, including feature combinations that never appeared in training
      • Less manual work (the vectors are learned by the model)
    • Cons:
      • If overused they can over-generalize, recommending barely related products

[Figure: sparse features vs. Dense, the dense (embedded) representation of those sparse features]
[Figure: the Google Play wide & deep network: on the deep side, continuous features are fed in directly while categorical features get a dense-embedding treatment; the cross product of User Installed App and Impression App is the input to the wide side]
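As promised above, a minimal sketch of one-hot encoding and feature crossing, using the hypothetical vocabularies from the examples:

# Hypothetical vocabularies from the discussion above
major_vocab = ['computer science', 'art']
word_vocab = ['artificial intelligence', 'sun wukong', 'age']

def one_hot(vocab, value):
    # 1 at the position of `value`, 0 elsewhere
    return [1 if v == value else 0 for v in vocab]

print(one_hot(major_vocab, 'art'))  # [0, 1]
print(one_hot(word_vocab, 'age'))   # [0, 0, 1]

# Crossing the two sparse features enumerates every (major, word) pair;
# with large vocabularies this blows up combinatorially, which is why
# crosses must be hand-picked
cross = [(m, w) for m in major_vocab for w in word_vocab]
print(cross)  # 2 * 3 = 6 binary cross features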

Preparing the Input Data

We use the California housing (house-price) dataset.

from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
from sklearn.model_selection import train_test_split
# split twice: first into train_all vs. test, then train_all into train vs. valid
x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state=7)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all, y_train_all, random_state=11)
x_train_all.shape, x_test.shape, x_train.shape, x_valid.shape

((15480, 8), (5160, 8), (11610, 8), (3870, 8))

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

def plot_learning_curving(history):
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 1)
    plt.show()

2-12 Implementing Wide & Deep with the Functional API

# deep part
input = keras.layers.Input(shape=x_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation='relu')(input)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)

# assume the wide part shares the same input; concatenate wide and deep
concat = keras.layers.concatenate([input, hidden2])
# wide and deep feed into the same output unit
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs=[input], outputs=[output])

model.summary()
model.compile(loss='mean_squared_error', optimizer='adam')  # with 'sgd' the loss became NaN, probably because the learning rate was too large
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)  # 10 epochs; validation after each one

2-13 Implementing Wide & Deep with the Subclassing API

class WideDeepModel(keras.models.Model):
    def __init__(self):
        super(WideDeepModel, self).__init__()
        # define the layers
        self.hidden1_layer = keras.layers.Dense(30, activation='relu')
        self.hidden2_layer = keras.layers.Dense(30, activation='relu')
        self.output_layer = keras.layers.Dense(1)

    def call(self, input):
        # the forward pass
        hidden1 = self.hidden1_layer(input)
        hidden2 = self.hidden2_layer(hidden1)
        concat = keras.layers.concatenate([input, hidden2])
        output = self.output_layer(concat)
        return output

model = WideDeepModel()
model.build(input_shape=(None, 8))
model.summary()
model.compile(loss='mean_squared_error', optimizer='adam')

callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)
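As an aside, model.build with an explicit input shape is one way to create the weights before summary(); calling the model on a batch of data also works. A sketch (model2 is mine, for illustration):

# Calling a subclassed model on one batch also builds it and infers shapes
model2 = WideDeepModel()
_ = model2(x_train_scaled[:1].astype(np.float32))
model2.summary()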

2-14 Multi-Input and Multi-Output Models in Practice

Multiple inputs, single output:

input_wide = keras.layers.Input(shape=[5])  # the wide part takes 5 features
input_deep = keras.layers.Input(shape=[6])  # the deep part takes 6 features
hidden1 = keras.layers.Dense(30, activation='relu')(input_deep)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.concatenate([input_wide, hidden2])
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs = [input_wide, input_deep], outputs = [output])
model.summary()
model.compile(loss="mean_squared_error", optimizer='sgd')
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]

# Split the 8 features: the wide part takes columns 0-4, the deep part
# takes columns 2-7 (they overlap on columns 2-4)
x_train_scaled_wide = x_train_scaled[:, :5]
x_train_scaled_deep = x_train_scaled[:, 2:]
x_valid_scaled_wide = x_valid_scaled[:, :5]
x_valid_scaled_deep = x_valid_scaled[:, 2:]
x_test_scaled_wide = x_test_scaled[:, :5]
x_test_scaled_deep = x_test_scaled[:, 2:]

history = model.fit([x_train_scaled_wide, x_train_scaled_deep], y_train,
                    validation_data=([x_valid_scaled_wide, x_valid_scaled_deep], y_valid),
                    epochs=100,
                    callbacks=callbacks)
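Evaluating a multi-input model works the same way, passing one array per input; a usage sketch:

# One test array per input, in the same order as `inputs`
model.evaluate([x_test_scaled_wide, x_test_scaled_deep], y_test)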
Multiple inputs, multiple outputs:

For example, suppose the current model predicts this year's house price; if we also want to predict next year's price, the model needs a second output.

# input_wide and input_deep merge into one output; the DNN on input_deep alone feeds a second output
input_wide = keras.layers.Input([5])
input_deep = keras.layers.Input([6])
hidden1 = keras.layers.Dense(30, activation='relu')(input_deep)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.concatenate([input_wide, hidden2])
output1 = keras.layers.Dense(1)(concat)
output2 = keras.layers.Dense(1)(hidden2)

model = keras.models.Model(inputs=[input_wide, input_deep],
                           outputs=[output1, output2])
model.summary()
model.compile(loss='mean_squared_error', optimizer='sgd')
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]

[Figure: model.summary() for the two-output model]

x_train_scaled_wide = x_train_scaled[:,:5]
x_train_scaled_deep = x_train_scaled[:,2:]
x_valid_scaled_wide = x_valid_scaled[:,:5]
x_valid_scaled_deep = x_valid_scaled[:,2:]
x_test_scaled_wide = x_test_scaled[:,:5]
x_test_scaled_deep = x_test_scaled[:,2:]
history = model.fit([x_train_scaled_wide, x_train_scaled_deep], [y_train, y_train],  # one target per output (the same labels twice here)
                    validation_data=([x_valid_scaled_wide, x_valid_scaled_deep], [y_valid, y_valid]),
                    epochs=100,
                    callbacks=callbacks)
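With two outputs, evaluate likewise expects one target per output and reports the total loss along with each output's individual loss; a usage sketch:

# One target per output; returns [total_loss, output1_loss, output2_loss]
model.evaluate([x_test_scaled_wide, x_test_scaled_deep], [y_test, y_test])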

2-15 Hyperparameter Search

[Figures: slides on hyperparameter search]
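The slides themselves are not captured above; as a rough illustration of the simplest strategy, grid search trains one model per combination of a few candidate values (a sketch with made-up candidate lists):

import itertools

# Hypothetical candidate values for two hyperparameters
candidate_lrs = [1e-4, 1e-3, 1e-2]
candidate_sizes = [16, 32, 64]

# Grid search: train and validate one model per combination (9 here),
# then keep the combination with the best validation score
for lr, size in itertools.product(candidate_lrs, candidate_sizes):
    print(f"train a model with lr={lr}, layer_size={size}")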

2-16 Hyperparameter Search (Manual Implementation)

# A single baseline run at one fixed learning rate:
model = keras.models.Sequential([
    keras.layers.Input(shape=[8]),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(1)
])
lr = 0.0001
optimizer = keras.optimizers.SGD(lr)
model.compile(loss='mean_squared_error', optimizer=optimizer)
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = model.fit(x_train_scaled, y_train,
                    validation_data=(x_valid_scaled, y_valid),
                    epochs=1,
                    callbacks=callbacks)
learning_rates = [1e-4, 3e-4, 1e-3, 3e-3, 1e-2, 3e-2]
histories = []
for lr in learning_rates:
    # rebuild the model from scratch for each learning rate
    model = keras.models.Sequential([
        keras.layers.Flatten(input_shape=[8]),
        keras.layers.Dense(30, activation='relu'),
        keras.layers.Dense(1)
    ])
    optimizer = keras.optimizers.SGD(lr)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
    history = model.fit(x_train_scaled, y_train,
                        validation_data=(x_valid_scaled, y_valid),
                        epochs=20,
                        callbacks=callbacks)
    histories.append(history)

for lr, history in zip(learning_rates, histories):
    print("lr:", lr)
    plot_learning_curving(history)

2-17 Wrapping a Keras Model for sklearn

# 1. Convert to an sklearn model
def build_model(hidden_layers=1, layer_size=30, learning_rate=3e-3):
    model = keras.models.Sequential()
    # the input shape and the first hidden layer can be combined in one Dense
    model.add(keras.layers.Dense(layer_size, activation='relu',
                                 input_shape=x_train_scaled.shape[1:]))
    for _ in range(hidden_layers - 1):
        model.add(keras.layers.Dense(layer_size, activation='relu'))
    model.add(keras.layers.Dense(1))
    optimizer = keras.optimizers.SGD(learning_rate)
    model.compile(loss='mse', optimizer=optimizer)
    return model

callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
sklearn_model = keras.wrappers.scikit_learn.KerasRegressor(build_fn = build_model)
history = sklearn_model.fit(x_train_scaled, y_train, validation_data = (
                        x_valid_scaled, y_valid),
                         epochs = 10, 
                         callbacks = callbacks)
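Once wrapped, the model exposes the usual sklearn estimator interface; a short usage sketch:

# The wrapper behaves like an sklearn regressor
print(sklearn_model.predict(x_test_scaled[:5]))  # predicted house prices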

2-18 Randomized Hyperparameter Search on the sklearn-Wrapped Model

from scipy.stats import reciprocal
# reciprocal pdf: f(x) = 1/(x*log(b/a)) for a <= x <= b
param_distribution = {
    "hidden_layers": [1, 2, 3, 4],
    "layer_size": np.arange(1, 100),
    "learning_rate": reciprocal(1e-4, 1e-2)
}
from sklearn.model_selection import RandomizedSearchCV
random_search_cv = RandomizedSearchCV(sklearn_model,
                                      param_distribution,
                                      n_iter=10,
                                      cv=3,
                                      n_jobs=1)
# cv defaults to 3: the training set is split into 3 folds, two for training
# and one for cross-validation, which is why each fit below trains on 7740 of
# x_train_scaled's 11610 samples
random_search_cv.fit(x_train_scaled, y_train,
                     epochs=100,
                     validation_data=(x_valid_scaled, y_valid),
                     callbacks=callbacks)
print(random_search_cv.best_params_)
print(random_search_cv.best_score_)
print(random_search_cv.best_estimator_)
model = random_search_cv.best_estimator_.model
model.evaluate(x_test_scaled, y_test)
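To see what the reciprocal (log-uniform) distribution above actually draws, a small sketch:

# reciprocal(1e-4, 1e-2) is log-uniform: each order of magnitude of the
# learning rate is sampled roughly equally often
print(reciprocal(1e-4, 1e-2).rvs(size=5, random_state=42))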