import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import os
import sys
import time
import sklearn
from tensorflow import keras
import tensorflow as tf
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)
2.0.0
sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)
matplotlib 3.0.3
numpy 1.16.2
pandas 0.24.2
sklearn 0.20.3
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf
2.4 Classification Model in Practice: Building the Model
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all),(x_test, y_test) = fashion_mnist.load_data()
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]
# Normalize the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# StandardScaler expects 2D input, so reshape to a single column, scale,
# then reshape back to (num_samples, 28, 28)
x_train_scaled = scaler.fit_transform(
    x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_valid_scaled = scaler.transform(
    x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_test_scaled = scaler.transform(
    x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_train_scaled.shape, y_train.shape
((55000, 28, 28), (55000,))
def show_imgs(n_rows, n_cols, x_data, y_data, class_names):
    fig = plt.figure(figsize=(1.4 * n_cols, 1.5 * n_rows))
    for row in range(n_rows):
        for col in range(n_cols):
            index = row * n_cols + col
            plt.subplot(n_rows, n_cols, index + 1)
            plt.imshow(x_data[index], cmap='binary', interpolation='nearest')
            plt.axis('off')
            plt.title(class_names[y_data[index]])
    plt.show()
class_names = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
show_imgs(3, 5, x_train_scaled, y_train, class_names)
# Build the model (layer-by-layer alternative, kept for reference)
'''
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation='relu'))
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
'''
# Build the model
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])
# relu: y = max(0, x)
# softmax: turns a vector into a probability distribution; for x = [x1, x2, x3],
# y = [e^x1/sum, e^x2/sum, e^x3/sum], where sum = e^x1 + e^x2 + e^x3
# Why "sparse": here each y is a class index. Use categorical_crossentropy when
# y is already a one-hot vector; use sparse_categorical_crossentropy when y is
# just an integer index.
model.compile(loss='sparse_categorical_crossentropy',  # loss: cross-entropy
              optimizer='sgd',
              metrics=['accuracy'])
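A quick standalone sketch of the two points above: softmax turns raw scores into probabilities, and sparse labels are plain integer indices rather than one-hot vectors (illustrative numpy only, not part of the notebook flow):
x = np.array([2.0, 1.0, 0.1])
softmax = np.exp(x) / np.sum(np.exp(x))
print(softmax, softmax.sum())            # probabilities that sum to 1
sparse_label = 2                         # what sparse_categorical_crossentropy expects
one_hot_label = np.eye(3)[sparse_label]  # what categorical_crossentropy expects: [0., 0., 1.]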
# Inspect the model
model.layers
[<tensorflow.python.keras.layers.core.Flatten at 0x1a5a76ee10>,
<tensorflow.python.keras.layers.core.Dense at 0x1a5a76e048>,
<tensorflow.python.keras.layers.core.Dense at 0x1a5a76e198>,
<tensorflow.python.keras.layers.core.Dense at 0x1a5a7b62b0>]
model.summary()
# Input layer -> first hidden layer (w: weights of shape (784, 300), b: bias of
# shape (300,)), so 235500 = 784 * 300 + 300 parameters
# [num_samples, 784] * w + b -> [num_samples, 300]
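As a sanity check, that count can be read straight off the weights (layers[0] is the Flatten layer, layers[1] the first Dense):
w, b = model.layers[1].get_weights()
print(w.shape, b.shape)  # (784, 300) (300,)
print(w.size + b.size)   # 235500, matching model.summary()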
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid))  # 10 epochs; accuracy is checked on the validation set after each one
def plot_learning_curving(history):
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 1)
    plt.show()
plot_learning_curving(history)
# Evaluate on the test set
model.evaluate(x_test_scaled, y_test)
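To turn the softmax outputs into readable labels, a small sketch using the class_names list defined earlier:
proba = model.predict(x_test_scaled[:3])  # shape (3, 10), one softmax row per image
pred_ids = np.argmax(proba, axis=1)
print([class_names[i] for i in pred_ids])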
2.6 Using Callbacks
# Build the model
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])
model.compile(loss='sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy'])
logdir = './callbacks'
if not os.path.isdir(logdir):
os.mkdir(logdir)
output_model_file = os.path.join(logdir, 'fashion_mnist_model.h5')
callbacks = [
    keras.callbacks.TensorBoard(logdir),  # log metrics during training for TensorBoard
    keras.callbacks.ModelCheckpoint(output_model_file, save_best_only=True),  # periodically save the model, keeping only the best one
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3)  # min_delta: the improvement in loss between epochs is compared against min_delta
    # patience: stop early after this many epochs without an improvement of at least min_delta
]
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)  # 10 epochs; accuracy is checked on the validation set after each one
# From the directory containing callbacks/, run `tensorboard --logdir callbacks` to view the accuracy and loss curves
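Because ModelCheckpoint saved the best model to an HDF5 file, it can be restored later; a minimal sketch using the output_model_file path from above:
loaded_model = keras.models.load_model(output_model_file)
loaded_model.evaluate(x_test_scaled, y_test)  # should match the best checkpoint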
2.9 Building a Deep Neural Network
# Build the model
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
for _ in range(20):
    model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy',
optimizer = 'sgd',
metrics = ['accuracy'])
logdir = './dnn-callbacks'
if not os.path.isdir(logdir):
os.mkdir(logdir)
output_model_file = os.path.join(logdir, 'fashion_mnist_model.h5')
callbacks = [
    keras.callbacks.TensorBoard(logdir),  # log metrics during training for TensorBoard
    keras.callbacks.ModelCheckpoint(output_model_file, save_best_only=True),  # periodically save the model, keeping only the best one
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3)  # min_delta: the improvement in loss between epochs is compared against min_delta
    # patience: stop early after this many epochs without an improvement of at least min_delta
]
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)  # 10 epochs; accuracy is checked on the validation set after each one
# From the directory containing dnn-callbacks/, run `tensorboard --logdir dnn-callbacks` to view the accuracy and loss curves
plot_learning_curving(history)
# In the video, the loss curve stays flat for the first 3 epochs (my curve was normal), mainly for 2 reasons:
# 1. There are many parameters, so training is initially insufficient.
# 2. Vanishing gradients, which mainly occur in deep networks: the gradient is
#    computed by the chain rule (differentiating a composition of functions), so
#    many small per-layer factors multiply together and the gradient shrinks.
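A one-line illustration of point 2: the chain rule multiplies roughly one factor per layer, and sigmoid-style factors are at most 0.25, so with 20 layers (standalone sketch):
print(0.25 ** 20)  # ~9.1e-13: essentially no gradient reaches the early layers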
2-11 The Wide & Deep Model
- Sparse features
    - Discrete-valued features
    - One-hot representation
    - e.g. major = [computer science, art]; the one-hot for art is [0, 1]
    - The example above has only 2 values, so it does not look sparse. A better example: vocabulary = [AI, Sun Wukong, age, ...]; then the one-hot for "age" is [0, 0, 1, ...]
    - Cross product = {(computer science, AI), (computer science, Sun Wukong), ...}
    - Crossing sparse features lets the model memorize a sample (if an object's information is fully expressed by discrete features, the cross product enumerates its complete information set; a new sample necessarily belongs to this set, so it can also be predicted correctly)
- Pros and cons of sparse features
    - Pros
        - Effective; widely used in industry
    - Cons
        - Requires manual design (only a handful of features can be hand-picked for crossing; crossing every pair of discrete features would blow up the feature space and also make it easy to memorize samples and overfit)
        - Poor generalization: a combination never seen in training has no effect
- Dense features
    - Vector representation (carries semantics)
    - Using the earlier example, vocabulary = [AI, Sun Wukong, age, ...]; here "age" = [0.3, 0.2, 0.4, ...] (an n-dimensional vector)
    - The Word2vec tool
        - king - man + woman ≈ queen: because the vectors carry semantics, arithmetic between them is interpretable (the king is male and the queen is female, so removing the "male" direction from king and adding the "female" direction lands near queen)
- Pros and cons of dense features
    - Pros
        - Carry semantic information: the distance between vectors measures how related they are, so similar samples can be predicted correctly, and unseen feature combinations are handled
        - Less manual work (the representations are learned by the model)
    - Cons
        - If such features are overused, the model may over-generalize and recommend barely related products
"Sparse features" in the figure are the raw sparse inputs; "dense" is a dense (embedding) representation of those sparse features.
The figure shows Google Play's wide & deep network, with the deep model on the left: continuous features need no dense-feature processing, while categorical features are embedded into dense features; User Installed App and Impression App are crossed to form the wide model's input. A minimal sketch contrasting the two representations follows.
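Sketch of the sparse-vs-dense contrast, assuming a toy three-word vocabulary (names and embedding size are illustrative only):
vocab = ['AI', 'Sun Wukong', 'age']
one_hot_age = [1.0 if w == 'age' else 0.0 for w in vocab]  # sparse: [0.0, 0.0, 1.0]
embedding = keras.layers.Embedding(input_dim=len(vocab), output_dim=4)
dense_age = embedding(tf.constant([2]))  # dense: shape (1, 4), values are learned
print(one_hot_age, dense_age.shape)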
Input data preparation
Using the California housing price dataset
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
from sklearn.model_selection import train_test_split
x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state=7)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all, y_train_all, random_state=11)
x_train_all.shape, x_test.shape, x_train.shape, x_valid.shape
((15480, 8), (5160, 8), (11610, 8), (3870, 8))
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)
def plot_learning_curving(history):
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 1)
    plt.show()
2-12 Implementing Wide & Deep with the Functional API
# deep model
input = keras.layers.Input(shape=x_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation='relu')(input)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
# Assume the wide model shares the same input; concatenate wide and deep
concat = keras.layers.concatenate([input, hidden2])
# Both the wide and the deep part feed into the same output unit
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs=[input], outputs=[output])
model.summary()
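# In the summary, note the concatenate layer's output has 8 + 30 = 38 units
# (the 8 raw inputs plus the 30 from hidden2), so the final Dense(1) layer
# has 38 * 1 + 1 = 39 parameters.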
model.compile(loss='mean_squared_error', optimizer='adam')  # with 'sgd' the loss becomes nan, probably because the learning rate is too large
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)  # 10 epochs; the validation set is evaluated after each one
2-13 Implementing Wide & Deep with the Subclassing API
class WideDeepModel(keras.models.Model):
    def __init__(self):
        super(WideDeepModel, self).__init__()
        # Define the model's layers
        self.hidden1_layer = keras.layers.Dense(30, activation='relu')
        self.hidden2_layer = keras.layers.Dense(30, activation='relu')
        self.output_layer = keras.layers.Dense(1)

    def call(self, input):
        # Define the forward pass
        hidden1 = self.hidden1_layer(input)
        hidden2 = self.hidden2_layer(hidden1)
        concat = keras.layers.concatenate([input, hidden2])
        output = self.output_layer(concat)
        return output
model = WideDeepModel()
model.build(input_shape = (None, 8))
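# A subclassed model has no known input shape until it is first called, so
# build() (or one call on real data) is required before summary() will work.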
model.summary()
model.compile(loss='mean_squared_error', optimizer='adam')
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = model.fit(x_train_scaled, y_train, epochs=10,
                    validation_data=(x_valid_scaled, y_valid),
                    callbacks=callbacks)
2-14 Multi-Input and Multi-Output Models in Practice
Multi-input, single-output
input_wide = keras.layers.Input(shape=[5])
input_deep = keras.layers.Input(shape=[6])
hidden1 = keras.layers.Dense(30, activation='relu')(input_deep)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.concatenate([input_wide, hidden2])
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs = [input_wide, input_deep], outputs = [output])
model.summary()
model.compile(loss="mean_squared_error", optimizer='sgd')
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
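# The dataset has 8 features: the wide input takes the first 5 (columns 0-4),
# the deep input takes the last 6 (columns 2-7); they overlap on columns 2-4.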
x_train_scaled_wide = x_train_scaled[:,:5]
x_train_scaled_deep = x_train_scaled[:,2:]
x_valid_scaled_wide = x_valid_scaled[:,:5]
x_valid_scaled_deep = x_valid_scaled[:,2:]
x_test_scaled_wide = x_test_scaled[:,:5]
x_test_scaled_deep = x_test_scaled[:,2:]
history = model.fit([x_train_scaled_wide, x_train_scaled_deep], y_train,
                    validation_data=([x_valid_scaled_wide, x_valid_scaled_deep], y_valid),
                    epochs=100,
                    callbacks=callbacks)
Multi-input, multi-output
For example, if the current output predicts this year's house price and we also want to predict next year's, we need a second output.
# input_wide and input_deep combine into one output; the DNN branch on input_deep also gets its own separate output
input_wide = keras.layers.Input([5])
input_deep = keras.layers.Input([6])
hidden1 = keras.layers.Dense(30, activation='relu')(input_deep)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.concatenate([input_wide, hidden2])
output1 = keras.layers.Dense(1)(concat)
output2 = keras.layers.Dense(1)(hidden2)
model = keras.models.Model(inputs=[input_wide, input_deep],
                           outputs=[output1, output2])
model.summary()
model.compile(loss='mean_squared_error', optimizer='sgd')
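# With two outputs and a single loss string, Keras applies MSE to each output
# separately and reports a per-output loss plus their sum as the total loss.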
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
x_train_scaled_wide = x_train_scaled[:,:5]
x_train_scaled_deep = x_train_scaled[:,2:]
x_valid_scaled_wide = x_valid_scaled[:,:5]
x_valid_scaled_deep = x_valid_scaled[:,2:]
x_test_scaled_wide = x_test_scaled[:,:5]
x_test_scaled_deep = x_test_scaled[:,2:]
history = model.fit([x_train_scaled_wide, x_train_scaled_deep], [y_train, y_train],
                    validation_data=([x_valid_scaled_wide, x_valid_scaled_deep], [y_valid, y_valid]),
                    epochs=100,
                    callbacks=callbacks)
2-15 Hyperparameter Search
2-16 Hyperparameter Search
model = keras.models.Sequential([
    keras.layers.Input(shape=[8]),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(1)
])
lr = 0.0001
optimizer = keras.optimizers.SGD(lr)
model.compile(loss='mean_squared_error', optimizer=optimizer)
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
history = model.fit(x_train_scaled, y_train,
                    validation_data=(x_valid_scaled, y_valid),
                    epochs=1,
                    callbacks=callbacks)
learning_rates = [1e-4, 3e-4, 1e-3, 3e-3, 1e-2, 3e-2]
historys = []
for lr in learning_rates:
    model = keras.models.Sequential([
        keras.layers.Flatten(input_shape=[8]),
        keras.layers.Dense(30, activation='relu'),
        keras.layers.Dense(1)
    ])
    optimizer = keras.optimizers.SGD(lr)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
    history = model.fit(x_train_scaled, y_train,
                        validation_data=(x_valid_scaled, y_valid),
                        epochs=20,
                        callbacks=callbacks)
    historys.append(history)
for lr, history in zip(learning_rates, historys):
    print("lr", lr)
    plot_learning_curving(history)
2-17 Wrapping a Keras Model for sklearn
# 1. Convert the Keras model into an sklearn-style model
def build_model(hidden_layers=1, layer_size=30, learning_rate=3e-3):
    model = keras.models.Sequential()
    # The input and the first hidden layer can be declared together in one Dense
    model.add(keras.layers.Dense(layer_size, activation='relu',
                                 input_shape=x_train_scaled.shape[1:]))
    for _ in range(hidden_layers - 1):
        model.add(keras.layers.Dense(layer_size, activation='relu'))
    model.add(keras.layers.Dense(1))
    optimizer = keras.optimizers.SGD(learning_rate)
    model.compile(loss='mse', optimizer=optimizer)
    return model
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
sklearn_model = keras.wrappers.scikit_learn.KerasRegressor(build_fn=build_model)
history = sklearn_model.fit(x_train_scaled, y_train,
                            validation_data=(x_valid_scaled, y_valid),
                            epochs=10,
                            callbacks=callbacks)
2-18 Randomized Hyperparameter Search on the sklearn-Wrapped Model
from scipy.stats import reciprocal
# reciprocal (log-uniform) pdf: f(x) = 1 / (x * log(b/a)) for a <= x <= b
param_distribution = {
    "hidden_layers": [1, 2, 3, 4],
    "layer_size": np.arange(1, 100),
    "learning_rate": reciprocal(1e-4, 1e-2)
}
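A quick check that reciprocal(1e-4, 1e-2) behaves log-uniformly, spreading samples across orders of magnitude instead of clustering near the upper bound:
samples = reciprocal(1e-4, 1e-2).rvs(size=5, random_state=42)
print(samples)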
from sklearn.model_selection import RandomizedSearchCV
random_search_cv = RandomizedSearchCV(sklearn_model,
                                      param_distribution,
                                      n_iter=10,
                                      cv=3,
                                      n_jobs=1)  # cv: defaults to 3 if omitted; the training set is split into 3 folds, two for training and one for cross-validation
# x_train_scaled has 11610 samples; with cv=3, each fit below trains on 7740 of
# them, and at the end the best estimator is refit on the full training set
# (refit=True is RandomizedSearchCV's default)
random_search_cv.fit(x_train_scaled, y_train,
                     epochs=100,
                     validation_data=(x_valid_scaled, y_valid),
                     callbacks=callbacks)
print(random_search_cv.best_params_)
print(random_search_cv.best_score_)
print(random_search_cv.best_estimator_)
model = random_search_cv.best_estimator_.model
model.evaluate(x_test_scaled, y_test)