# 代码来自:Self attention:自注意力——Keras,IMDB 数据 - 知乎
# %%
import pandas as pd
from keras import backend as K
from keras.datasets import imdb
from keras.engine.topology import Layer
from keras.preprocessing import sequence
from matplotlib import pyplot as plt
# https://blog.csdn.net/DawnRanger/article/details/78307244 (实现自己的Keras层,文章不错)
# keras不是使用tf.get_variable的方式生成变量,可见keras在设计时就根本没有考虑到变量共享,从之前的经验来看,要用keras设计多GPU程序是非常棘手的。(要想让Keras支持多GPU并行,必须从这一步开始修改代码,而这里已经是keras非常底层的代码了。)
# tf.get_variable() 会检查当前命名空间下是否存在同样name的变量,可以方便共享变量。而tf.Variable 每次都会新建一个变量
# https://keras.io/zh/layers/writing-your-own-keras-layers/
# 自定义Keras层
class Self_Attention(Layer):
    """Single-head scaled dot-product self-attention layer.

    The layer owns one weight tensor of shape
    ``(3, input_shape[2], output_dim)``; its three slices project the input
    into queries, keys and values. The output is
    ``softmax(Q . K^T / sqrt(output_dim)) . V``.

    Args:
        output_dim: Size of the last axis of the Q/K/V projections and of
            the layer output.
        **kwargs: Forwarded unchanged to ``Layer.__init__``.
    """

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(Self_Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable, stacked Q/K/V projection weights.

        Args:
            input_shape: Shape of the layer input. Index 2 is used as the
                input feature size; in this script the input is
                (None, 64, 128), i.e. (batch, sequence length, embedding
                size).
        """
        self.kernel = self.add_weight(
            name='kernel',
            # Leading axis of 3: one projection matrix each for Q, K and V.
            shape=(3, input_shape[2], self.output_dim),
            initializer='uniform',
            trainable=True)
        # Must be called last so the layer is marked as built.
        super(Self_Attention, self).build(input_shape)

    def call(self, x):
        """Apply self-attention to ``x``.

        Illustrated walkthrough of the computation:
        http://jalammar.github.io/illustrated-transformer/

        Args:
            x: Input tensor; in this script its shape is (None, 64, 128).

        Returns:
            The attention-weighted values; per ``compute_output_shape`` the
            result has shape (batch, steps, output_dim).
        """
        queries = K.dot(x, self.kernel[0])
        keys = K.dot(x, self.kernel[1])
        values = K.dot(x, self.kernel[2])
        # The shape prints below are kept as tutorial/debug output; they run
        # whenever `call` is executed.
        print("WQ.shape", queries.shape)

        # Swap the last two axes of the keys, (None, 64, 128) ->
        # (None, 128, 64) in this script. Computed once and reused for both
        # the print and the score product.
        keys_t = K.permute_dimensions(keys, [0, 2, 1])
        print("K.permute_dimensions(WK, [0, 2, 1]).shape", keys_t.shape)

        # Pairwise query/key scores, (None, 64, 64) in this script, scaled
        # by sqrt(output_dim) before the softmax.
        scores = K.batch_dot(queries, keys_t)
        scores = scores / (self.output_dim ** 0.5)
        weights = K.softmax(scores)
        print("QK.shape", weights.shape)

        # batch_dot multiplies per sample: weights . values.
        return K.batch_dot(weights, values)

    def compute_output_shape(self, input_shape):
        """Return (batch, steps, output_dim) for the given input shape."""
        shape = (input_shape[0], input_shape[1], self.output_dim)
        print('compute_output_shape:\t', shape)
        return shape

    def get_config(self):
        """Return the layer config including ``output_dim``.

        Without this override the required ``output_dim`` constructor
        argument is missing from the serialized config, so the layer cannot
        be re-created from a saved model.
        """
        config = super(Self_Attention, self).get_config()
        config['output_dim'] = self.output_dim
        return config
# %%
# Vocabulary size: only the `max_features` most frequent words are kept when
# loading the IMDB reviews.
max_features = 20000
print('Loading data...')
train_split, test_split = imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_test, y_test = test_split
# One-hot encode the labels.
y_train = pd.get_dummies(y_train)
y_test = pd.get_dummies(y_test)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
# %%数据归一化处理
# Every review is padded or truncated to exactly `maxlen` tokens.
maxlen = 64
print('Pad sequences (samples x time)')
# Reference: https://zhuanlan.zhihu.com/p/105376030
# Sequences shorter than `maxlen` are filled with the `value` argument and
# longer ones are cut; where the filling/cutting happens is controlled by the
# `padding` and `truncating` arguments (left at their defaults here).
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
# %%
batch_size = 32
from keras.models import Model
from keras.optimizers import Adam
from keras.layers import *
# `shape` excludes the batch axis: each sample is a vector of `maxlen` token
# ids. Using `maxlen` (instead of a literal 64) keeps the model input in sync
# with the padding length chosen above.
S_inputs = Input(shape=(maxlen,), dtype='int32')
# Token ids in [0, max_features) -> 128-dimensional embeddings.
embeddings = Embedding(max_features, 128)(S_inputs)
O_seq = Self_Attention(128)(embeddings)
# Pooling layers (https://keras.io/zh/layers/pooling/): batch axis first,
# channels last by default. Here the time axis is averaged away.
O_seq = GlobalAveragePooling1D()(O_seq)
O_seq = Dropout(0.5)(O_seq)
# Two output units to match the one-hot encoded labels.
outputs = Dense(2, activation='softmax')(O_seq)
model = Model(inputs=S_inputs, outputs=outputs)
# Model visualization: https://keras.io/zh/visualization/
from keras.utils import plot_model
plot_model(model, show_shapes=True, to_file='model.png')
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line.
model.summary()
# Try using different optimizers and different optimizer configs.
opt = Adam(lr=0.0002, decay=0.00001)
loss = 'categorical_crossentropy'
model.compile(loss=loss,
              optimizer=opt,
              metrics=['accuracy'])
# %%
print('Train...')
# Training-history visualization: https://keras.io/zh/visualization/
# verbose: 0 = silent, 1 = progress bar, 2 = one line per epoch.
h = model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=5,
              verbose=2,
              validation_data=(x_test, y_test))
# List the recorded history keys (e.g. val_loss, val_accuracy, loss,
# accuracy).
for key in h.history:
    print(key)
# Keras before 2.3 records the metric as "acc"/"val_acc"; later versions use
# the name passed to compile(), i.e. "accuracy"/"val_accuracy". Pick whichever
# is present so the plot does not fail with a KeyError.
acc_key = "accuracy" if "accuracy" in h.history else "acc"
plt.plot(h.history["loss"], label="train_loss")
plt.plot(h.history["val_loss"], label="val_loss")
plt.plot(h.history[acc_key], label="train_acc")
plt.plot(h.history["val_" + acc_key], label="val_acc")
plt.legend()
plt.show()
# model.save("imdb.h5")
# 运行报错:Object arrays cannot be loaded when allow_pickle=False
# 解决方法:
# 1)找到 keras.datasets 的 imdb.py 中调用 np.load(path) 的那一行;
# 2)将 np.load(path) 改为 np.load(path, allow_pickle=True)。
# OK,get it.