卷积例子:
inputs = tf.placeholder('float', shape=[None, 6, 8])
out = tf.layers.conv1d(inputs, 5, 3)
说明: 对于一个样本而言,句子有6个词,词向量的维度为8,filters=5(输出维度为5), kernel_size=3(卷积核大小为3), 因为是一维卷积,所以卷积核的大小为3,那么输入6经过3的卷积核卷积后得到的是4的一个向量(4=6-3+1),又因为有5个过滤器,所以是得到5个4*1
的向量(一维卷积只能在一个维度上进行卷积,第二个维度在卷积之后的维度大小为1)
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Hyperparameters.
num_features = 3000        # vocabulary size: keep only the 3000 most frequent words
sequence_length = 300      # every review is padded/truncated to 300 tokens
embedding_dimension = 100  # dimensionality of the learned word vectors

# Load the IMDB sentiment dataset; reviews arrive as sequences of integer word ids.
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=num_features)
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)

# Pad (or truncate) every review to a fixed length so samples batch cleanly.
x_train = pad_sequences(x_train, maxlen=sequence_length)
x_test = pad_sequences(x_test, maxlen=sequence_length)
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)
基本的文本卷积
构建模型
def imdb_cnn():
    """Build and compile a single-kernel 1-D text-CNN for binary IMDB sentiment.

    Pipeline: Embedding (300 x 100) -> Conv1D (50 filters, width 5)
    -> MaxPool1D (2) -> Flatten -> Dense(10, relu) -> Dense(1, sigmoid).

    Returns:
        A compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential()
    # input_dim: vocabulary size (the num_features most frequent words were kept).
    # input_length: length of each padded review.
    # Output per sample: sequence_length x embedding_dimension (300 x 100).
    model.add(layers.Embedding(input_dim=num_features,
                               output_dim=embedding_dimension,
                               input_length=sequence_length))
    # 50 filters of width 5 -> (sequence_length - 5 + 1) x 50.
    model.add(layers.Conv1D(filters=50, kernel_size=5, strides=1, padding='valid'))
    # Non-overlapping max over pairs of time steps -> halves the time dimension.
    model.add(layers.MaxPool1D(pool_size=2, padding='valid'))
    # Flatten the pooled feature maps for the dense classifier head.
    model.add(layers.Flatten())
    model.add(layers.Dense(10, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))  # binary sentiment probability
    model.compile(optimizer=keras.optimizers.Adam(1e-3),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])
    return model
model = imdb_cnn()
# Inspect each layer's symbolic output (shape/dtype). Iterating the layers
# directly replaces the `for i in range(len(...))` + get_layer(index=i) anti-idiom.
for layer in model.layers:
    print(layer.output)
model.summary()
输出:
Tensor("embedding_4/Identity:0", shape=(None, 300, 100), dtype=float32)
Tensor("conv1d_1/Identity:0", shape=(None, 296, 50), dtype=float32)
Tensor("max_pooling1d_1/Identity:0", shape=(None, 148, 50), dtype=float32)
Tensor("flatten_3/Identity:0", shape=(None, 7400), dtype=float32)
Tensor("dense_6/Identity:0", shape=(None, 10), dtype=float32)
Tensor("dense_7/Identity:0", shape=(None, 1), dtype=float32)
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_4 (Embedding) (None, 300, 100) 300000
_________________________________________________________________
conv1d_1 (Conv1D) (None, 296, 50) 25050
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 148, 50) 0
_________________________________________________________________
flatten_3 (Flatten) (None, 7400) 0
_________________________________________________________________
dense_6 (Dense) (None, 10) 74010
_________________________________________________________________
dense_7 (Dense) (None, 1) 11
=================================================================
Total params: 399,071
Trainable params: 399,071
Non-trainable params: 0
_________________________________________________________________
模型训练
%%time
# Train for 5 epochs, holding out 10% of the training data for validation.
history = model.fit(x_train, y_train, batch_size=64, epochs=5, validation_split=0.1)
# Plot training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.legend(['training', 'validation'], loc='upper left')  # fixed 'valiation' typo
plt.show()
多核卷积网络
filter_sizes = [3, 4, 5]  # three parallel convolution widths (n-gram sizes)


def convolution():
    """Functional-API sub-model: parallel 2-D convolutions of several widths.

    For each width in ``filter_sizes``, a (width x embedding_dimension) kernel
    slides down the embedded sentence, and max-pooling over the whole time axis
    keeps only the strongest response per filter. The pooled maps are then
    concatenated: 3 widths x 64 filters -> 192 features.

    Returns:
        A ``keras.Model`` mapping (sequence_length, embedding_dimension, 1)
        inputs to the concatenated pooled features.
    """
    inputs = layers.Input(shape=(sequence_length, embedding_dimension, 1))
    pooled_maps = []
    for width in filter_sizes:
        # 64 kernels of shape (width, embedding_dimension):
        # output is (sequence_length - width + 1, 1, 64).
        feature_map = layers.Conv2D(filters=64,
                                    kernel_size=(width, embedding_dimension),
                                    strides=1, padding='valid',
                                    activation='relu')(inputs)
        # Pool over the entire remaining time axis: one scalar per filter.
        pooled = layers.MaxPool2D(pool_size=(sequence_length - width + 1, 1),
                                  padding='valid')(feature_map)
        pooled_maps.append(pooled)
    outputs = layers.concatenate(pooled_maps)
    return keras.Model(inputs=inputs, outputs=outputs)
def cnn_mulfilter():
    """Build and compile the multi-kernel text-CNN for binary IMDB sentiment.

    Embeds the tokens, reshapes the result into a single-channel 2-D "image",
    runs the parallel multi-width convolution sub-model, then classifies with
    a small dense head and dropout.

    Returns:
        A compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential()
    model.add(layers.Embedding(input_dim=num_features,
                               output_dim=embedding_dimension,
                               input_length=sequence_length))
    # Append a trailing channel axis so Conv2D can treat the text as an image.
    model.add(layers.Reshape((sequence_length, embedding_dimension, 1)))
    model.add(convolution())  # 192 pooled features (3 widths x 64 filters)
    model.add(layers.Flatten())
    model.add(layers.Dense(10, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])
    return model
model = cnn_mulfilter()
# Inspect each layer's symbolic output (shape/dtype). Iterating the layers
# directly replaces the `for i in range(len(...))` + get_layer(index=i) anti-idiom.
for layer in model.layers:
    print(layer.output)
model.summary()
输出:
Tensor("embedding_3/Identity:0", shape=(None, 300, 100), dtype=float32)
Tensor("reshape_1/Identity:0", shape=(None, 300, 100, 1), dtype=float32)
Tensor("concatenate_1/Identity:0", shape=(None, 1, 1, 192), dtype=float32)
Tensor("flatten_2/Identity:0", shape=(None, 192), dtype=float32)
Tensor("dense_4/Identity:0", shape=(None, 10), dtype=float32)
Tensor("dropout_1/Identity:0", shape=(None, 10), dtype=float32)
Tensor("dense_5/Identity:0", shape=(None, 1), dtype=float32)
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_3 (Embedding) (None, 300, 100) 300000
_________________________________________________________________
reshape_1 (Reshape) (None, 300, 100, 1) 0
_________________________________________________________________
model_1 (Model) (None, 1, 1, 192) 76992
_________________________________________________________________
flatten_2 (Flatten) (None, 192) 0
_________________________________________________________________
dense_4 (Dense) (None, 10) 1930
_________________________________________________________________
dropout_1 (Dropout) (None, 10) 0
_________________________________________________________________
dense_5 (Dense) (None, 1) 11
=================================================================
Total params: 378,933
Trainable params: 378,933
Non-trainable params: 0
_________________________________________________________________
模型训练
%%time
# Train for 5 epochs, holding out 10% of the training data for validation.
history = model.fit(x_train, y_train, batch_size=64, epochs=5, validation_split=0.1)
# Plot training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.legend(['training', 'validation'], loc='upper left')  # fixed 'valiation' typo
plt.show()