# CNN text classification (CNN文本分类)
import warnings
warnings.filterwarnings("ignore")
import numpy as np
#import matplotlib.pyplot as plt  # typo fixed: "pylot" -> "pyplot"
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Hyperparameters: vocabulary size, padded review length, and embedding width.
num_features=3000
sequence_length=300
embedding_dimension=100
# Load the IMDB sentiment dataset, keeping only the `num_features` most
# frequent words; reviews arrive as variable-length lists of word indices.
(x_train,y_train),(x_test,y_test)=keras.datasets.imdb.load_data(num_words=num_features)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)
# run output:
# (25000,)
# (25000,)
# (25000,)
# (25000,)
# Pad/truncate every review to exactly `sequence_length` tokens so the
# network sees fixed-shape input.
# BUG FIX: the original referenced the undefined name `sequences_length`
# (the constant defined above is `sequence_length`), raising NameError.
x_train=pad_sequences(x_train,maxlen=sequence_length)
x_test=pad_sequences(x_test,maxlen=sequence_length)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)
# run output:
# (25000, 300)
# (25000, 300)
# (25000,)
# (25000,)
# Parallel convolution branch widths (n-gram sizes), as in a Kim-style TextCNN.
filter_size=[3,4,5]

def convolution():
    """Build the multi-branch Conv2D sub-model.

    Input: a (sequence_length, embedding_dimension, 1) "image" of embeddings.
    Each branch convolves with a (size, embedding_dimension) kernel — an
    n-gram detector spanning the full embedding width — then max-pools over
    the remaining time steps so every branch yields a (1, 1, 64) tensor.
    The branches are concatenated along the channel axis.

    Returns:
        A keras.Model mapping the embedding image to the concatenated
        pooled features.
    """
    # BUG FIX: the original used the undefined name `sequences_length` here
    # (NameError); the module-level constant is `sequence_length`.
    inn=layers.Input(shape=(sequence_length,embedding_dimension,1))
    cnns=[]
    for size in filter_size:
        conv=layers.Conv2D(filters=64,kernel_size=(size,embedding_dimension),
                           strides=1,padding='valid',activation='relu')(inn)
        # 'valid' convolution leaves sequence_length-size+1 time steps;
        # pool over all of them (global max over time).
        pool=layers.MaxPool2D(pool_size=(sequence_length-size+1,1),padding='valid')(conv)
        cnns.append(pool)
    outt=layers.concatenate(cnns)
    model=keras.Model(inputs=inn,outputs=outt)
    return model
def cnn_mulfilter():
    """Assemble and compile the full TextCNN binary classifier.

    Pipeline: Embedding -> Reshape to a 1-channel "image" -> multi-filter
    convolution sub-model -> Flatten -> Dense(10, relu) -> Dropout ->
    Dense(1, sigmoid). Compiled with Adam + binary cross-entropy.

    FIX: the pasted original lost all body indentation (SyntaxError);
    structure restored, no logic changed.

    Returns:
        A compiled keras.Sequential model.
    """
    model=keras.Sequential([
        layers.Embedding(input_dim=num_features,output_dim=embedding_dimension,
                         input_length=sequence_length),
        # Add a trailing channel axis so Conv2D can treat the embedding
        # matrix as a single-channel image.
        layers.Reshape((sequence_length,embedding_dimension,1)),
        convolution(),
        layers.Flatten(),
        layers.Dense(10,activation='relu'),
        layers.Dropout(0.2),
        # Single sigmoid unit: positive/negative sentiment probability.
        layers.Dense(1,activation='sigmoid')])
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])
    return model
# Build the model and print the layer/parameter summary.
model=cnn_mulfilter()
model.summary()
# run output:
# Model: "sequential_1"
# Layer (type)              Output Shape          Param #
# embedding_4 (Embedding)   (None, 300, 100)      300000
# reshape_4 (Reshape)       (None, 300, 100, 1)   0
# model_1 (Functional)      (None, 1, 1, 192)     76992
# flatten_1 (Flatten)       (None, 192)           0
# dense_2 (Dense)           (None, 10)            1930
# dropout_1 (Dropout)       (None, 10)            0
# dense_3 (Dense)           (None, 1)             11
# Total params: 378,933
# Trainable params: 378,933
# Non-trainable params: 0
# Train for 5 epochs, holding out 10% of the training data for validation.
history=model.fit(x_train,y_train,batch_size=64,epochs=5,validation_split=0.1)
# run output:
# Epoch 1/5
# 352/352 [] - 34s 44ms/step - loss: 0.4525 - accuracy: 0.7779 - val_loss: 0.2980 - val_accuracy: 0.8820
# Epoch 2/5
# 352/352 [] - 9s 26ms/step - loss: 0.2624 - accuracy: 0.8979 - val_loss: 0.2695 - val_accuracy: 0.8928
# Epoch 3/5
# 352/352 [] - 9s 26ms/step - loss: 0.1841 - accuracy: 0.9348 - val_loss: 0.2848 - val_accuracy: 0.8912
# Epoch 4/5
# 352/352 [] - 9s 26ms/step - loss: 0.1242 - accuracy: 0.9611 - val_loss: 0.3109 - val_accuracy: 0.8872
# Epoch 5/5
# 352/352 [==============================] - 9s 26ms/step - loss: 0.0760 - accuracy: 0.9798 - val_loss: 0.3482 - val_accuracy: 0.8876