在 Anaconda 中运行时出现如下警告:
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Anaconda3_64\lib\site-packages\sklearn\metrics\classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
'precision', 'predicted', average, warn_for)
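PS:该警告说明某些类别在预测结果中一次都没有出现,这些类别的 precision 无法计算,因此 F-score 被置为 0.0。Python 默认对同一位置的警告只显示一次,所以在同一会话中第二次运行时警告可能不再出现,但这并不代表问题已经解决。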
参考链接:https://www.itranslater.com/qa/details/2582485825728545792
参考链接:https://blog.csdn.net/So_that/article/details/89787366
CNN——Flatten layer
参考链接:https://www.jianshu.com/p/32756e5dd969
keras用plot_model函数画模型的结构图,保存为图片
首先是model.summary()
# Print the layer table using 200-character lines; `positions` places the
# column boundaries (presumably as fractions of the line length -- confirm
# against the Keras docs for the installed version).
lstm_test_model.summary(
    line_length=200,
    positions=[0.30, 0.60, 0.7, 1.0],
)
plot_model()保存模型结构图
# Import the libraries below.
from tensorflow.keras.utils import plot_model
import pydotplus
# Arguments: the model, where to save the diagram, and whether to show
# the tensor shapes of each layer.
plot_model(
    lstm_test_model,
    show_shapes=True,
    to_file='lstm_test_model.png',
)
带参数的模型构建函数:如何保存其模型结构图
原始代码(模型的构建、训练和评估都写在函数内部,函数没有返回 model,因此无法在函数外部对它调用 plot_model):
# w2v_model=Word2Vec.load('sentiment_analysis/w2v_model.pkl')
# # 预训练的词向量中没有出现的词用0向量表示
# embedding_matrix = np.zeros((len(vocab) + 1, 300))
# for word, i in vocab.items():
# try:
# embedding_vector = w2v_model[str(word)]
# embedding_matrix[i] = embedding_vector
# except KeyError:
# continue
# 构建TextCNN模型
def TextCNN_model_2(x_train_padded_seqs, y_train, x_test_padded_seqs, y_test, embedding_matrix):
    """Build, train and evaluate a TextCNN classifier in one call.

    Layout: frozen pretrained embedding -> three parallel Conv1D +
    MaxPooling1D branches (window sizes 3, 4, 5) -> concatenate -> flatten
    -> dropout -> 9-way softmax.

    The model is fitted for 2 epochs with batch size 2, then accuracy and
    weighted F1 on the test data are printed.  Nothing is returned, so the
    trained model is not reachable by the caller afterwards.

    Uses names from the surrounding module: ``word2idx``, ``np``,
    ``metrics`` and the Keras layer classes.
    """
    seq_len = 3397
    token_input = Input(shape=(seq_len,), dtype='float64')

    # Frozen embedding initialised from the pretrained word vectors.
    # The vocabulary size is len(word2idx) with no "+ 1": per the original
    # note, the extra slot was already added when word2vec was trained.
    embedding_layer = Embedding(
        len(word2idx),
        300,
        input_length=seq_len,
        weights=[embedding_matrix],
        trainable=False,
    )
    embedded = embedding_layer(token_input)

    # One convolution/pooling branch per window size (3, 4, 5), each with
    # its own pooling width.
    pooled_branches = []
    for window, pool_width in ((3, 38), (4, 37), (5, 36)):
        conv_out = Conv1D(256, window, padding='same', strides=1, activation='relu')(embedded)
        pooled_branches.append(MaxPooling1D(pool_size=pool_width)(conv_out))

    # Join the three branch outputs along the sequence axis.
    merged = concatenate(pooled_branches, axis=1)
    flattened = Flatten()(merged)
    regularised = Dropout(0.2)(flattened)
    class_probs = Dense(9, activation='softmax')(regularised)

    model = Model(inputs=token_input, outputs=class_probs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(x_train_padded_seqs, y_train, batch_size=2, epochs=2)

    # Per-class probabilities for each test sample, reduced to the index
    # of the most probable class.
    probabilities = model.predict(x_test_padded_seqs)
    y_predict = np.argmax(probabilities, axis=1)
    # y_test is reduced the same way to recover the true class indices.
    y_tests = np.argmax(y_test, axis=1)

    print('准确率', metrics.accuracy_score(y_tests, y_predict))
    print('平均f1-score:', metrics.f1_score(y_tests, y_predict, average='weighted'))
修改之后的代码(函数只负责构建并编译模型,最后通过 return 返回 model):
from keras.layers import Conv1D, MaxPool1D, Dense, Flatten, concatenate, Embedding
from keras.layers import Dropout, Input
from keras.layers import MaxPooling1D,normalization
from keras.models import Model
from sklearn import metrics
# 构建TextCNN模型
def TextCNN_model_2(embedding_matrix, max_len=19422, num_classes=3):
    """Build and compile a TextCNN classifier and return it untrained.

    Layout: frozen pretrained embedding -> three parallel Conv1D +
    MaxPooling1D branches (window sizes 3, 4, 5) -> concatenate -> flatten
    -> dropout -> softmax.

    Args:
        embedding_matrix: 2-D pretrained embedding table, one row per
            vocabulary index.  Its row count and row length define the
            Embedding layer's input and output dimensions.
        max_len: Length of the padded input sequences.  Defaults to 19422.
            It should be at least 38, the widest pooling window used below.
        num_classes: Number of output classes.  Defaults to 3.

    Returns:
        The compiled Keras ``Model``.  Training is left to the caller, so
        the same object can later be passed to ``plot_model``.
    """
    # Take the table dimensions from the matrix itself rather than from a
    # module-level vocabulary: the layer's weights must have exactly this
    # shape, so the two can never disagree.
    vocab_size = len(embedding_matrix)
    embed_dim = len(embedding_matrix[0])

    # NOTE(review): token ids are integers; the float64 input dtype is kept
    # from the original code -- confirm whether 'int32' is preferable.
    main_input = Input(shape=(max_len,), dtype='float64')

    # Frozen embedding initialised from the pretrained vectors.
    embedder = Embedding(
        vocab_size,
        embed_dim,
        input_length=max_len,
        weights=[embedding_matrix],
        trainable=False,
    )
    embed = embedder(main_input)

    # One convolution/pooling branch per window size (3, 4, 5).
    branches = []
    for window, pool_width in ((3, 38), (4, 37), (5, 36)):
        conv = Conv1D(256, window, padding='same', strides=1, activation='relu')(embed)
        branches.append(MaxPooling1D(pool_size=pool_width)(conv))

    # Join the three branch outputs along the sequence axis.
    cnn = concatenate(branches, axis=1)
    flat = Flatten()(cnn)
    drop = Dropout(0.2)(flat)
    main_output = Dense(num_classes, activation='softmax')(drop)

    model = Model(inputs=main_input, outputs=main_output)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit an extra "None" line.
    model.summary()
    return model
然后在函数外部训练模型,并保存模型结构图:
# Build the compiled model, then train it here rather than inside the
# builder so the model object stays available afterwards.
model = TextCNN_model_2(embedMatrix)
model.fit(
    X_tra_idx,
    y_tra_idx,
    validation_data=(X_val_idx, y_val_idx),
    batch_size=10,
    epochs=3,
    verbose=1,
)
# Import the libraries below.
from tensorflow.keras.utils import plot_model
import pydotplus
# Arguments: the model, where to save the diagram, and whether to show
# the tensor shapes of each layer.
plot_model(
    model,
    show_shapes=True,
    to_file='textCNN_model.png',
)
运行后即可得到保存的 textCNN 模型结构图(textCNN_model.png)。