不说废话,直接三步搭建最简单的bert文本多标签分类器
1. 去官方仓库 https://github.com/google-research/bert 下载一个预训练的 BERT 模型
2. 搭建并启动 bert-as-service 服务端 https://github.com/hanxiao/bert-as-service
3.分类demo
# Multi-label text classification demo: sentence vectors from a running
# bert-as-service server are fed into a small dense Keras network.
#
# NOTE(review): this snippet does not show its imports. It assumes pd,
# MultiLabelBinarizer, train_test_split, BertClient, Sequential, Dense,
# Dropout and classification_report are already in scope.

# Probability above which a label counts as predicted.
THRESHOLD = 0.5

# train.csv provides a 'corpus' text column and a 'label' column in which
# the labels of one sample are joined with "&&&&".
dataset = pd.read_csv('train.csv')

# BertClient.encode expects a plain list of strings, not a pandas Series.
x = dataset['corpus'].tolist()

# Pass the list of label lists directly: wrapping ragged lists in np.array
# fails (or yields an object array) when samples have different label counts.
y = [labels.split("&&&&") for labels in dataset['label']]
mb = MultiLabelBinarizer()
y = mb.fit_transform(y)

x_train, x_test, y_train, y_test = train_test_split(x, y)

# Encode the raw texts into fixed-size sentence vectors via the BERT server.
bc = BertClient()
X_train = bc.encode(x_train)
X_test = bc.encode(x_test)

# Derive the layer sizes from the data instead of hard-coding 768 / 5, so the
# demo works with any BERT model size and any number of labels.
model = Sequential()
model.add(Dense(100, activation='relu', input_dim=X_train.shape[1]))
model.add(Dropout(0.5))
# One independent sigmoid unit per label (multi-label, not softmax).
model.add(Dense(len(mb.classes_), activation='sigmoid'))
model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy'])

H = model.fit(X_train, y_train, epochs=15, validation_split=0.2)
# History.history is a dict attribute, not a method.
print(H.history)

# Threshold every label independently; argmax would reduce the task to
# single-label classification and ignore all but one label per sample.
predictions = (model.predict(X_test) >= THRESHOLD).astype(int)
print(classification_report(y_test, predictions, target_names=[str(c) for c in mb.classes_]))
end
!!!!!!!!!!!!!!!!!!!!!!