之前一直用 BiLSTM-CRF 做命名实体识别(NER)。前两天在 GitHub 上看到一个 C++ 分词工具,作者提供了两种分词模型:一种是 BiLSTM 结合 CRF,另一种是膨胀卷积(dilated convolution)结合 CRF。总体来讲原理比较简单,膨胀卷积的思想来源于 2015 年的一篇论文 https://arxiv.org/abs/1511.07122 ;后来有人把这篇论文的思路扩展到了 NER 任务上,那也是一篇比较出名的文章。昨天花时间写了下代码,训练了一版模型,总体来说准确率和 BiLSTM-CRF 旗鼓相当,但预测速度比 BiLSTM-CRF 要快一点。下面直接看代码:
下面是模型的部分训练代码,在 GPU 上训练也比 BiLSTM 要快:
def embeding(self, input_x):
    """Look up word embeddings for ``input_x`` and insert a size-1 axis at position 1.

    Creates the trainable embedding table ``self.w_word`` with shape
    ``[self.word_vocab_size, self.word_embedd_dim]``, initialised from a
    uniform distribution over [-1, 1), gathers the rows selected by
    ``input_x`` and returns them with an extra axis added at index 1.

    Args:
        input_x: integer ids used as row indices into the embedding table.

    Returns:
        The looked-up embeddings after ``tf.expand_dims(..., axis=1)``.
    """
    table_shape = [self.word_vocab_size, self.word_embedd_dim]
    with tf.name_scope("word_embedding"):
        initial_table = tf.random_uniform(table_shape, minval=-1, maxval=1)
        # Stored on self so other parts of the model can reach the table.
        self.w_word = tf.Variable(initial_table, trainable=True, name="w_word")
        looked_up = tf.nn.embedding_lookup(
            self.w_word, input_x, name="embedded_words")
        expanded = tf.expand_dims(looked_up, axis=1)
    return expanded
def inference(self, X, reuse=False, repeat_times=4):
    """Build the iterated dilated-convolution network and return per-position tag scores.

    An initial convolution maps ``X`` to ``self.num_filter`` channels. The
    stack of dilated convolutions described by ``self.layers`` is then applied
    ``repeat_times`` times with shared weights; the output of the last layer
    of every pass is collected, concatenated along the channel axis, passed
    through dropout and projected to ``self.num_tags`` scores.

    Args:
        X: input tensor; assumed to be ``[batch, 1, seq_len,
            self.word_embedd_dim]`` (axis 1 is squeezed away below and the
            first filter expects ``word_embedd_dim`` input channels).
        reuse: forwarded to ``tf.variable_scope`` so a second call can share
            the variables of the first; also selects the name of the final
            reshape op (``"Reshape_7"`` when truthy).
        repeat_times: how many times the dilated stack is applied. Defaults
            to 4, the value that used to be hard-coded.

    Returns:
        Tensor of scores reshaped to ``[self.batch_size, -1, self.num_tags]``.

    Raises:
        ValueError: if ``repeat_times`` is below 1 or ``self.layers`` is
            empty, since no layer output would be available to concatenate.
    """
    # NOTE(review): the pasted source lost its indentation; the scope nesting
    # below (projection inside "idcnn", final reshape outside) is a
    # reconstruction -- confirm variable names against an existing checkpoint.
    if repeat_times < 1 or len(self.layers) == 0:
        raise ValueError(
            "inference needs repeat_times >= 1 and at least one entry in "
            "self.layers, got repeat_times=%r and %d layer(s)"
            % (repeat_times, len(self.layers)))

    last_layer_index = len(self.layers) - 1
    with tf.variable_scope("idcnn", reuse=reuse):
        filter_weights = tf.get_variable(
            "idcnn_filter",
            shape=[1, self.filterWidth, self.word_embedd_dim,
                   self.num_filter],
            initializer=tf.contrib.layers.xavier_initializer())
        layerInput = tf.nn.conv2d(X,
                                  filter_weights,
                                  strides=[1, 1, 1, 1],
                                  padding="SAME",
                                  name="init_layer")

        finalOutFromLayers = []
        for j in range(repeat_times):
            for i, layer in enumerate(self.layers):
                # Every pass after the first (j > 0) re-enters the same scope
                # with reuse enabled, so all passes share one set of weights.
                with tf.variable_scope("atrous-conv-layer-%d" % i,
                                       reuse=bool(reuse or j > 0)):
                    w = tf.get_variable(
                        "filterW",
                        shape=[1, self.filterWidth, self.num_filter,
                               self.num_filter],
                        initializer=tf.contrib.layers.xavier_initializer())
                    b = tf.get_variable("filterB", shape=[self.num_filter])
                    conv = tf.nn.atrous_conv2d(layerInput,
                                               w,
                                               rate=layer['dilation'],
                                               padding="SAME")
                    conv = tf.nn.bias_add(conv, b)
                    conv = tf.nn.relu(conv)
                    # Only the output of the last layer in each pass is kept.
                    if i == last_layer_index:
                        finalOutFromLayers.append(conv)
                    layerInput = conv

        # One block of num_filter channels was collected per pass.
        totalWidthForLastDim = self.num_filter * len(finalOutFromLayers)

        finalOut = tf.concat(axis=3, values=finalOutFromLayers)
        finalOut = tf.nn.dropout(finalOut, self.dropout_keep_prob)
        finalOut = tf.squeeze(finalOut, [1])
        # Flatten to 2-D so a single matmul scores every position.
        finalOut = tf.reshape(finalOut, [-1, totalWidthForLastDim])

        finalW = tf.get_variable(
            "finalW",
            shape=[totalWidthForLastDim, self.num_tags],
            initializer=tf.contrib.layers.xavier_initializer())
        finalB = tf.get_variable("finalB",
                                 initializer=tf.constant(
                                     0.001, shape=[self.num_tags]))
        scores = tf.nn.xw_plus_b(finalOut, finalW, finalB, name="scores")

    # The reshape op gets the fixed name "Reshape_7" only on the reuse path.
    scores = tf.reshape(scores, [self.batch_size, -1, self.num_tags],
                        name="Reshape_7" if reuse else None)
    return scores
模型保存之后,用 C++ 加载并进行预测:
#include <iostream>
#include <vector>
#include "tensorflow/cc/saved_model/loader.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/protobuf/meta_graph.pb.h"
#include "tensorflow/cc/saved_model/tag_constants.h"
using namespace std;
using namespace tensorflow;
int main(int argc ,char *argv[]) {
string modelpath;
if(argc<2){
cout<<"请输入模型路径";
return 0;
}else{
modelpath=argv[1];
}
tensorflow::SessionOptions sess_options;
tensorflow::RunOptions run_options;
tensorflow::SavedModelBundle bundle;
Status status;
status =tensorflow::LoadSavedModel(sess_options, run_options, modelpath, {tensorflow::kSavedModelTagServe}, &bundle);
if(!status.ok()){
cout<<status.ToString()<<endl;
}
tensorflow::MetaGraphDef graph_def = bundle.meta_graph_def;
std::unique_ptr<tensorflow::Session>& session = bundle.session;
vector<int> vec={7997, 1945, 8471, 14127, 17565, 7340, 20224, 17529, 3796, 16033, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
int ndim=vec.size();
Tensor x(tensorflow::DT_INT32, tensorflow::TensorShape({1, ndim})); // New Tensor shape [1, ndim]
auto x_map = x.tensor<int, 2>();
for (int j = 0; j < ndim; j++) {
x_map(0, j) = vec[j];
}
std::vector<std::pair<string, tensorflow::Tensor>> inputs;
inputs.push_back(std::pair<std::string, tensorflow::Tensor>("input_x", x));
Tensor keep_prob(tensorflow::DT_FLOAT, tensorflow::TensorShape({1}));
keep_prob.vec<float>()(0) = 1.0f;
inputs.push_back(std::pair<std::string, tensorflow::Tensor>("keep_prob", keep_prob));
Tensor tensor_out(tensorflow::DT_INT32, TensorShape({1,ndim}));
std::vector<tensorflow::Tensor> outputs={{ tensor_out }};
status= session->Run(inputs, {"crf_pred/ReverseSequence_1"}, {}, &outputs);
if (!status.ok()) {
std::cout << status.ToString() << "\n";
return 1;
}
for(int i=0;i<40;++i) {
std::cout << outputs[0].matrix<int>()(0,i)<<" ";
}
cout<<endl;
return 0;
}