Implementing a Text Sentiment Analysis Service with Flask

Text sentiment analysis with an RNN model

 

Let's go straight to the code!

# encoding:utf-8
from meinheld import server
from predict_rnn import RnnModel
import os
import time
import logging
from logging.handlers import TimedRotatingFileHandler
import json
from flask import Flask, request

app = Flask(__name__)


def setLog():
    """Configure console logging plus an hourly-rotating log file."""
    log_fmt = '%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s'
    formatter = logging.Formatter(log_fmt)
    os.makedirs("log", exist_ok=True)  # make sure the log directory exists
    fh = TimedRotatingFileHandler(
        filename="log/run_textPredict_server" + time.strftime("%Y-%m-%d-%H-%M-%S") + ".log",
        when="H", interval=1,
        backupCount=72)
    fh.setFormatter(formatter)
    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger()
    log.addHandler(fh)


setLog()
rnn_model = RnnModel()


@app.route('/ai/v1/TextEmotionAnalyse', methods=['POST'])
def textRNNClassPredict():
    try:
        start_time = time.time()
        # decode the raw request body and parse it as JSON
        resParm = request.data
        resParm = json.loads(str(resParm, encoding="utf-8"))

        requestId = resParm.get('requestId')
        # service authentication: reject requests without a token
        token = resParm.get('token')
        if not token:
            res = {'code': 3, 'msg': 'token fail'}
            logging.error("code: 3 msg: token fail")
            return json.dumps(res)

        # req_json = request.get_json(silent=False)
        # content = req_json.get('content')
        strContent = resParm.get('inputStr')
        if not strContent:
            res = {'code': 4, 'msg': 'input string param invalid'}
            logging.error("code: 4 msg: input string param invalid")
            return json.dumps(res)

        # reject inputs longer than the model's seq_length (600 characters)
        if len(strContent) > 600:
            res = {'code': 5, 'msg': 'input string param length invalid'}
            logging.error("code: 5 msg: input string param length invalid")
            return json.dumps(res)
        time_predict = time.time()
        resFlag = rnn_model.predict(strContent)
        logging.info(f"text analyse predict cost time: {time.time() - time_predict:.4f}s")

        if resFlag is None:
            res = {'code': 2, 'msg': 'text analyse except, fail'}
            logging.error("code: 2 msg: text analyse except, analyse fail")
            return json.dumps(res)
        logging.debug(f"resFlag: {resFlag}")
        # map the positive-class probability to a label: 1 = positive, -1 = negative, 0 = neutral
        resEmotion = 0
        positive_prob = resFlag[0][0]
        negative_prob = resFlag[0][1]

        if positive_prob >= 0.53:    # positive probability >= 0.53
            resEmotion = 1
            logging.info(f"sentiment is positive, probability: {positive_prob}")
        elif positive_prob <= 0.48:  # positive probability <= 0.48
            resEmotion = -1
            logging.info(f"sentiment is negative, probability: {negative_prob}")
        else:                        # 0.48 < positive probability < 0.53
            resEmotion = 0
            logging.info(f"sentiment is neutral, probability: {max(negative_prob, positive_prob)}")

        timeUsed = time.time() - start_time
        data = {'requestId': requestId, 'emotionAnalyseRes': resEmotion, 'timeUsed': timeUsed}
        res = {'code': 0, 'msg': 'success', 'data': data}
        logging.info(f"code: 0 msg: success, text analyse cost time: {timeUsed:.4f}s")
        return json.dumps(res)
    except Exception as e:
        logging.exception(e)
        res = {'code': 6, 'msg': 'request exception'}
        return json.dumps(res)


if __name__ == "__main__":
    logging.info('Starting the server...')
    server.listen(("0.0.0.0", 8885))
    server.run(app)
    # app.run(host='0.0.0.0', port=8885, threaded=True)
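
For a quick sanity check, the service can be called with a small client script like the hypothetical sketch below. The endpoint, port, and the requestId / token / inputStr field names come from the route above; the token value is just a placeholder, since the server only checks that it is non-empty.

# encoding:utf-8
# Hypothetical client sketch for the /ai/v1/TextEmotionAnalyse endpoint above.
import json
import requests  # assumes the requests package is installed

payload = {
    "requestId": "test-001",        # any identifier, echoed back in the response
    "token": "your-token-here",     # placeholder; the server only checks that it is non-empty
    "inputStr": "这个电影真好看"      # text to analyse, at most 600 characters
}
resp = requests.post("http://127.0.0.1:8885/ai/v1/TextEmotionAnalyse",
                     data=json.dumps(payload).encode("utf-8"))
print(resp.json())  # e.g. {"code": 0, "msg": "success", "data": {"emotionAnalyseRes": 1, ...}}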

 

predict_rnn.py

# encoding: utf-8

from __future__ import print_function
import time
from datetime import timedelta
import os
import tensorflow as tf
import tensorflow.contrib.keras as kr

# from cnn_model import TCNNConfig, TextCNN
from rnn_model import TRNNConfig, TextRNN
from data.cnews_loader import read_category, read_vocab

import pandas as pd

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Python 2/3 compatibility: under Python 3 there is no `unicode`, so alias it to `str`
try:
    bool(type(unicode))
except NameError:
    unicode = str

base_dir = 'data/cnews/'
vocab_dir = os.path.join(base_dir, 'cnews.vocab.txt')

save_dir = 'checkpoints/testtextrnn'
save_path = os.path.join(save_dir, 'best_validation_0.8590335594711533')  # path to the best validation checkpoint


class RnnModel:
    def __init__(self):
        self.config = TRNNConfig()
        self.categories, self.cat_to_id = read_category()
        self.words, self.word_to_id = read_vocab(vocab_dir)
        self.config.vocab_size = len(self.words)
        self.model = TextRNN(self.config)

        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess=self.session, save_path=save_path)  # restore the saved model weights

    def predict(self, message):
        # unicode() keeps this working whether the model was trained under Python 2 or Python 3
        content = unicode(message)
        # map each character to its vocabulary id, dropping out-of-vocabulary characters
        data = [self.word_to_id[x] for x in content if x in self.word_to_id]

        feed_dict = {
            self.model.input_x: kr.preprocessing.sequence.pad_sequences([data], self.config.seq_length),
            self.model.keep_prob: 1.0
        }

        # self.model.predict is the softmax probability tensor, shape (1, num_classes)
        y_pred_cls = self.session.run(self.model.predict, feed_dict=feed_dict)
        return y_pred_cls
        # return self.categories[y_pred_cls[0]]


def get_time_dif(start_time):
    """Return the elapsed time since start_time as a timedelta."""
    end_time = time.time()
    time_dif = end_time - start_time
    return timedelta(seconds=int(round(time_dif)))


if __name__ == '__main__':
    # start_time = time.time()
    rnn_model = RnnModel()
    # testfilepath = 'data/cnews/textrnn_our_model_data.txt'
    # df1 = pd.DataFrame(pd.read_csv(testfilepath,sep='\t', encoding='utf-8', error_bad_lines=False, header=None))
    # df2 = df1.copy()
    # df2.columns = ['label', 'content']
    # df2['prelabel'] = df2['content'].apply(lambda x: rnn_model.predict(x))
    # print("终于执行完了")
    # df3 = df2[['label','prelabel', 'content']]
    # df3.to_csv('data/cnews/textrnn_our_model_5000_result.txt',index=False)
    # time_dif = get_time_dif(start_time)
    # print("Time usage:", time_dif)
    test_demo = ['真的佩服自己能把这部电影看完',
                 '这个电影太一般', '这个电影一般', '这个电影真垃圾', '这个电影真好看']
    for i in test_demo:
        flag = rnn_model.predict(i)
        positive_prob = flag[0][0]
        negative_prob = flag[0][1]

        if positive_prob >= 0.53:    # positive probability >= 0.53
            print("sentiment is positive, probability: " + str(positive_prob))
        elif positive_prob <= 0.48:  # positive probability <= 0.48
            print("sentiment is negative, probability: " + str(negative_prob))
        else:                        # 0.48 < positive probability < 0.53
            print("sentiment is neutral, probability: " + str(max(negative_prob, positive_prob)))
        # print(type(flag))
        # if int(flag) == 2:
        #     print("情感为正向")
        # if int(flag) == 0:
        #     print("情感为负向")
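
The same 0.53 / 0.48 thresholds appear both in the Flask handler and in this test loop. A small hypothetical helper like the sketch below could keep the two in sync; the name prob_to_emotion is made up here, and the return values 1 / 0 / -1 mirror the server's emotionAnalyseRes field.

# Hypothetical helper that centralises the sentiment thresholds used above.
def prob_to_emotion(positive_prob, pos_threshold=0.53, neg_threshold=0.48):
    """Map the positive-class probability to 1 (positive), -1 (negative) or 0 (neutral)."""
    if positive_prob >= pos_threshold:
        return 1
    if positive_prob <= neg_threshold:
        return -1
    return 0

# usage: resEmotion = prob_to_emotion(resFlag[0][0])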

 

rnn_model.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

import tensorflow as tf

class TRNNConfig(object):
    """RNN configuration parameters"""

    # model parameters
    embedding_dim = 128      # word embedding dimension
    seq_length = 600         # sequence length
    num_classes = 2          # number of classes
    vocab_size = 20000       # vocabulary size

    num_layers = 2           # number of hidden layers
    hidden_dim = 128         # hidden layer units
    rnn = 'gru'              # 'lstm' or 'gru'

    dropout_keep_prob = 0.8  # dropout keep probability
    learning_rate = 1e-3     # learning rate

    batch_size = 128         # training batch size
    num_epochs = 10          # total number of epochs

    print_per_batch = 100    # report results every N batches
    save_per_batch = 10      # write to TensorBoard every N batches


class TextRNN(object):
    """Text classification, RNN model"""
    def __init__(self, config):
        self.config = config

        # three input placeholders
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        self.rnn()

    def rnn(self):
        """RNN model"""

        def lstm_cell():  # LSTM cell
            return tf.contrib.rnn.BasicLSTMCell(self.config.hidden_dim, state_is_tuple=True)

        def gru_cell():  # GRU cell
            return tf.contrib.rnn.GRUCell(self.config.hidden_dim)

        def dropout():  # wrap each RNN cell with a dropout layer
            if self.config.rnn == 'lstm':
                cell = lstm_cell()
            else:
                cell = gru_cell()
            return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)

        # word embedding lookup
        with tf.device('/gpu:0'):
            embedding = tf.get_variable('embedding', [self.config.vocab_size, self.config.embedding_dim])
            embedding_inputs = tf.nn.embedding_lookup(embedding, self.input_x)

        with tf.name_scope("rnn"):
            # multi-layer RNN
            cells = [dropout() for _ in range(self.config.num_layers)]
            rnn_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

            _outputs, _ = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=embedding_inputs, dtype=tf.float32)
            last = _outputs[:, -1, :]  # take the output at the last time step

        with tf.name_scope("score"):
            # fully connected layer, followed by dropout and ReLU
            fc = tf.layers.dense(last, self.config.hidden_dim, name='fc1')
            fc = tf.contrib.layers.dropout(fc, self.keep_prob)
            fc = tf.nn.relu(fc)

            # classifier
            self.logits = tf.layers.dense(fc, self.config.num_classes, name='fc2')
            self.predict = tf.nn.softmax(self.logits)     # class probabilities; this is the tensor predict_rnn.py runs
            self.y_pred_cls = tf.argmax(self.predict, 1)  # predicted class

        with tf.name_scope("optimize"):
            # cross-entropy loss
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)
            # optimizer
            self.optim = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate).minimize(self.loss)

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), self.y_pred_cls)
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
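
As a quick way to verify that the graph above builds and produces the (batch, 2) probability matrix that predict_rnn.py consumes, a minimal smoke-test sketch could look like the following. It assumes TensorFlow 1.x (tf.contrib); the vocab_size value is arbitrary, and allow_soft_placement lets the '/gpu:0' embedding fall back to CPU on machines without a GPU.

# Minimal smoke test for the TextRNN graph above (assumes TensorFlow 1.x).
import numpy as np
import tensorflow as tf
from rnn_model import TRNNConfig, TextRNN

config = TRNNConfig()
config.vocab_size = 5000  # arbitrary value, only needed to build the embedding matrix
model = TextRNN(config)

# allow_soft_placement lets the '/gpu:0' embedding fall back to CPU if no GPU is present
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())
    fake_batch = np.zeros((2, config.seq_length), dtype=np.int32)  # two all-padding inputs
    probs = sess.run(model.predict, feed_dict={model.input_x: fake_batch, model.keep_prob: 1.0})
    print(probs.shape)  # (2, 2): one probability pair per input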

 
