python - 对 '数码大冒险tri 泡泡评论' 进行简单的情感分析

最新推荐文章于 2024-07-28 20:33:06 发布

PeersLee

最新推荐文章于 2024-07-28 20:33:06 发布

阅读量913

点赞数

文章标签： python 情感分析 jieba

本文链接：https://blog.csdn.net/PeersLee/article/details/74380276

版权

爬虫

selenium 抓取‘楚乔传’ 评论

NLP

import jieba
import numpy as np
import pymongo

from NLP.Config import *

# MongoDB handles used by the rest of this module: a client built from
# MONGO_URL, and the MONGO_DB database selected on that client (both
# constants come from the NLP.Config star import).
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]

def get_comments_from_db(count=100):
    """Read the 'comment' field of documents stored in db[MONGO_TABLE].

    Args:
        count: value passed to the cursor's ``limit`` call.

    Returns:
        A list with the 'comment' value of each fetched document, or None
        if anything raised while querying (the exception's ``args`` are
        printed in that case).
    """
    try:
        cursor = db[MONGO_TABLE].find().limit(count)
        fetched = []
        for document in cursor:
            fetched.append(document['comment'])
    except Exception as err:
        print(err.args)
        return None
    return fetched
def get_list_from_file(name=''):
    """Load a text file into a list, one stripped entry per line.

    Args:
        name: file stem; the file read is ``./<name>.txt`` (UTF-8).

    Returns:
        A list holding every line of the file with surrounding whitespace
        removed. Blank lines are kept as empty strings.
    """
    with open('./{}.txt'.format(name), mode='r', encoding='utf-8') as handle:
        return [line.strip() for line in handle]

# Word lists, each read from the matching ./<name>.txt file.
stop_words = get_list_from_file(name='stopwords')
refute_words = get_list_from_file(name='refute')
nega_words = get_list_from_file(name='negative')
posi_words = get_list_from_file(name='positive')

# degree.txt is one flat list in which the marker entries below separate the
# sections; the words between a marker and the next marker belong to the
# first marker's degree level. 'last' only serves as the closing marker.
degree_words = get_list_from_file(name='degree')
degree_index_list = ['extreme', 'very', 'more', 'ish', 'last']
degree_dict = {}
for start_marker, end_marker in zip(degree_index_list, degree_index_list[1:]):
    section_start = degree_words.index(start_marker) + 1
    section_end = degree_words.index(end_marker)
    degree_dict[start_marker] = degree_words[section_start:section_end]

def sentiment_value(**kwargs):
    """Score one comment with the dictionary-based sentiment rules.

    Steps:
      1. Segment the comment with jieba, strip each token, and drop empty
         tokens and stop words.
      2. Every token found in ``posi_words`` / ``nega_words`` starts from
         POSI_VALUE / NEGA_VALUE and is multiplied by the weight of each
         degree word or negation word located between the previous
         sentiment word and itself.
      3. Summarise the per-word values.

    Keyword Args:
        comment: the text to analyse (required).

    Returns:
        A dict with the 'sum', 'avg' and 'std' of the per-word sentiment
        values, or None when no sentiment word was found.

    Raises:
        KeyError: if ``comment`` is not supplied.
    """
    comment = kwargs['comment']
    # Strip BEFORE filtering so the stop-word test sees the normalised token,
    # and drop empty tokens: whitespace segments would otherwise turn into ''
    # and could match a blank line in one of the dictionary files.
    stripped_tokens = (token.strip() for token in jieba.cut(comment, cut_all=False))
    words = [w for w in stripped_tokens if w and w not in stop_words]
    sent_value_list = []
    # Index just past the previous sentiment word; modifiers are looked up in
    # words[pre_index:index] only.
    pre_index = 0
    print(words)
    # enumerate() gives the real position of the current word. Looking it up
    # with words.index(word) returns the FIRST occurrence, which breaks the
    # modifier window whenever a sentiment word appears more than once.
    for index, word in enumerate(words):
        seg_sent_value = 0
        if word in posi_words:
            seg_sent_value += POSI_VALUE
        elif word in nega_words:
            seg_sent_value += NEGA_VALUE
        if seg_sent_value == 0:
            continue
        # Apply every degree/negation word since the previous sentiment word.
        for w in words[pre_index:index]:
            if w in degree_dict['extreme']:
                seg_sent_value *= EXTREME_VALUE
            elif w in degree_dict['very']:
                seg_sent_value *= VERY_VALUE
            elif w in degree_dict['more']:
                seg_sent_value *= MORE_VALUE
            elif w in degree_dict['ish']:
                seg_sent_value *= ISH_VALUE
            elif w in refute_words:
                seg_sent_value *= REFUTE_VALUE
        pre_index = index + 1
        sent_value_list.append(seg_sent_value)
    if not sent_value_list:
        return None

    arr = np.array(sent_value_list)
    print(arr)
    return {
        'sum': arr.sum(),
        'avg': arr.mean(),
        'std': arr.std(),
    }

def run():
    """Print the sentiment statistics of every comment read from the database.

    Does nothing when no comments could be fetched.
    """
    comments = get_comments_from_db()
    # get_comments_from_db() returns None when the query fails; iterating
    # None would raise TypeError, so bail out instead.
    if not comments:
        return
    for comment in comments:
        print(sentiment_value(comment=comment))

# Script entry point: analyse the stored comments only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    run()

链接: https://pan.baidu.com/s/1kVn4Hx1 密码: y4xa