python - 对 '数码大冒险tri 泡泡评论' 进行简单的情感分析

爬虫



NLP


import jieba
import numpy as np
import pymongo

from NLP.Config import *

'''
db
'''
# Module-wide MongoDB client, created once at import time.
# MONGO_URL is not defined in this file; presumably it comes from the
# star import of NLP.Config - confirm.
client = pymongo.MongoClient(MONGO_URL)
# Handle to the database named by MONGO_DB; get_comments_from_db() reads
# its collections through this name.
db = client[MONGO_DB]

def get_comments_from_db(count=100):
    """Collect the ``comment`` field of documents in the ``MONGO_TABLE`` collection.

    The query is ``find().limit(count)`` on ``db[MONGO_TABLE]``.

    Args:
        count: Value passed to the cursor's ``limit()``.

    Returns:
        A list with the ``comment`` value of every returned document, or
        ``None`` if anything raised while querying or reading the documents
        (the exception's ``args`` are printed in that case).
    """
    try:
        cursor = db[MONGO_TABLE].find().limit(count)
        collected = []
        for document in cursor:
            collected.append(document['comment'])
    except Exception as err:
        # Best-effort: report the failure and signal it with None.
        print(err.args)
        return None
    return collected
'''
将txt转为list
'''
def get_list_from_file(name=''):
    """Read ``./<name>.txt`` (UTF-8) and return its lines as a list.

    Args:
        name: File name without the ``.txt`` extension, resolved relative to
            the current working directory.

    Returns:
        One entry per line of the file, each with surrounding whitespace
        stripped. Blank lines are kept as empty strings.
    """
    file_path = './{}.txt'.format(name)
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return [line.strip() for line in handle]

'''
几个词典
'''
# Word lists loaded from the .txt dictionaries in the working directory.
stop_words = get_list_from_file(name='stopwords')
refute_words = get_list_from_file(name='refute')
nega_words = get_list_from_file(name='negative')
posi_words = get_list_from_file(name='positive')

# degree.txt is one flat list in which the marker lines named in
# degree_index_list separate the sections; each section's words are the
# entries between its marker and the next marker ('last' only closes the
# final section and gets no entry of its own).
degree_words = get_list_from_file(name='degree')
degree_index_list = ['extreme', 'very', 'more', 'ish', 'last']
degree_dict = {}
for first_index, second_index in zip(degree_index_list, degree_index_list[1:]):
    start = degree_words.index(first_index) + 1
    stop = degree_words.index(second_index)
    degree_dict[first_index] = degree_words[start:stop]

'''
1. 分词(情感词, 否定词, 程度词/号, )
2. 评论的情感值的均值与方差
'''
def sentiment_value(**kwargs):
    """Score one comment with a dictionary-based sentiment heuristic.

    The comment is segmented with ``jieba.cut`` and tokens found in
    ``stop_words`` are dropped. Every remaining token found in ``posi_words``
    or ``nega_words`` starts from ``POSI_VALUE`` / ``NEGA_VALUE``; that base
    score is then multiplied once for each degree word or negation word that
    appears between the previous sentiment word and this one.

    The scoring constants (``POSI_VALUE``, ``NEGA_VALUE``, ``EXTREME_VALUE``,
    ``VERY_VALUE``, ``MORE_VALUE``, ``ISH_VALUE``, ``REFUTE_VALUE``) are not
    defined in this file; presumably they come from ``NLP.Config`` - confirm.

    Args:
        **kwargs: Must contain ``comment``, the text to analyse.

    Returns:
        A dict with the ``'sum'``, ``'avg'`` and ``'std'`` of the per-word
        scores (NumPy ``sum`` / ``mean`` / ``std``), or ``None`` when the
        comment contains no scored sentiment word.

    Raises:
        KeyError: If ``comment`` is not supplied.
    """
    comment = kwargs['comment']
    words = [w.strip() for w in jieba.cut(comment, cut_all=False) if w not in stop_words]
    sent_value_list = []
    # Index just past the previous sentiment word; modifiers are searched
    # only in words[pre_index:index].
    pre_index = 0
    print(words)
    # enumerate() yields the real position of each token. The previous
    # words.index(word) lookup always returned the FIRST occurrence, so a
    # repeated sentiment word got an empty/wrong modifier window and rewound
    # pre_index.
    for index, word in enumerate(words):
        if word in posi_words:
            seg_sent_value = POSI_VALUE
        elif word in nega_words:
            seg_sent_value = NEGA_VALUE
        else:
            continue
        if seg_sent_value == 0:
            # A zero base score is treated as "no sentiment", as before.
            continue
        # Apply every modifier found since the previous sentiment word.
        for w in words[pre_index:index]:
            if w in degree_dict['extreme']:
                seg_sent_value *= EXTREME_VALUE
            elif w in degree_dict['very']:
                seg_sent_value *= VERY_VALUE
            elif w in degree_dict['more']:
                seg_sent_value *= MORE_VALUE
            elif w in degree_dict['ish']:
                seg_sent_value *= ISH_VALUE
            elif w in refute_words:
                seg_sent_value *= REFUTE_VALUE
        pre_index = index + 1
        sent_value_list.append(seg_sent_value)

    if not sent_value_list:
        return None

    arr = np.array(sent_value_list)
    print(arr)
    return {
        'sum': arr.sum(),
        'avg': arr.mean(),
        'std': arr.std(),
    }

def run():
    """Print the sentiment statistics of every comment fetched from the database."""
    # get_comments_from_db() returns None when the query fails; iterating
    # over None would raise TypeError, so fall back to an empty list.
    comments = get_comments_from_db() or []
    for comment in comments:
        print(sentiment_value(comment=comment))

if __name__ == '__main__':
    run()


链接: https://pan.baidu.com/s/1kVn4Hx1 密码: y4xa
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

PeersLee

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值