基于情感词典的情感值分析

#utils.py
import re


class ToolGeneral():
    """
    Tool function
    """
    def is_odd(self,num):
        if num % 2 == 0:
            return 'even'
        else:
            return 'odd'        

    def load_dict(self,file):  
        """
        Load dictionary
        """
        with  open(file,encoding='utf-8',errors='ignore') as fp:
            lines = fp.readlines()
            lines = [l.strip() for l in lines]
            print("Load data from file (%s) finished !"%file)
            dictionary = [word.strip() for word in lines]
        return set(dictionary)
    

    def sentence_split_regex(self,sentence):
        """
        Segmentation of sentence
        """
        if sentence is not None:
            sentence = re.sub(r"–+|—+", "-", sentence)
            sub_sentence = re.split(r"[。,,!!??;;\s…~~]+|\.{2,}|…+|&nbsp+|_n|_t", sentence)
            sub_sentence = [s for s in sub_sentence if s != '']
            if sub_sentence != []:
                return sub_sentence
            else:
                return [sentence]
        return []
    
    
if __name__ == "__main__": 
    #
    tool = ToolGeneral()    
    #
    s = '"白云山澄清复方板蓝根尚存不确定性 提醒投资者不要见风就是雨"'
    ls = tool.sentence_split_regex(s)
    print(ls)   

#加载情感字典文件

#hyperparameters.py

import os
from utils import ToolGeneral


pwd = os.path.dirname(os.path.abspath(__file__))
tool = ToolGeneral()


class Hyperparams:
    '''Hyper parameters'''
    # Load sentiment dictionary
    deny_word = tool.load_dict(os.path.join(pwd,'dict','not.txt'))
    posdict = tool.load_dict(os.path.join(pwd,'dict','positive.txt'))
    negdict = tool.load_dict(os.path.join(pwd,'dict', 'negative.txt'))
    pos_neg_dict = posdict|negdict
    # Load adverb dictionary
    mostdict = tool.load_dict(os.path.join(pwd,'dict','most.txt'))
    verydict = tool.load_dict(os.path.join(pwd,'dict','very.txt'))
    moredict = tool.load_dict(os.path.join(pwd,'dict','more.txt'))
    ishdict = tool.load_dict(os.path.join(pwd,'dict','ish.txt'))
    insufficientlydict = tool.load_dict(os.path.join(pwd,'dict','insufficiently.txt'))
    overdict = tool.load_dict(os.path.join(pwd,'dict','over.txt'))
    inversedict = tool.load_dict(os.path.join(pwd,'dict','inverse.txt'))




print(os.path.abspath(__file__)  )

情感值分析

networks。py
import os
import sys
import jieba
import numpy as np
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from utils import ToolGeneral
from hyperparameters import Hyperparams as hp


tool = ToolGeneral()
jieba.load_userdict(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'dict','jieba_sentiment.txt'))


class SentimentAnalysis():
    """
    Sentiment Analysis with some dictionarys
    """      
    def sentiment_score_list(self,dataset):
        seg_sentence = tool.sentence_split_regex(dataset)
        count1,count2 = [],[]
        for sentence in seg_sentence: 
            words = jieba.lcut(sentence, cut_all=False)
            i = 0 
            a = 0 
            for word in words:
                """
                poscount 积极词的第一次分值;
                poscount2 积极反转后的分值;
                poscount3 积极词的最后分值(包括叹号的分值)      
                """
                poscount,negcount,poscount2,negcount2,poscount3,negcount3 = 0,0,0,0,0,0  # 
                if word in hp.posdict : 
                    if word in ['好','真','实在'] and words[min(i+1,len(words)-1)] in hp.pos_neg_dict  and words[min(i+1,len(words)-1)] != word:
                        continue
                    else:
                        poscount +=1
                        c = 0
                        for w in words[a:i]: # 扫描情感词前的程度词
                            if w in hp.mostdict:
                                poscount *= 4
                            elif w in hp.verydict:
                                poscount *= 3 
                            elif w in hp.moredict:
                                poscount *= 2 
                            elif w in hp.ishdict:
                                poscount *= 0.5
                            elif w in hp.insufficientlydict:
                                poscount *= -0.3 
                            elif w in hp.overdict:
                                poscount *= -0.5 
                            elif w in hp.inversedict: 
                                c+= 1
                            else:
                                poscount *= 1
                        if tool.is_odd(c) == 'odd': # 扫描情感词前的否定词数
                            poscount *= -1.0
                            poscount2 += poscount
                            poscount = 0
                            poscount3 = poscount + poscount2 + poscount3
                            poscount2 = 0
                        else:
                            poscount3 = poscount + poscount2 + poscount3
                            poscount = 0
                        a = i+1
                elif word in hp.negdict: # 消极情感的分析,与上面一致              
                    if word in ['好','真','实在'] and words[min(i+1,len(words)-1)] in hp.pos_neg_dict and words[min(i+1,len(words)-1)] != word:
                        continue
                    else:
                        negcount += 1
                        d = 0
                        for w in words[a:i]:                         
                            if w in hp.mostdict:
                                negcount *= 4
                            elif w in hp.verydict:
                                negcount *= 3
                            elif w in hp.moredict:
                                negcount *= 2
                            elif w in hp.ishdict:
                                negcount *= 0.5
                            elif w in hp.insufficientlydict:
                                negcount *= -0.3
                            elif w in hp.overdict:
                                negcount *= -0.5
                            elif w in hp.inversedict:
                                d += 1
                            else:
                                negcount *= 1
                    if tool.is_odd(d) == 'odd':
                        negcount *= -1.0
                        negcount2 += negcount
                        negcount = 0
                        negcount3 = negcount + negcount2 + negcount3
                        negcount2 = 0
                    else:
                        negcount3 = negcount + negcount2 + negcount3
                        negcount = 0
                    a = i + 1      
                i += 1
                pos_count = poscount3
                neg_count = negcount3
                count1.append([pos_count,neg_count])           
            if words[-1] in ['!','!']:# 扫描感叹号前的情感词,发现后权值*2
                count1 = [[j*2 for j in c] for c in count1]
    
            for w_im in ['但是','但']:
                if w_im in words : # 扫描但是后面的情感词,发现后权值*5
                    ind = words.index(w_im)
                    count1_head = count1[:ind]
                    count1_tail = count1[ind:]            
                    count1_tail_new = [[j*5 for j in c] for c in count1_tail]
                    count1 = []
                    count1.extend(count1_head)
                    count1.extend(count1_tail_new)
                    break          
            if words[-1] in ['?','?']:# 扫描是否有问好,发现后为负面
                count1 = [[0,2]]
    
            count2.append(count1)
            count1=[]
        return count2
      
        
    def sentiment_score(self,s):
        senti_score_list = self.sentiment_score_list(s)
        if senti_score_list != []:
            negatives=[]
            positives=[]
            for review in senti_score_list:
                score_array =  np.array(review)
                AvgPos = np.sum(score_array[:,0])
                AvgNeg = np.sum(score_array[:,1])        
                negatives.append(AvgNeg)
                positives.append(AvgPos)   
            pos_score = np.mean(positives) 
            neg_score = np.mean(negatives)
            if pos_score >=0 and  neg_score<=0:
                pos_score = pos_score
                neg_score = abs(neg_score)
            elif pos_score >=0 and  neg_score>=0:
                pos_score = pos_score
                neg_score = neg_score    
        else:
            pos_score,neg_score=0,0
        return pos_score,neg_score
       
    def normalization_score(self,sent):
        score1,score0 = self.sentiment_score(sent)
        if score1 > 4 and score0 > 4:
            if score1 >= score0:
                _score1 = 1
                _score0 = score0/score1    
            elif score1 < score0:
                _score0 = 1
                _score1 = score1/score0  
        else :
            if score1 >= 4 :
                _score1 = 1
            elif score1 < 4 :
                _score1 = score1/4
            if score0 >= 4 :
                _score0 = 1
            elif score0 < 4 :
                _score0 = score0/4 
        return _score1,_score0
        



if __name__ =='__main__':
    sa = SentimentAnalysis()
    text = '白云山澄清复方板蓝根尚存不确定性'

    print(sa.normalization_score(text))

预测


from networks import SentimentAnalysis


SA = SentimentAnalysis()


def predict(sent):
    """
    1: positif
    0: neutral
    -1: negatif
    """
    score1,score0 = SA.normalization_score(sent)
    if score1 == score0:
        result = 0
    elif score1 > score0:
        result = 1
    elif score1 < score0:
        result = -1
    return result


import pandas as pd



if __name__ =='__main__':
    data = pd.read_csv(r'C:\Users\lenovo\白云山股票资讯与发布时间.csv',header=None)
    data_1 = []
    for item in data[0]:
        data_1.append(predict(item))
    data['情感值'] = pd.DataFrame(data_1)
    data.to_csv('资讯情感值.csv',header=False,index=False,mode='a+')

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值