文本情感简单判断的过程大概如下:经过预处理后的文本,首先识别不同极性类别的特征项,通过构建好的情感词表(褒义词、贬义词)、否定词词表、程度副词词表以及反问句标记词表做相应处理,获取该条文本中每个特征项的权值,最后作求和运算,获得整段文本的情感倾向值,进而判别出情感倾向性。
文本以句子为单位,以标点符号为分割标志,将每条文本分割为N个句子,提取每个句子中的情感词,根据以下情况计算权值:
1、出现程度副词修饰情感词
2、出现奇数个否定词修饰情感词
3、该句子是包含情感词的感叹句
权值 = 情感词权重 * 相应情况出现的词的权重
句子的情感倾向度等于所有情感词计算出来的权值之和。
文本的情感倾向度等于所有句子计算出来的权值之和。
最终情感倾向值会出现下列三种情况:
文本的情感倾向度权值 > 0,为正面情感
文本的情感倾向度权值 = 0,为中性情感
文本的情感倾向度权值 < 0,为负面情感
下面是用Java简单实现文本情感简单判断的一个例子:
- public class Emotion {
- //褒义词
- public static final int COMMENDATORY = 1;
- //贬义词
- public static final int DEROGRATORY = 2;
- //中性词
- public static final int NEUTRAL = 3;
- public static int judge(String content) {
- //把内容划分为句子
- String[] sentences = content.split("[,.,。]");
- //判断每个句子的情感
- double cWeight = 0;
- for (String sentence : sentences) {
- double sWeight = 0;
- //判断句子是否是感叹句
- boolean isExclamatorySentence = WordUtils.isExclamatorySentence(sentence);
- String[] words = WordUtils.splitByNlp(sentence);
- ShowUtils.printToConsole(words);
- for (int i = 0, len = words.length; i < len; i++) {
- String word = words[i];
- //判断是否是情感词
- boolean isEmotion = WordUtils.isEmotionWord(word);
- if (!isEmotion) continue;
- double weight = WordUtils.getWordWeight(word);
- int negativeNum = 0;
- String negativeWord = null;
- int index = new Integer(i);
- while (index > 0) {
- String preWord = words[i - 1];
- if (WordUtils.isEmotionWord(preWord)) break;
- //判断副词
- boolean isAdverbs = WordUtils.isAdverbsWord(preWord);
- if (isAdverbs) {
- double aWeight = WordUtils.getWordWeight(preWord);
- weight = weight * aWeight;
- }
- boolean isNegative = WordUtils.isNegativeWord(preWord);
- if (isNegative) {
- if (null == negativeWord) negativeWord = preWord;
- negativeNum += 1;
- }
- index--;
- }
- //判断否定词
- if (negativeNum % 2 == 1) {
- double nWeight = WordUtils.getWordWeight(negativeWord);
- weight = weight * nWeight;
- }
- //判断感叹词
- if (isExclamatorySentence) {
- double iWeight = WordUtils.getInterjectionWeight();
- weight = weight * iWeight;
- }
- //句子权值等于情感词的权值求和
- sWeight += weight;
- }
- System.out.println("sWeight: " + sWeight);
- //文本权值等于句子的权值求和
- cWeight += sWeight;
- }
- System.out.println("cWeight: " + cWeight);
- return cWeight == 0 ? NEUTRAL : (cWeight > 0 ? COMMENDATORY : DEROGRATORY);
- }
- public static void main(String[] args) {
- String content = "我喜欢这个明星";
- System.out.println(judge(content));
- content = "我讨厌这个明星";
- System.out.println(judge(content));
- content = "我不讨厌这个明星";
- System.out.println(judge(content));
- content = "我不是非常喜欢这个明星!";
- System.out.println(judge(content));
- content = "这本小说太无聊了,内容枯燥,文笔特别烂。";
- System.out.println(judge(content));
- System.exit(0);
- }
- }