'''
每次处理两百
'''
import jieba # 分词
import re # 过滤特殊字符
import numpy as np # 辅助处理
import pandas as pd # 处理
import emoji # 过滤表情
import jieba.analyse # 分级级别权重
import imageio # 图片
import jieba.posseg as pseg # 词性标注
from wordcloud import WordCloud # 词云
import os
import matplotlib.pyplot as plt # 画布
import difflib # 相似度判断
import configparser # 配置文件导入
def cfg():
    """Load the file-path settings from ``config.ini``.

    The file is looked up relative to the current working directory and
    decoded as UTF-8.

    Returns:
        dict: option name -> value for every entry of the ``filePaths``
        section.

    Raises:
        configparser.NoSectionError: if the ``filePaths`` section is absent.
            ``ConfigParser.read`` silently ignores a missing file, so a
            missing ``config.ini`` surfaces as this same error.
    """
    conf = configparser.ConfigParser()
    conf.read("config.ini", encoding='utf-8')
    # Only the 'filePaths' section is needed; expose it as a plain dict.
    return dict(conf.items('filePaths'))
def wts_dict():
    """Read the keyword dictionary used to simplify the data.

    The file path is taken from the ``wts_dict_path`` entry of the mapping
    returned by ``cfg()``; the file is decoded as UTF-8.

    Returns:
        list[str]: one entry per line of the file, in file order, with all
        newline / carriage-return characters removed. Blank lines are kept
        as empty strings.
    """
    with open(cfg()['wts_dict_path'], encoding='utf-8') as f:
        return [line.replace("\n", "").replace("\r", "") for line in f]
def dict_load(path):
    """Load a word list from a text file.

    Prints a short loading notice, then reads ``path`` as UTF-8 (a leading
    BOM is dropped by the ``utf-8-sig`` codec).

    Args:
        path: path of the text file to read.

    Returns:
        list[str]: every line with surrounding whitespace stripped, in file
        order; lines that are empty after stripping are skipped.
    """
    print("文件加载!")
    with open(path, encoding='utf-8-sig') as f:
        # Strip each line once, then drop the ones that end up empty.
        stripped = (line.strip() for line in f)
        return [entry for entry in stripped if entry]
# 情感值计算
def sents(sent, negdict, posdict, nodict):
pos = 0 # 积极
neg = 0 # 消极
for i in range(len(sent)):
if sent[i] in negdict:
if i == 1 and sent[i - 1] in nodict:
pos = pos + 1 # 否定-消极
elif i == 1 and sent[i - 1] not in nodict:
neg = neg + 1 # 其他-消极
elif i > 1 and sent[i - 1] in nodict:
if sent[i - 2] in nodict:
neg = neg + 1 # 否定-否定-消极
else:
pos = pos + 1 # 其他-否定-消极
elif i > 1 and sent[i - 1] not in nodict:
if sent[i
Python-在线文本情感分析实验
最新推荐文章于 2024-10-06 00:43:43 发布