# Sentiment analysis: score every row of the spreadsheet with PaddleHub's
# "senta_bilstm" module and label each row positive / neutral / negative.
import paddlehub as hub
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

senta = hub.Module(name="senta_bilstm")

df = pd.read_excel('C:\\Users\\Administrator\\Desktop\\info111.xlsx')
texts = df['秘密内容'].tolist()
input_data = {'text': texts}
res = senta.sentiment_classify(data=input_data)

# One result dict per input text; copy both probabilities into the frame.
df['积极分值'] = [x['positive_probs'] for x in res]
df['消极分值'] = [x['negative_probs'] for x in res]

# Bare expression kept from the notebook original: it only displays a value
# in an interactive session and has no effect when run as a script.
df["积极分值"].mean()

# Sign of (positive - negative) decides the label; an exact tie is neutral.
t = df['积极分值'] - df['消极分值']
df["情感倾向"] = np.where(t > 0, "正面", np.where(t == 0, "中立", "负面"))

# Notebook-style display of the resulting frame (no effect in a script).
df
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.globals import ThemeType
import os

# The chart is rendered relative to the working directory, so switch to the
# Desktop first.
os.chdir(r'C:\Users\Administrator\Desktop')

# Count rows per sentiment label, largest group first.
df2 = df.groupby('情感倾向')['秘密内容'].count()
df2 = df2.sort_values(ascending=False)
regions = df2.index.to_list()
values = df2.to_list()

# Ring-shaped pie (inner radius 40%, outer 70%).  The data pairs are
# materialised into a list because a bare zip() iterator can only be
# consumed once.  `c` receives the return value of render().
c = (
    Pie(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
    .add(
        "",
        [list(pair) for pair in zip(regions, values)],
        radius=["40%", "70%"],
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="观众对成毅情感倾向",
            subtitle="数据来源:腾讯视频\t制图:菜J学Python",
            pos_top="2%",
            pos_left='center',
        )
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter="{b}:{d}%", font_size=18)
    )
    .render("数据可视化.html")
)

## Ring chart
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.globals import ThemeType
import os

# The chart is rendered relative to the working directory, so switch to the
# Desktop first.
os.chdir(r'C:\Users\Administrator\Desktop')

# Hard-coded top-5 commenter names (masked) and their comment counts.
ID = ['海啸****', '寻找****道长', '最好****18881', '我可****假猴子ha', '工商***精']
values = [53, 47, 36, 34, 34]

# Ring-shaped pie with the toolbox enabled.  The data pairs are materialised
# into a list because a bare zip() iterator can only be consumed once.
# NOTE(review): this renders to "数据可视化.html", the same file name the
# sentiment pie chart uses, so running both leaves only the later chart.
c = (
    Pie(init_opts=opts.InitOpts(theme=ThemeType.CHALK))
    .add(
        "",
        [list(pair) for pair in zip(ID, values)],
        radius=["40%", "70%"],
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="秘密微博评论者TOP5",
            subtitle="卖山楂啦prss",
            pos_top="2%",
            pos_left='center',
        ),
        toolbox_opts=opts.ToolboxOpts(
            # Whether to show the toolbox.
            is_show=True,
        ),
    )
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}", font_size=18))
    .render("数据可视化.html")
)
# 词云+s+w  (section label: word cloud)
# Word cloud image
import jieba
import pandas as pd
import stylecloud
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_excel('C:\\Users\\Administrator\\Desktop\\工作簿2.xlsx')


def get_cut_words(
    content_series,
    stop_words_path=r"C:\\Users\\Administrator\\Desktop\\chineseStopWords.txt",
):
    """Segment the texts in *content_series* with jieba and filter the tokens.

    Args:
        content_series: Object exposing ``tolist()`` (the script passes a
            DataFrame column). Every element is converted with ``str()``.
        stop_words_path: Text file with one stop word per line. Defaults to
            the path the script has always used.

    Returns:
        list: Tokens in segmentation order that are not stop words and are
        at least two characters long.

    Side effects:
        Registers a fixed set of custom words with jieba via
        ``jieba.add_word`` on every call.
    """
    # Load the stop-word list.  A set keeps the membership test below O(1)
    # per token instead of scanning a list.
    # NOTE(review): no encoding is passed, so the platform default is used;
    # confirm it matches the encoding of the stop-word file.
    with open(stop_words_path, 'r') as f:
        stop_words = {line.strip() for line in f}

    # Register domain keywords so jieba keeps them as single tokens.
    my_words = ['5G', 'CPS', '高速公路', '人工智能', '数字孪生体', '工业大数据', '智能大数据']
    for word in my_words:
        jieba.add_word(word)

    # Custom stop words.  The original list had '集团''1' with no comma, which
    # Python concatenates into the single string '集团1', so neither entry
    # was actually registered.
    my_stop_words = [
        '谢谢', '', '朋友', '...', '有没有', '集团', '1', '签署', '一根', '一个',
        '这次', '自营', '阿克苏', '印尼', '全文', '这是', '国家', '马上', '超级',
        '小哥',
    ]
    stop_words.update(my_stop_words)

    # Segment: join all rows with ';' and cut the whole text in one pass.
    content = ';'.join([str(c) for c in content_series.tolist()])
    word_num = jieba.lcut(content)

    # Filter: drop stop words and tokens shorter than two characters.
    word_num_selected = [
        token for token in word_num
        if token not in stop_words and len(token) >= 2
    ]
    return word_num_selected
text1 = get_cut_words(content_series=df['评论'])

# Substitute selected tokens before rendering.  A single dict lookup per
# token replaces the original chain of `text1[text1.index(i)] = ...`
# assignments, which rescanned the list for every match.  No replacement
# value is itself a key, so one pass gives the same result.
replacements = {
    '嘎嘣脆': '喜欢',
    '真的': '考研',
    '问问': '图书馆',
    '请问': '寝室',
    '麻烦': '图书馆',
    '谢谢': '有偿',
    '投稿': '女朋友',
    '有人': '宿舍',
    '东西': '食堂',
    '你好': '食堂',
    '那种': '奶茶',
}
text1 = [replacements.get(word, word) for word in text1]

from stylecloud import gen_stylecloud

# stylecloud takes the text as one space-separated string.
result = " ".join(text1)
gen_stylecloud(
    text=result,
    # A Chinese font must be supplied, otherwise the output is rendered
    # incorrectly.
    font_path='C:\\Windows\\Fonts\\STKAITI.TTF',
    # icon_name='fas fa-envira',
    icon_name='fas fa-hand-holding-heart',
    max_words=150,
    size=1500,
    # max_font_size=70,
    output_name='C:\\Users\\Administrator\\Desktop\\t11123.png',
)

# Word cloud image
import jieba
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import numpy as np
from collections import Counter
import PIL
# `import PIL` alone does not guarantee the Image submodule is loaded, so
# import it explicitly.
from PIL import Image

# Keep the 300 most frequent tokens.
c = Counter(text1)
common_c = c.most_common(300)
# Notebook-style display of the frequency list (no effect in a script).
common_c

# Read the mask image that shapes the cloud; the context manager closes the
# file once the pixel data has been copied into the array.
with Image.open('C:\\gs.png') as mask_image:
    mask = np.array(mask_image)

wc = WordCloud(
    # A Chinese font must be supplied, otherwise the output is rendered
    # incorrectly.
    font_path='C:/Windows/Fonts/STXINGKA.TTF',
    # Background colour.
    background_color='white',
    # Larger values give a sharper image, but very large values may be too
    # heavy for the machine.
    scale=10,
    # Shape of the word cloud.
    mask=mask,
    colormap='tab10',
    # NOTE(review): the wordcloud docs state width/height are ignored when a
    # mask is given - confirm whether these two are still needed.
    width=900, height=600,
    # max_words=300,  # maximum number of words shown in the cloud
    max_font_size=60,  # largest font size
    min_font_size=3,  # smallest font size
    random_state=50,  # fixed random state
)

# Build the cloud from the (word, count) pairs.
wc.generate_from_frequencies(dict(common_c))

# Render and display the image.
plt.imshow(wc)
plt.axis('off')
plt.show()

# Save the image.
wc.to_file('C:\\Users\\Administrator\\Desktop\\pic.jpg')