# Dataset: JD.com product reviews returned by a search for "螺蛳粉" (luosifen).
# Download link: https://pan.baidu.com/s/1tF2MEy-ZDOCexR1JHA7DnQ
# Extraction code: 6kxm
import pandas as pd
from PIL import Image
import numpy
#CalThreeKingdoms.py
import jieba
# Count how often each multi-character token occurs in the review text, print
# the 300 most frequent tokens and append them to 词频1.csv as "word,count" rows.

# Read the whole corpus; the context manager closes the handle afterwards.
with open("D:/2.txt", 'r', encoding="utf-8") as src:
    txt = src.read()

words = jieba.lcut(txt)  # segment the text into a list of tokens

count = {}  # token -> number of occurrences
for word in words:
    # Tokens that are exactly one character long are not counted.
    if len(word) == 1:
        continue
    count[word] = count.get(word, 0) + 1

# Highest frequency first. The sort is stable, so tokens with equal counts
# keep the order in which they were first seen.
items = sorted(count.items(), key=lambda x: x[-1], reverse=True)

# Slicing (instead of indexing up to 300) avoids an IndexError when the text
# has fewer than 300 distinct tokens. The file is opened once, still in append
# mode, so re-running the script adds rows to an existing 词频1.csv.
with open('词频1.csv', 'a', encoding='gbk') as f:
    # `freq` is a separate name so the `count` dictionary is not overwritten.
    for word, freq in items[:300]:
        print("{0:<10}{1:>5}".format(word, freq))
        f.write('{},{}\n'.format(word, freq))
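# Step 1 (the code above): print the word frequencies and save them to the CSV file 词频1.csv.
# Step 2 (manual): delete the unimportant words from the CSV file, then save it as 词频1.txt.
# Note: the CSV is written with GBK encoding, but the code below reads 词频1.txt as UTF-8,
# so save 词频1.txt in UTF-8 and keep one "word,count" pair per line.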
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# 生成词云
def create_word_cloud(freq_path="./词频1.txt", output_path="wordcloud2.jpg"):
    """Render a word cloud from a text file of "word,frequency" lines.

    The image is written to ``output_path`` and then shown in a matplotlib
    window.

    Args:
        freq_path: Text file with one comma-separated "word,frequency" pair
            per line. Blank lines are ignored.
        output_path: File the rendered word cloud image is saved to.

    Raises:
        IndexError: If a non-blank line contains no comma.
        ValueError: If the second field of a line is not a number.
    """
    frequencies = {}
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise become part of the first word. The `with` block
    # closes the file once it has been read.
    with open(freq_path, encoding='utf-8-sig') as freq_file:
        for line in freq_file:
            if not line.strip():
                # Tolerate blank lines, e.g. one left at the end of the file
                # after editing it by hand.
                continue
            arr = line.split(",")
            # float() ignores the trailing newline on the frequency field.
            frequencies[arr[0]] = float(arr[1])

    # Optional: to shape the cloud with a background image, build
    # color_mask = numpy.array(Image.open('map.png')) and pass mask=color_mask.
    # Optional: to drop stop words, read them with
    # open('1.txt', encoding='utf8').read().split("\n") and pass stopwords=...
    wc = WordCloud(
        font_path="simsun.ttc",    # font used for the words; chosen so Chinese text renders
        background_color="white",  # background colour of the image
        max_words=120,             # maximum number of words drawn
        max_font_size=60,          # largest font size used
        random_state=30,           # fixed seed for the random generator, so output is reproducible
        scale=10,                  # scaling between computation and drawing; larger gives a bigger image
    )
    word_cloud = wc.generate_from_frequencies(frequencies)

    # Save the rendered image to disk.
    word_cloud.to_file(output_path)

    # Show the rendered image without axes.
    plt.imshow(word_cloud)
    plt.axis("off")
    plt.show()
# Generate the word cloud from the word-frequency file.
create_word_cloud()