本文集成了网上几个成熟的词云图方案,整理成一个比较靠谱的实现。
结构:
在任意一空文件夹中放入main.py,main2.py
以及simhei.ttf(自选字体,可改名,代码自改),stop.txt(停用词),001.png(词云用背景框架)
main.py内容:
import requests
def get_comments(product_id, sort_type, page, page_size):
    """Fetch one page of product comments from the JD comment endpoint.

    Args:
        product_id: Product ID, sent as the ``productId`` query field.
        sort_type: '好评' selects score '3'; any other value selects score '1'.
        page: Page index, sent as the ``page`` query field.
        page_size: Number of comments requested per page.

    Returns:
        The list stored under the response's 'comments' key, or an empty list
        when the request fails, the status is not 200, the body is not JSON,
        or the key is absent/null.
    """
    # Map the requested review type onto the score value the endpoint expects.
    score = '3' if sort_type == '好评' else '1'
    # Passing the query as a dict lets requests URL-encode the user-supplied
    # product ID instead of splicing it raw into the URL.
    params = {
        'appid': 'item-v3',
        'functionId': 'pc_club_productPageComments',
        'client': 'pc',
        'clientVersion': '1.0.0',
        't': '1710622550689',
        'loginType': '3',
        'uuid': '181111935.1997641277.1707936015.1710416671.1710620660.8',
        'productId': product_id,
        'score': score,
        'sortType': '5',
        'page': page,
        'pageSize': page_size,
        'isShadowSku': '0',
        'fold': '1',
        'bbtf': '',
        'shield': '',
    }
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get("https://api.m.jd.com/", params=params, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"请求异常:{e}")
        return []

    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        return []

    try:
        json_data = response.json()
    except ValueError as e:
        # A 200 response with a non-JSON body (e.g. an HTML block page).
        print(f"请求异常:{e}")
        return []

    if isinstance(json_data, dict):
        # ``or []`` also covers a present-but-null 'comments' field.
        return json_data.get('comments') or []
    return []
def filter_comments(comments):
    """Drop comments that carry no usable review text.

    A comment is kept only when its 'content' value is a string that does not
    contain the placeholder text used for reviews left without content.
    Records with a missing or non-string 'content' are dropped instead of
    raising.

    Args:
        comments: Iterable of comment dicts.

    Returns:
        A new list with the retained comment dicts, in their original order.
    """
    placeholder = '此用户未填写评价内容'
    kept = []
    for comment in comments:
        content = comment.get('content')
        if isinstance(content, str) and placeholder not in content:
            kept.append(comment)
    return kept
def save_comments_to_file(comments, filename):
    """Write each comment's 'content' to *filename*, one per line, as UTF-8.

    The file is opened in 'w' mode, so any existing content is replaced.

    Args:
        comments: Iterable of comment dicts, each with a string 'content'.
        filename: Path of the output text file.
    """
    with open(filename, 'w', encoding='utf-8') as file:
        # One buffered writelines call instead of a write per comment.
        file.writelines(comment['content'] + '\n' for comment in comments)
def main():
    """Interactively download product comments and save them to text.txt.

    Prompts for a product ID and a review type ('好评' or '差评'), fetches
    comment pages until a page comes back empty or 200 comments have been
    collected, filters out placeholder comments, writes the remaining texts
    to 'text.txt' and prints them.
    """
    product_id = input("请输入商品ID: ").strip()
    # Strip surrounding whitespace so a stray space does not reject a valid
    # choice; case folding is a no-op for these CJK labels.
    sort_type = input("请输入好评或差评的选择('好评' 或 '差评'): ").strip()
    if sort_type not in ('好评', '差评'):
        print("输入无效,请输入'好评'或'差评'")
        return

    page_size = 10       # comments requested per page
    max_comments = 200   # upper bound on collected comments
    comments = []
    page = 0
    # Keep paging until the cap is reached or a page comes back empty.
    while len(comments) < max_comments:
        comments_page = get_comments(product_id, sort_type, page, page_size)
        if not comments_page:
            break
        comments.extend(comments_page)
        page += 1
    # Enforce the cap even if the last page pushed the total past it.
    del comments[max_comments:]

    filtered_comments = filter_comments(comments)
    if not filtered_comments:
        print("没有评论可显示")
        return

    filename = 'text.txt'
    save_comments_to_file(filtered_comments, filename)
    print(f"评论已保存到文件:{filename}")
    for comment in filtered_comments:
        print(comment['content'])


if __name__ == "__main__":
    main()
main2.py内容:
import jieba
from wordcloud import WordCloud
import numpy as np
from PIL import Image, ImageDraw
from matplotlib import colors
import re
# Ask which kind of reviews is being visualised; the answer only selects the
# colour palette used for the words.
user_input = input("请输入您的评价(好评/差评):").strip()
if user_input.startswith("好评"):
    good_review_colors = ['#90EE90', '#ADFF2F', '#FFD700', '#FFA07A', '#32CD32']
    colormaps = colors.ListedColormap(good_review_colors)
elif user_input.startswith("差评"):
    bad_review_colors = ['#FF0000', '#FF6347', '#FFA07A', '#FF7F50', '#CD5C5C']
    colormaps = colors.ListedColormap(bad_review_colors)
else:
    # Fallback palette for any other answer.
    colormaps = colors.ListedColormap([
        '#ADD8E6', '#87CEEB', '#6495ED', '#4682B4', '#1874CD',
        '#104E8B', '#0000FF', '#00008B', '#000080', '#191970'
    ])

# Read the comment text.
with open(r"text.txt", "r", encoding="utf-8") as f:
    text = f.read()

# Replace punctuation, emoji and whitespace with a single space rather than
# deleting them, so the segmenter cannot join characters from two different
# comments (or from both sides of a punctuation mark) into one word.
clean_text = re.sub(r'[^\w]+', ' ', text)

# Segment the text with jieba.
words_list_jieba = jieba.lcut(clean_text)

# Load the stop-word list, one entry per line.
stop_words = {'\n'}
with open("stop.txt", 'r', encoding='utf-8') as f1:
    stop_words.update(line.strip() for line in f1)

# Count word frequencies, skipping stop words and whitespace-only tokens
# (the separators introduced above).
word_freq = {}
for word in words_list_jieba:
    if word in stop_words or not word.strip():
        continue
    word_freq[word] = word_freq.get(word, 0) + 1

# fit_words cannot lay out an empty frequency table; stop with a clear message.
if not word_freq:
    raise SystemExit("没有可用于生成词云的词语")

# Load the mask image. Converting to RGB gives 0-255 channel values even for
# palette or 1-bit PNGs, whose raw arrays hold palette indices or booleans.
with Image.open('001.png') as mask_file:
    background_image = np.array(mask_file.convert('RGB'))

# Build the word cloud. ``colormap`` applies the palette chosen above; without
# it the library's default colours are used and the user's answer has no effect.
# NOTE(review): per the wordcloud docs ``stopwords`` is ignored when generating
# from frequencies; it is kept here because filtering already happened above.
wordcloud = WordCloud(
    font_path='simhei.ttf',
    prefer_horizontal=0.99,
    background_color='white',
    max_words=10000,
    max_font_size=200,
    min_font_size=5,
    stopwords=stop_words,
    mask=background_image,
    colormap=colormaps,
    repeat=True
).fit_words(word_freq)

# Draw a border around the rendered image, then show and save it.
image = wordcloud.to_image()
draw = ImageDraw.Draw(image)
border_color = 'black'
border_width = 5
width, height = image.size
# Rectangle coordinates are inclusive, so the last valid pixel is
# (width - 1, height - 1); using (width, height) puts the outer edge off-image.
draw.rectangle(
    [(0, 0), (width - 1, height - 1)],
    outline=border_color,
    width=border_width,
)
image.show()
image.save('词云图_带边框.png')
实例:
①停用词格式:
一下
一个
一些
一何
一切
一则
一则通过
一天
一定
一方面
一旦
...
②先运行main.py(抓取评论并生成text.txt),再运行main2.py(读取text.txt生成词云图)
③001.png内容:
④词云图效果: