京东评论词云图解决方案

code_ent

于 2024-03-17 06:52:22 发布

阅读量457

点赞数 7

分类专栏：统计学与编程　　文章标签：python

本文链接：https://blog.csdn.net/code_ent/article/details/136774424

版权

统计学与编程专栏收录该内容

6 篇文章 0 订阅

订阅专栏

本文整合了网上几个成熟的词云图方案，构造出一个比较靠谱的京东评论词云图解决方案。

结构：

在任意一个空文件夹中放入 main.py 和 main2.py，

以及 simhei.ttf（字体文件，可换用其他字体或改名，但需同步修改代码中的 font_path）、stop.txt（停用词表，每行一个词）、001.png（用作词云形状蒙版的背景图片）。

main.py内容：

import requests


def get_comments(product_id, sort_type, page, page_size):
    """Fetch one page of comments for a JD product.

    Args:
        product_id: Product identifier interpolated into the request URL.
        sort_type: '好评' requests score 3; any other value requests score 1.
        page: Page index sent to the API.
        page_size: Number of comments requested per page.

    Returns:
        The list stored under the ``comments`` key of the JSON response, or
        an empty list when the request fails, the status code is not 200,
        the body is not valid JSON, or the key is missing/empty.
    """
    # Map the user's good/bad choice onto the score filter used by the API.
    score = '3' if sort_type == '好评' else '1'

    # Build the comment-listing URL.
    url = f"https://api.m.jd.com/?appid=item-v3&functionId=pc_club_productPageComments&client=pc&clientVersion=1.0.0&t=1710622550689&loginType=3&uuid=181111935.1997641277.1707936015.1710416671.1710620660.8&productId={product_id}&score={score}&sortType=5&page={page}&pageSize={page_size}&isShadowSku=0&fold=1&bbtf=&shield="
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url=url, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"请求异常：{e}")
        return []

    if response.status_code != 200:
        print(f"请求失败，状态码：{response.status_code}")
        return []

    try:
        json_data = response.json()
    except ValueError as e:
        # Depending on the requests version, a non-JSON body raises a
        # ValueError subclass that is not a RequestException.
        print(f"响应解析失败：{e}")
        return []

    if not isinstance(json_data, dict):
        return []
    # A missing or null 'comments' entry is reported as "no comments".
    return json_data.get('comments') or []


def filter_comments(comments):
    """Return the comments whose text is not the empty-review placeholder.

    Args:
        comments: Iterable of dicts, each with a 'content' string.

    Returns:
        A new list, in the original order, of the entries whose 'content'
        does not contain the placeholder text.
    """
    placeholder = '此用户未填写评价内容'
    kept = []
    for entry in comments:
        if placeholder in entry['content']:
            continue
        kept.append(entry)
    return kept

def save_comments_to_file(comments, filename):
    """Write the 'content' of every comment to a UTF-8 file, one per line.

    Args:
        comments: Iterable of dicts, each with a 'content' string.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, 'w', encoding='utf-8') as out:
        out.writelines(entry['content'] + '\n' for entry in comments)
def main():
    """Interactively download JD comments and save them to ``text.txt``.

    Prompts for a product ID and a review type ('好评' or '差评'), requests
    successive pages through get_comments() until a page comes back empty or
    ``max_comments`` entries have been collected, removes placeholder
    comments with filter_comments(), then writes the remaining comments to
    ``text.txt`` and prints them.
    """
    # Strip surrounding whitespace so a stray space does not corrupt the
    # product ID or make a valid choice fail validation.
    product_id = input("请输入商品ID: ").strip()
    sort_type = input("请输入好评或差评的选择（'好评' 或 '差评'）: ").strip().lower()
    if sort_type not in ('好评', '差评'):
        print("输入无效，请输入'好评'或'差评'")
        return

    page_size = 10  # comments requested per page
    max_comments = 200  # upper bound on collected comments
    comments = []
    page = 0
    # Keep fetching until a page is empty or the cap is reached.
    while len(comments) < max_comments:
        comments_page = get_comments(product_id, sort_type, page, page_size)
        if not comments_page:
            break
        comments.extend(comments_page)
        page += 1
    # The last page may overshoot the cap; enforce it exactly.
    del comments[max_comments:]

    filtered_comments = filter_comments(comments)
    if not filtered_comments:
        print("没有评论可显示")
        return

    filename = 'text.txt'
    save_comments_to_file(filtered_comments, filename)
    print(f"评论已保存到文件：{filename}")
    for comment in filtered_comments:
        print(comment['content'])


if __name__ == "__main__":
    main()

main2.py内容（需先运行 main.py 生成 text.txt，再运行本脚本）：

import jieba
from wordcloud import WordCloud
import numpy as np
from PIL import Image, ImageDraw
from matplotlib import colors
import re

# Build a word cloud from text.txt, shaped by 001.png and coloured according
# to the review type the user enters, then show it and save it with a border.

# Ask which kind of review is being plotted; the answer only picks a palette.
user_input = input("请输入您的评价（好评/差评）：").strip()
if user_input.startswith("好评"):
    palette = ['#90EE90', '#ADFF2F', '#FFD700', '#FFA07A', '#32CD32']
elif user_input.startswith("差评"):
    palette = ['#FF0000', '#FF6347', '#FFA07A', '#FF7F50', '#CD5C5C']
else:
    # Fallback palette for any other input.
    palette = [
        '#ADD8E6', '#87CEEB', '#6495ED', '#4682B4', '#1874CD',
        '#104E8B', '#0000FF', '#00008B', '#000080', '#191970',
    ]
colormaps = colors.ListedColormap(palette)

# Read the comment text.
with open(r"text.txt", "r", encoding="utf-8") as f:
    text = f.read()

# Turn punctuation, emoji and whitespace into single spaces instead of
# deleting them, so the segmenter cannot form words across clause or comment
# boundaries; the blank tokens this produces are dropped below.
clean_text = re.sub(r'[^\w]+', ' ', text)

# Segment with jieba.
words_list_jieba = jieba.lcut(clean_text)

# Load the stop-word list, one word per line.
stop_words = set(['\n'])
with open("stop.txt", 'r', encoding='utf-8') as f1:
    for line in f1:
        stop_words.add(line.strip())

# Count word frequencies, skipping stop words and whitespace-only tokens.
word_freq = {}
for word in words_list_jieba:
    if not word.strip() or word in stop_words:
        continue
    word_freq[word] = word_freq.get(word, 0) + 1

# WordCloud cannot lay out an empty frequency table; stop with a clear message.
if not word_freq:
    raise SystemExit("text.txt 中没有可用于生成词云的词语")

# The background image is used as the mask for the cloud.
background_image = np.array(Image.open('001.png'))

# Generate the word cloud.
wordcloud = WordCloud(
    font_path='simhei.ttf',
    prefer_horizontal=0.99,
    background_color='white',
    max_words=10000,
    max_font_size=200,
    min_font_size=5,
    # NOTE: stop words are already filtered out of word_freq above.
    stopwords=stop_words,
    mask=background_image,
    # Apply the palette chosen above; without this the default colormap is used.
    colormap=colormaps,
    repeat=True
).fit_words(word_freq)

# Draw a border around the rendered image, then show and save it.
image = wordcloud.to_image()
draw = ImageDraw.Draw(image)
border_color = 'black'
border_width = 5
width, height = image.size
# The last valid pixel is (width - 1, height - 1); using (width, height)
# would push the right and bottom edges of the border one pixel off-canvas.
draw.rectangle([(0, 0), (width - 1, height - 1)], outline=border_color, width=border_width)
image.show()
image.save('词云图_带边框.png')