使用 pandas 分析京东评论者的衣服购买情况,并用 pyecharts 生成可视化图表

pyecharts官网: https://pyecharts.org/#/zh-cn/composite_charts 

# https://blog.csdn.net/weixin_45081575
import os
import json
import requests
import pandas as pd
import jieba.analyse
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.globals import SymbolType
from pyecharts.charts import Pie,Bar,Map,WordCloud,Liquid,Page

# JD product-comment endpoint. The "{}" placeholder is filled with the page
# index by get_comment(); the response is JSONP wrapped in the callback named
# in the "callback" query parameter.
url = "https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv59&productId=100001068301&score=0&sortType=5&page={}&pageSize=10&isShadowSku=0&rid=0&fold=1"
# url = "https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100002148075&score=0&sortType=5&page={}&pageSize=10&isShadowSku=0&rid=0&fold=1"

# HTTP headers sent with every comment request.
headers = {
    'Referer': 'https://item.jd.com/100001068301.html',
    # 'Sec-Fetch-Mode': 'no-cors',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
}
# Stop-word file; word_cloud() loads it into jieba only if the file exists.
stop_words_txt = "stop_words.txt"


def get_comment(file_name):
    """Download 50 pages of comments and append them to an Excel file.

    Each page is requested from the module-level ``url`` (formatted with the
    zero-based page index) using the module-level ``headers``. Every comment
    becomes one row with the keys ``num``, ``nickname``, ``bra_size``,
    ``color``, ``comment`` and ``date``; rows are handed to
    ``save_to_excel`` page by page so that already-fetched pages survive a
    failure on a later page.

    Args:
        file_name: Path of the Excel workbook to create or extend.

    Raises:
        requests.RequestException: On network errors, timeouts or a non-2xx
            HTTP status.
        ValueError: If a response is not a ``callback(...)`` JSONP document
            or its payload is not valid JSON.
    """
    num = 1  # running row number across all pages
    for page in range(50):
        print(f"处理第{page+1}页")
        # A timeout keeps a stalled connection from hanging the whole run.
        resp = requests.get(url.format(page), headers=headers, timeout=10)
        resp.raise_for_status()

        # The body is JSONP: ``callbackName({...});``. Locate the parentheses
        # instead of slicing at fixed offsets so that a different callback
        # name or a missing trailing ";" does not corrupt the payload.
        text = resp.text
        start = text.find("(")
        end = text.rfind(")")
        if start == -1 or end < start:
            raise ValueError(f"page {page}: response is not a JSONP document")
        resp_list = json.loads(text[start + 1:end])

        # content: review text; productColor: color; productSize: size;
        # referenceTime: purchase time; nickname: reviewer nickname
        comment_list = []
        for comment in resp_list["comments"]:
            print(comment["nickname"])
            data = {
                "num": num,
                "nickname": comment["nickname"],
                "bra_size": comment['productSize'],
                "color": comment['productColor'],
                # Flatten multi-line reviews so each stays in a single cell line.
                "comment": (comment['content']).replace("\n", " "),
                "date": comment['referenceTime'],
            }
            comment_list.append(data)
            num += 1
        save_to_excel(file_name, comment_list)
    print("表格保存完毕")


def save_to_excel(file_name, comment_list):
    """Append comment rows to an Excel workbook, creating it if necessary.

    If ``file_name`` already exists its first sheet is read and the new rows
    are added after the existing ones; otherwise a new workbook is created.
    The result is written to the sheet ``jd_comment`` without the index.

    Args:
        file_name: Path of the ``.xlsx`` file to create or extend.
        comment_list: List of dicts with the keys ``num``, ``nickname``,
            ``bra_size``, ``color``, ``comment`` and ``date``. May be empty.
    """
    columns = ["num", "nickname", "bra_size", "color", "comment", "date"]
    # Passing ``columns`` guarantees the expected columns exist even when
    # ``comment_list`` is empty, so ``to_excel(columns=...)`` cannot fail.
    df = pd.DataFrame(comment_list, columns=columns)
    if os.path.exists(file_name):
        existing = pd.read_excel(file_name)
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported replacement. Skip it when there is nothing to add.
        df = existing if df.empty else pd.concat([existing, df], ignore_index=True)
    # The context manager saves and closes the workbook even on error
    # (ExcelWriter.save() no longer exists in pandas 2.0). The former
    # ``encoding`` argument was likewise removed and was never needed for xlsx.
    with pd.ExcelWriter(file_name) as writer:
        df.to_excel(excel_writer=writer, sheet_name="jd_comment",
                    columns=columns, index=False)
# Color distribution bar chart https://blog.csdn.net/weixin_45081575/article/details/103449805
def color_chart(df):
    """Render a bar chart of the ten most frequent colors to "颜色柱状图.html".

    Args:
        df: DataFrame with a ``color`` column.
    """
    print("准备生成:颜色分布柱状图")
    # value_counts() is ordered by descending count, so the head is the top ten.
    top_colors = list(df.color.value_counts().items())[:10]
    labels = [name for name, _ in top_colors]
    counts = [count for _, count in top_colors]

    chart = Bar()
    chart.add_xaxis(labels)
    chart.add_yaxis("颜色购买统计", counts)
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="颜色分布柱状图"),
        xaxis_opts=opts.AxisOpts(name="颜色"),
        yaxis_opts=opts.AxisOpts(name="数量"),
        toolbox_opts=opts.ToolboxOpts(),  # enable the chart toolbox
    )
    chart.render(path="颜色柱状图.html")

# Buyer distribution bar chart
def nick_name(df):
    """Render a bar chart of the ten most frequent nicknames to "购买者分布柱状图.html".

    Args:
        df: DataFrame with a ``nickname`` column.
    """
    print("准备生成:购买者分布柱状图")
    top_buyers = list(df.nickname.value_counts().items())[:10]
    names = [name for name, _ in top_buyers]
    counts = [count for _, count in top_buyers]

    chart = Bar()
    chart.add_xaxis(names)
    chart.add_yaxis("购买者数量", counts)
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="购买者分布柱状图"),
        xaxis_opts=opts.AxisOpts(name="购买者"),
        yaxis_opts=opts.AxisOpts(name="数量"),
        toolbox_opts=opts.ToolboxOpts(),
    )
    chart.render("购买者分布柱状图.html")
# Size distribution chart
def size_chart(df):
    """Render a bar chart of all sizes, ordered by size label, to "尺码柱状图.html".

    Args:
        df: DataFrame with a ``bra_size`` column.
    """
    print("准备生成:尺码分布柱状图")
    # Sort the (label, count) pairs so the x axis follows label order.
    size_counts = list(df.bra_size.value_counts().items())
    size_counts.sort()
    labels = [label for label, _ in size_counts]
    counts = [count for _, count in size_counts]

    chart = Bar()
    chart.add_xaxis(labels)
    chart.add_yaxis("尺码购买统计", counts)
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="尺码分布柱状图"),
        xaxis_opts=opts.AxisOpts(name="尺码"),
        yaxis_opts=opts.AxisOpts(name="数量"),
        toolbox_opts=opts.ToolboxOpts(),
    )
    chart.render("尺码柱状图.html")

# Size-range pie chart and bar chart
def avg_cup(df):
    """Aggregate size labels into the groups A/B/C/D and chart the totals.

    A size label contributes its count to every group whose letter occurs
    anywhere in the label. The totals are rendered as a bar chart
    ("区间柱状图.html") and a pie chart ("区间饼图.html").

    Args:
        df: DataFrame with a ``bra_size`` column.

    Returns:
        Tuple ``(bar, pie)`` of the two rendered pyecharts chart objects.
    """
    print("准备生成:区间饼图和柱状图")
    cup_dic = dict.fromkeys("ABCD", 0)
    for size, count in df.bra_size.value_counts().items():
        # Labels that are purely numeric come back from Excel as int/float;
        # convert to text so the substring test cannot raise TypeError.
        label = str(size)
        for cup in cup_dic:
            if cup in label:
                cup_dic[cup] += int(count)
    bar = (
        Bar()
            .add_xaxis(list(cup_dic.keys()))
            .add_yaxis("尺码数量", list(cup_dic.values()))
            .set_global_opts(title_opts=opts.TitleOpts(title="尺码区间柱状图"),
                             xaxis_opts=opts.AxisOpts(name="尺码"),
                             yaxis_opts=opts.AxisOpts(name="数量"),
                             toolbox_opts=opts.ToolboxOpts()
                             )
    )
    bar.render("区间柱状图.html")
    pie = (
        Pie()
            .add("数量", list(cup_dic.items()))
            .set_global_opts(title_opts=opts.TitleOpts(title="尺码区间饼图"))
            # {b} is the name, {c} the count, {d} the percentage
            .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{c}(占比:{d}%)"))
    )
    pie.render("区间饼图.html")
    return (bar, pie)

# Comment word cloud
def word_cloud(df):
    """Build a word cloud of the top 65 TextRank keywords of all comments.

    If the stop-word file named by ``stop_words_txt`` exists it is loaded
    into jieba first. The chart is rendered to "词云.html".

    Args:
        df: DataFrame with a ``comment`` column.

    Returns:
        The rendered pyecharts ``WordCloud`` object.
    """
    print("准备生成:评论词云")
    if os.path.exists(stop_words_txt):
        jieba.analyse.set_stop_words(stop_words_txt)
    # Empty cells are read back from Excel as NaN (a float) and would make
    # str.join raise TypeError; drop them and force the rest to text.
    text = ''.join(df.comment.dropna().astype(str))
    kw_list = jieba.analyse.textrank(text, topK=65, withWeight=True)

    chart = (
        WordCloud(init_opts=opts.InitOpts(bg_color='#c7edcc'))
            # kw_list is a list of (word, weight); word_size_range is the font
            # size range and shape the outline of the cloud.
            # Shapes: RECT, ROUND_RECT, TRIANGLE, DIAMOND, ARROW
            # A custom outline can be given instead via mask_image="aizhong-logo.png"
            .add("", kw_list, word_size_range=[15, 100], shape=SymbolType.DIAMOND)
            .set_global_opts(title_opts=opts.TitleOpts(title="评论标题词云Top65"),
                             toolbox_opts=opts.ToolboxOpts())
    )
    chart.render("词云.html")
    return chart
# Liquid (water-drop) chart
def water():
    """Render a demo liquid chart with fixed values to "今日湿度水滴图.html".

    Returns:
        The rendered pyecharts ``Liquid`` object.
    """
    print("准备生成:今日湿度水滴图")
    title = opts.TitleOpts(title="今日湿度水滴图")
    toolbox = opts.ToolboxOpts()

    chart = Liquid()
    # Per the original note: the first value is the displayed percentage,
    # the second the amount of water.
    chart.add("lq", [0.45,0.5,0.6], is_outline_show=False, shape=SymbolType.DIAMOND)
    chart.set_global_opts(title_opts=title, toolbox_opts=toolbox)
    chart.render("今日湿度水滴图.html")
    return chart

if __name__ == '__main__':
    # Script entry point: fetch the comments once, then build every chart.
    file_name = "jd_comment.xlsx"
    if not os.path.exists(file_name):
        print("表格不存在")
        get_comment(file_name)
    df = pd.read_excel(file_name)
    color_chart(df)
    # Use a distinct name so the word_cloud function is not overwritten.
    cloud = word_cloud(df)
    nick_name(df)
    size_chart(df)
    bar, pie = avg_cup(df)
    liquid = water()

    # Combined page, see https://pyecharts.org/#/zh-cn/composite_charts
    page = Page(layout=Page.DraggablePageLayout)
    page.add(liquid, bar, pie, cloud)

    all_html = "all.html"
    cfg_file = "chart_config.json"
    # Render all.html only once: chart_config.json refers to the charts of
    # that particular file, so re-rendering would invalidate a saved layout.
    if not os.path.exists(all_html):
        page.render(all_html)
    if os.path.exists(cfg_file):
        # Re-read all.html and apply the saved layout; the result is written
        # to the default destination resize_render.html.
        Page.save_resize_html(all_html, cfg_file=cfg_file)
    else:
        # First run: the layout has not been arranged yet, so there is
        # nothing to apply. Tell the user what to do next.
        print("已生成 all.html:请在浏览器中打开并拖拽调整布局,点击左上角 Save Config 得到 chart_config.json,放到当前目录后再次运行本脚本")

参考:https://blog.csdn.net/weixin_45081575/article/details/103449805

其中,停用词文件 stop_words.txt 的第一行需要留空,从第二行开始每行写一个要过滤的词,并以 UTF-8 编码保存,例如:京东

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值