Python爬虫爬取抖音小店商品数据+数据可视化

爬虫代码

爬虫代码调用的是第三方数据接口,可能过一段时间就会失效;欢迎大家留言评论,我会不定期更新。

import requests
import time
# Cookie values sent with the ranking request (passed as `cookies=` to requests.post).
# NOTE(review): these look like values captured from a logged-in browser session on
# xd.newrank.cn (see NR_MAIN_SOURCE_RECORD) — presumably they expire and must be
# refreshed before the scraper works again; confirm.
cookies = {
    'token': '5549EB98B15E411DA0BD05935C0F225F',
    'tfstk': 'g1vopsc0sQ5SwD8TyEWSTmONZ3cA2u6CReedJ9QEgZ7byzeJYB2HbHn59UKF-Bb2-LpRegdhYZ8l9BBJKIwHfH9-V9n5F36CLV3tBwxWV9smPc5ZXrPVVnSUTjCrIVSuVV3tBxhz090oWUUCxtlcAZSPY_5EmiSGx9SzU9ocuMj_Tz8FLmmcYMzz8_SzuxS5v97e89oD0ZsECRNPWpRw343tphbF6WWfq_bw4a-JjKrhi7tAoJyenIfh7zb0LJJc4nKyrJwi9NR1y1Q9uxelKnSHyZLZQ-XVsCtdxUDuUTd2GL6JHVq1ZebR_KCm_oYGEefwUsqUp3Xhltj2QDF1kKbJ8LfqXRfd3dCNUIhjy6BljeJWrk2e7nK9Fs9nSr7BwG6VX3MunO-PYg5_g5RPkJsqvKPQO_SfmNF7C_i_gRW0kmm06H1PcG_tmmVQO_SfmNnmm5d5ais1W',
    'acw_tc': '0a472f9217345091456398947e0084937b6ae99590d77140bfd1bf4a248a00',
    'Hm_lvt_a19fd7224d30e3c8a6558dcb38c4beed': '1732521967,1733381547,1734087148,1734509413',
    'Hm_lpvt_a19fd7224d30e3c8a6558dcb38c4beed': '1734509413',
    'HMACCOUNT': '21B2E9F3C431CAF6',
    'sensorsdata2015jssdkcross': '%7B%22distinct_id%22%3A%22nr_7ltg9ho59%22%2C%22first_id%22%3A%2218e5b14d40423b7-08d1278a91f1d-26001b51-3686400-18e5b14d4052309%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2F%22%2C%22%24latest_utm_source%22%3A%22baidu%22%2C%22%24latest_utm_medium%22%3A%22cpc%22%2C%22%24latest_utm_campaign%22%3A%22%E6%96%B0%E6%8A%96SEM%22%2C%22%24latest_utm_term%22%3A%22%E6%96%B0%E6%8A%96%22%7D%2C%22%24device_id%22%3A%2218e5b14d40423b7-08d1278a91f1d-26001b51-3686400-18e5b14d4052309%22%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTkxZjA4MWE4OGI3ZWYtMDkyZTk1MDhlYjZiZjMtMjYwMDExNTEtMzY4NjQwMC0xOTFmMDgxYTg4YzI1MjkiLCIkaWRlbnRpdHlfbG9naW5faWQiOiJucl83bHRnOWhvNTkifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22nr_7ltg9ho59%22%7D%7D',
    'NR_MAIN_SOURCE_RECORD': '{"locationSearch":"","locationHref":"https://xd.newrank.cn/goods/hot/salesRank","referrer":"https://www.baidu.com/","source":30000,"keyword":"seo","firstReferrer":"","firstLocation":"","sourceHref":"https://xd.newrank.cn/goods/hot/salesRank"}',
    'auth_n': 'acihS1J+YcZGzUSRFhf1q09q8WdPhLV5Po6LZW6dWxedk67TpkmiwALw2uzOMhVy',
}

# HTTP headers sent with the ranking request (passed as `headers=` to requests.post).
# The commented-out raw 'Cookie' header that used to live here was removed: it had
# wrapped onto a second, uncommented line and made the dict literal a syntax error.
# The same values are already supplied through the `cookies` dict.
# NOTE(review): 'n-token' looks session-bound — presumably it has to be refreshed
# together with the cookies; confirm.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'Connection': 'keep-alive',
    'Origin': 'https://xd.newrank.cn',
    'Referer': 'https://xd.newrank.cn/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    'content-type': 'application/json',
    'gw-c-v': '10000',
    'n-token': '9116298d52d64bbfb2bafa92267f74f2',
    'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# JSON body for the ranking request. Every filter is left as an empty string
# except the page size, the ranking date, the date type and the sort column.
# The 'start' key is deliberately absent here: the request loop assigns it
# before each POST.
json_data = dict(
    size=20,
    rankDate='2024-12-17',
    rankType='',
    dateType='0',
    # Four category levels, all unset.
    cate=dict.fromkeys(('cate1', 'cate2', 'cate3', 'cate4'), ''),
    source='',
    roomCount='',
    awemeCount='',
    nature='',
    sort='sales_money',
    priceRange='',
    bigPromotionStart='',
    bigPromotionEnd='',
)
# Output column name -> key of the same value in each item of the API result list.
FIELD_MAP = {
    "商品名称": 'title',
    "商品价格": 'ana_price',
    "所属店铺": 'goods_source',
    "商品类别": 'productTypeV3',
    "商品类目": 'productTypeV2',
    "商品种类": 'productTypeV1',
    "商品销量": 'add_sales',
    "关联直播": 'room_count',
    "关联达人": 'user_count',
    "关联视频": 'aweme_count',
}

# One dict per ranked product, accumulated across all requested pages.
results = []
for start in range(1, 2):
    # The API receives 'start' as a string (presumably the page index — confirm).
    json_data['start'] = str(start)
    # Pause between requests so the endpoint is not hammered.
    time.sleep(2)
    response = requests.post(
        'https://gw.newrank.cn/api/xd/xdnphb/nr/cloud/douyin/new/rank/hotGoodsSalesRank',
        cookies=cookies,
        headers=headers,
        json=json_data,
        timeout=30,  # without a timeout a stalled connection would hang forever
    )
    # Fail on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()
    payload = response.json()
    try:
        res_list = payload['data']['list']
    except (KeyError, TypeError) as err:
        # Surface the payload so an unexpected response shape is diagnosable.
        raise RuntimeError(
            f'Unexpected API response (expired cookies/token?): {payload!r}'
        ) from err
    results.extend(
        {column: item[key] for column, key in FIELD_MAP.items()}
        for item in res_list
    )

# Print once after all pages are collected; the original printed the whole
# growing list again after every single item.
print(results)

数据分析可视化

import pandas as pd 
import numpy as np 
import jieba 
import time 

from pyecharts.charts import Bar,Line,Map,Page,Pie  
from pyecharts import options as opts 
from pyecharts.globals import SymbolType 
# Load the product ranking spreadsheet into a DataFrame and preview the first 20 rows.
data = pd.read_excel(io='/home/mw/input/douyin9762/抖音近期商品热门商品排行.xlsx')
data.head(n=20)

# Column dtypes and non-null counts.
data.info()

# Summary statistics of the numeric columns.
data.describe()

研究方法

商品类别分析

# Distinct values of the product-category column (notebook display).
data["商品类别"].unique()

# The ten most frequent product categories and how many items each has.
bar_list = data["商品类别"].value_counts().head(10)
bar_list

# Bar chart: categories on the x axis, item counts on the y axis.
bar = Bar()
bar.add_xaxis(bar_list.index.tolist())
bar.add_yaxis("商品个数", bar_list.values.tolist())

# Chart title and axis names. The title used to read "男女个数"
# (male/female count), a leftover that did not describe this chart.
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="商品类别个数", subtitle="数量"),
    xaxis_opts=opts.AxisOpts(name="商品类别"),
    yaxis_opts=opts.AxisOpts(name="个数"),
)

# Render the chart inline in the notebook output.
bar.render_notebook()

通过数据可视化分析可以知道,目前是2024年底,正值冬天,卖羽绒服是比较赚钱的,也符合当下的季节需求;其次,热卖的商品以服装类居多。

# Product price analysis.
# `data1` was never assigned anywhere in the visible script, so the original
# cell failed with NameError; work on a copy so `data` itself stays untouched.
# NOTE(review): if an omitted cell was meant to clean `data` first, apply it here.
data1 = data.copy()
data1["商品价格"] = data1["商品价格"].astype(int)
data1.info()

# Fixed-width price buckets; prices outside (0, 5000] become NaN.
data1["price_cut"] = pd.cut(data1["商品价格"], bins=[0, 500, 1000, 1500, 2000, 3000, 5000])
data1.head(20)

# List the bucket intervals (the original `.categori` was a truncated attribute name).
data1.price_cut.cat.categories

# Second view: six equal-frequency (quantile) price tiers with readable labels.
con_data2 = data1.copy()
con_data2["price_cut"] = pd.qcut(con_data2["商品价格"], 6, labels=['实惠入门', '经济优选', '亲民进阶', '价值之选', '舒适尊享', '旗舰实惠'])
con_data2.head(10)

# Number of products per price tier; feeds the pie chart.
pie_list = con_data2.price_cut.value_counts()
pie_list

# Ring-shaped pie chart of how many products fall into each price tier.
pie = Pie()
pie.add(
    # Was "访问来源" (traffic source), a leftover label unrelated to this chart.
    series_name="价格区间",
    # pyecharts expects [name, value] pairs.
    data_pair=[
        list(pair) for pair in zip(
            pie_list.index.tolist(),
            pie_list.values.tolist(),
        )
    ],
    radius=["40%", "70%"],  # inner and outer radius
)
pie.set_global_opts(
    title_opts=opts.TitleOpts(title='抖音近期热卖价格状况表现'),
    legend_opts=opts.LegendOpts(orient='vertical', pos_top='15%', pos_left='2%'),
)
# Label each slice as "<name>:<percentage>%".
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
pie.set_colors(['#EF9050', '#3B7BA9', '#6FB27C', '#CC0033', '#003399', '#800080'])
pie.render_notebook()

通过数据分析可以知道,大部分羽绒服的价格都在0-500之间,占比将近百分之20;其次是500-1000之间。对于普通人来说,基本就是这几个区间了。当然,因为我拿到的数据只有500条,所以这里可能还是会有误差。

# Distinct values of the second-level category column (notebook display).
con_data2["商品类目"].unique()

# The ten most frequent second-level categories with their item counts.
bar2_list = con_data2["商品类目"].value_counts().head(10)
bar2_list

# Line chart built in one chained expression: categories on the x axis,
# item counts on the y axis, then title and axis names.
line = (
    Line()
    .add_xaxis(bar2_list.index.tolist())
    .add_yaxis("商品类目个数", bar2_list.values.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="商品类目个数", subtitle="数量"),
        xaxis_opts=opts.AxisOpts(name="商品类目"),
        yaxis_opts=opts.AxisOpts(name="商品个数"),
    )
)

# Render the chart inline in the notebook output.
line.render_notebook()

目前抖音小店里女装产品是最多的,这个季节比较适合卖衣服;其次是护肤品等。

def get_cut_words(content_series, stop_words=None, extra_words=None):
    """Tokenize a text series with jieba and return the filtered tokens.

    All entries of ``content_series`` are joined with '。' into one string,
    segmented with ``jieba.lcut`` (``cut_all=False``) and filtered.

    Args:
        content_series: Object exposing ``.str.cat`` (e.g. a pandas Series
            of strings) holding the texts to segment.
        stop_words: Optional iterable of tokens to drop. Defaults to no
            stop words, which matches the previous hard-coded empty list.
        extra_words: Optional iterable of words to register with jieba via
            ``jieba.add_word`` before segmenting. Defaults to the four
            product-title terms that were previously hard-coded.

    Returns:
        list[str]: Tokens of length >= 2 that are not in ``stop_words``,
        in segmentation order.
    """
    # A frozenset gives O(1) membership tests however long the stop list is.
    excluded = frozenset(stop_words) if stop_words is not None else frozenset()

    if extra_words is None:
        extra_words = ('中长款', '连帽长', '防滑软底', '2024新款')
    for word in extra_words:
        jieba.add_word(word)

    # Segment everything in one pass over the concatenated text.
    tokens = jieba.lcut(content_series.str.cat(sep='。'), cut_all=False)

    # Drop single-character tokens (this also removes the '。' separator)
    # and anything listed as a stop word.
    return [token for token in tokens if len(token) >= 2 and token not in excluded]
import jieba
# Segment every product title into tokens and preview the first 20.
text = get_cut_words(data["商品名称"])
text[0:20]

!pip install stylecloud

import stylecloud
from pathlib import Path
from IPython.display import Image # 用于在jupyter lab中显示本地图片

# Generate a word cloud from the title tokens and display the resulting PNG.
wordcloud_png = '抖音商品标题关键字数据可视化.png'
stylecloud.gen_stylecloud(
    text=' '.join(text),
    collocations=False,
    # Path to a font file; presumably needed so Chinese glyphs render — confirm.
    font_path=r'/home/mw/input/simhei4936/SimHei.ttf',
    icon_name='fas fa-heart',
    size=578,
    output_name=wordcloud_png,
)
Image(filename=wordcloud_png)

 

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

wangzaojun

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值