爬虫练习:实现对淘宝商品信息的批量采集

一、采集数据内容(淘宝主页面,搜索某款商品)

二、打开开发者工具查找数据包(F12)

三、查找url和User-Agent(搜索商品前先登录淘宝账号,代码中要使用Cookie)

四、相关代码(如果遇到无法爬取,清除电脑Cookies,重新登录)

# @Time: 2024/1/20 13:34
# @Author: 马龙强
# @File: 实现对淘宝商品信息的批量采集.py
# @software: PyCharm
"""
一、找到数据来源
    https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/
    https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?jsv=2.6.2&appKey=12574478&t=1705731765855&sign=6357d48f714f86dda710d52b2899050b&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2234385%22%2C%22params%22%3A%22%7B%5C%22device%5C%22%3A%5C%22HMA-AL00%5C%22%2C%5C%22isBeta%5C%22%3A%5C%22false%5C%22%2C%5C%22grayHair%5C%22%3A%5C%22false%5C%22%2C%5C%22from%5C%22%3A%5C%22nt_history%5C%22%2C%5C%22brand%5C%22%3A%5C%22HUAWEI%5C%22%2C%5C%22info%5C%22%3A%5C%22wifi%5C%22%2C%5C%22index%5C%22%3A%5C%224%5C%22%2C%5C%22rainbow%5C%22%3A%5C%22%5C%22%2C%5C%22schemaType%5C%22%3A%5C%22auction%5C%22%2C%5C%22elderHome%5C%22%3A%5C%22false%5C%22%2C%5C%22isEnterSrpSearch%5C%22%3A%5C%22true%5C%22%2C%5C%22newSearch%5C%22%3A%5C%22false%5C%22%2C%5C%22network%5C%22%3A%5C%22wifi%5C%22%2C%5C%22subtype%5C%22%3A%5C%22%5C%22%2C%5C%22hasPreposeFilter%5C%22%3A%5C%22false%5C%22%2C%5C%22prepositionVersion%5C%22%3A%5C%22v2%5C%22%2C%5C%22client_os%5C%22%3A%5C%22Android%5C%22%2C%5C%22gpsEnabled%5C%22%3A%5C%22false%5C%22%2C%5C%22searchDoorFrom%5C%22%3A%5C%22srp%5C%22%2C%5C%22debug_rerankNewOpenCard%5C%22%3A%5C%22false%5C%22%2C%5C%22homePageVersion%5C%22%3A%5C%22v7%5C%22%2C%5C%22searchElderHomeOpen%5C%22%3A%5C%22false%5C%22%2C%5C%22search_action%5C%22%3A%5C%22initiative%5C%22%2C%5C%22sugg%5C%22%3A%5C%22_4_1%5C%22%2C%5C%22sversion%5C%22%3A%5C%2213.6%5C%22%2C%5C%22style%5C%22%3A%5C%22list%5C%22%2C%5C%22ttid%5C%22%3A%5C%22600000%40taobao_pc_10.7.0%5C%22%2C%5C%22needTabs%5C%22%3A%5C%22true%5C%22%2C%5C%22areaCode%5C%22%3A%5C%22CN%5C%22%2C%5C%22vm%5C%22%3A%5C%22nw%5C%22%2C%5C%22countryNum%5C%22%3A%5C%22156%5C%22%2C%5C%22m%5C%22%3A%5C%22pc%5C%22%2C%5C%22page%5C%22%3A1%2C%5C%22n%5C%22%3A48%2C%5C%22q%5C%22%3A%5C%22%25E8%258D%25A3%25E8%2580%2580%5C%22%2C%5C%22tab%5C%22%3A%5C%22all%5C%22%2C%5C%22pageSize%5C%22%3A48%2C%5C%22totalPage%5C%22%3A100%2C%5C%22totalResults%5C%22%3A4800%2C%5C%22sourceS%5C%22%3A%5C
%220%5C%22%2C%5C%22sort%5C%22%3A%5C%22_coefp%5C%22%2C%5C%22bcoffset%5C%22%3A%5C%22%5C%22%2C%5C%22ntoffset%5C%22%3A%5C%22%5C%22%2C%5C%22filterTag%5C%22%3A%5C%22%5C%22%2C%5C%22service%5C%22%3A%5C%22%5C%22%2C%5C%22prop%5C%22%3A%5C%22%5C%22%2C%5C%22loc%5C%22%3A%5C%22%5C%22%2C%5C%22start_price%5C%22%3Anull%2C%5C%22end_price%5C%22%3Anull%2C%5C%22startPrice%5C%22%3Anull%2C%5C%22endPrice%5C%22%3Anull%2C%5C%22itemIds%5C%22%3Anull%2C%5C%22p4pIds%5

二、代码实现
    1.发送请求
    2.获取数据
    3.解析数据
    4.保存数据


"""
import requests
from pprint import pprint
import csv

# Create (or truncate) taobao.csv and write the column-header row, so every
# run starts from a fresh file.
columns = ['title', 'price', 'Sales', 'city', 'shop', 'service', 'shopurl']
with open('taobao.csv', mode='w', newline='', encoding='utf-8') as f:
    csv.writer(f).writerow(columns)
# HTTP request headers sent with the search request.
# NOTE(review): the Cookie below is a hard-coded login session copied from a
# browser; presumably it expires and must be refreshed by logging in again —
# consider loading it from outside the source file instead of committing it.
headers = {
    # Identity information (session cookie of the logged-in account)
    'Cookie': 'cna=dzm5Ha92PR4CAd2wp+tQzMHc; tracknick=tb645022401; thw=cn; t=a75a2b2fff83c2df3b9d3a1ec2d38bf4; l=fBMtGicnP_kuwiROBO5CFurza77tqIRb41PzaNbMiIEGa6ndtFwBJNCTs-zXSdtjgT1UaetzmSrNYdLHR3Ap9xDDB3h2q_WonxYCPR-V.; _m_h5_tk=70f5ea04195ab021df9f25d562305abb_1705736804431; _m_h5_tk_enc=31fba5120873e09f860e52f2e2e17118; _samesite_flag_=true; 3PcFlag=1705729247972; cookie2=1a58918e04b7769bd38b8bfac5ea9bd3; _tb_token_=373eeabee1deb; xlly_s=1; sgcookie=E10047qoGwgNl3LGghfd%2FnTCxZ8z6Kt0waKjJNJj6FF02EfEbUAbXqdbLwLsqOAP2rSbc9bAHjmSnkCvtBQ3JkLLcZmLK1MPzGwbvOUUbgqkhtg%3D; unb=2201472688672; uc3=vt3=F8dD3ChHGz6ZgXqCBHM%3D&lg2=WqG3DMC9VAQiUQ%3D%3D&nk2=F5RDKJ8fCCL1kTA%3D&id2=UUphy%2FZ9sl%2BpQUBLKw%3D%3D; csg=68b08a34; lgc=tb645022401; cancelledSubSites=empty; cookie17=UUphy%2FZ9sl%2BpQUBLKw%3D%3D; dnk=tb645022401; skt=624e0fd7bfee354e; existShop=MTcwNTcyOTMwOA%3D%3D; uc4=nk4=0%40FY4I65VueavAIcxjxd7mIMYOx%2FVgbA%3D%3D&id4=0%40U2grEJGCI3VFvq%2FFoBdVzPg9aP3pff79; _cc_=V32FPkk%2Fhw%3D%3D; _l_g_=Ug%3D%3D; sg=12d; _nk_=tb645022401; cookie1=U%2BX%2BQ4yL4nyP82W%2FJ%2BUIMO%2BErfUrT8WctXAz24g1GOg%3D; mt=ci=1_1; uc1=cookie15=UtASsssmOIJ0bQ%3D%3D&cookie21=UIHiLt3xTIkz&cookie16=V32FPkk%2FxXMk5UvIbNtImtMfJQ%3D%3D&cookie14=UoYekETkNS4aOw%3D%3D&existShop=false&pas=0; tfstk=eIn2n0j0onK2JdBIuzqazqvAjaqYbkdB0cN_IADghSV0GA2gbbc6HSMMMfoaZA3XHPOAQtnrTGsXMEHGblZMOBtBAxBYXlACMIduRxUceLtWAHMxnWijZc-BHB7wdik8BafAVyOnK7mriSqawZ-W1GSZ48ATtx-VDJm83GPr37R3m02qsWD0aGjG405TESQxXq5G7r28UW9yUOWs7dN01tuVWNUxy8PBHKQOWrY7UW9RhNQTk5wzOKVc.; isg=BNXVAbX2HL-lgzhajFaho1LM5NGP0onkTacktVd6kcybrvWgHy',
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
    'referer': 'https://s.taobao.com/'
}
# Search endpoint URL copied from the browser's network panel, with two edits:
# `callback=mtopjsonp1&` is removed and `type=jsonp&dataType=jsonp` is changed
# to `type=json&dataType=json`, so the body can be decoded with response.json().
# The literal must stay on ONE physical line: a single-quoted string cannot
# contain a raw line break.
# NOTE(review): `t` and `sign` look request-specific; presumably they stop
# working once the captured session expires — confirm and re-capture if needed.
url = 'https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?jsv=2.6.2&appKey=12574478&t=1705731765855&sign=6357d48f714f86dda710d52b2899050b&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=json&dataType=json&data=%7B%22appId%22%3A%2234385%22%2C%22params%22%3A%22%7B%5C%22device%5C%22%3A%5C%22HMA-AL00%5C%22%2C%5C%22isBeta%5C%22%3A%5C%22false%5C%22%2C%5C%22grayHair%5C%22%3A%5C%22false%5C%22%2C%5C%22from%5C%22%3A%5C%22nt_history%5C%22%2C%5C%22brand%5C%22%3A%5C%22HUAWEI%5C%22%2C%5C%22info%5C%22%3A%5C%22wifi%5C%22%2C%5C%22index%5C%22%3A%5C%224%5C%22%2C%5C%22rainbow%5C%22%3A%5C%22%5C%22%2C%5C%22schemaType%5C%22%3A%5C%22auction%5C%22%2C%5C%22elderHome%5C%22%3A%5C%22false%5C%22%2C%5C%22isEnterSrpSearch%5C%22%3A%5C%22true%5C%22%2C%5C%22newSearch%5C%22%3A%5C%22false%5C%22%2C%5C%22network%5C%22%3A%5C%22wifi%5C%22%2C%5C%22subtype%5C%22%3A%5C%22%5C%22%2C%5C%22hasPreposeFilter%5C%22%3A%5C%22false%5C%22%2C%5C%22prepositionVersion%5C%22%3A%5C%22v2%5C%22%2C%5C%22client_os%5C%22%3A%5C%22Android%5C%22%2C%5C%22gpsEnabled%5C%22%3A%5C%22false%5C%22%2C%5C%22searchDoorFrom%5C%22%3A%5C%22srp%5C%22%2C%5C%22debug_rerankNewOpenCard%5C%22%3A%5C%22false%5C%22%2C%5C%22homePageVersion%5C%22%3A%5C%22v7%5C%22%2C%5C%22searchElderHomeOpen%5C%22%3A%5C%22false%5C%22%2C%5C%22search_action%5C%22%3A%5C%22initiative%5C%22%2C%5C%22sugg%5C%22%3A%5C%22_4_1%5C%22%2C%5C%22sversion%5C%22%3A%5C%2213.6%5C%22%2C%5C%22style%5C%22%3A%5C%22list%5C%22%2C%5C%22ttid%5C%22%3A%5C%22600000%40taobao_pc_10.7.0%5C%22%2C%5C%22needTabs%5C%22%3A%5C%22true%5C%22%2C%5C%22areaCode%5C%22%3A%5C%22CN%5C%22%2C%5C%22vm%5C%22%3A%5C%22nw%5C%22%2C%5C%22countryNum%5C%22%3A%5C%22156%5C%22%2C%5C%22m%5C%22%3A%5C%22pc%5C%22%2C%5C%22page%5C%22%3A1%2C%5C%22n%5C%22%3A48%2C%5C%22q%5C%22%3A%5C%22%25E8%258D%25A3%25E8%2580%2580%5C%22%2C%5C%22tab%5C%22%3A%5C%22all%5C%22%2C%5C%22pageSize%5C%22%3A48%2C%5C%22totalPage%5C%22%3A100%2C%5C%22totalResults%5C%22%3A4800%2C%5C%22sourceS%5C%22%3A%5C%220%5C%22%2C%5C%22sort%5C%22%3A%5C%22_coefp%5C%22%2C%5C%22bcoffset%5C%22%3A%5C%22%5C%22%2C%5C%22ntoffset%5C%22%3A%5C%22%5C%22%2C%5C%22filterTag%5C%22%3A%5C%22%5C%22%2C%5C%22service%5C%22%3A%5C%22%5C%22%2C%5C%22prop%5C%22%3A%5C%22%5C%22%2C%5C%22loc%5C%22%3A%5C%22%5C%22%2C%5C%22start_price%5C%22%3Anull%2C%5C%22end_price%5C%22%3Anull%2C%5C%22startPrice%5C%22%3Anull%2C%5C%22endPrice%5C%22%3Anull%2C%5C%22itemIds%5C%22%3Anull%2C%5C%22p4pIds%5C%22%3Anull%7D%22%7D'

# 1. Send the request. The timeout keeps the script from hanging forever on a
#    stalled connection, and raise_for_status() reports HTTP errors up front
#    instead of letting them surface later as confusing parse failures.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()

# 2. Get the data: the body is decoded from JSON into a dict.
json_data = response.json()

# 3. Parse the data. `or {}` / `or []` guard against a missing or null
#    'data' / 'itemsArray' entry, which would otherwise raise on the chained
#    .get() call or on iteration.
itemsArray = (json_data.get('data') or {}).get('itemsArray') or []
if not itemsArray:
    print('No items in the response; the Cookie or the signed URL may have expired.')

# 4. Save the data. The file is opened once in append mode (the header row is
#    written earlier in the script) rather than being reopened for every item.
with open('taobao.csv', mode='a', newline='', encoding='utf-8') as f:
    csv_writer = csv.writer(f)
    for item in itemsArray:
        # A missing/null "shopInfo" yields an empty shop name instead of a crash.
        shop_info = item.get("shopInfo") or {}
        title = item.get("title")
        price = item.get("priceWap")
        Sales = item.get("realSales")
        city = item.get("procity")
        shop = shop_info.get("title")
        # NOTE(review): the 'service' column is filled from "nick" and the
        # 'shopurl' column from "auctionURL" — confirm these mappings are intended.
        service = item.get("nick")
        shopurl = item.get("auctionURL")
        csv_writer.writerow([title, price, Sales, city, shop, service, shopurl])

五、爬取结果

版权声明和免责声明

本博客提供的所有爬虫代码和相关内容(以下简称“内容”)仅供参考和学习之用。任何使用或依赖这些内容的风险均由使用者自行承担。我(博客所有者)不对因使用这些内容而产生的任何直接或间接损失承担责任。

严禁将本博客提供的爬虫代码用于任何违法、不道德或侵犯第三方权益的活动。使用者应当遵守所有适用的法律法规,包括但不限于数据保护法、隐私权法和知识产权法。

如果您选择使用本博客的爬虫代码,您应当确保您的使用行为符合所有相关法律法规,并且不会损害任何人的合法权益。在任何情况下,我(博客所有者)均不对您的行为负责。

如果您对本声明有任何疑问,或者需要进一步的澄清,请通过我的联系方式与我联系。

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

马龙强_

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值