演唱会门票--爬取==查询

目标链接:演出

难度半颗星

爬取思路:

        1.谷歌浏览器抓包分析。

        2.抓到包后看是否有参数加密,在载荷中发现uuid参数看似加密,其实是不变的,小伙伴们别被虚晃一枪了哈。

        3.好好好,开始码代码(最最最标准写法,没有之一)。

import requests
cookies = {
    '_lxsdk_cuid': '18b6123c42bc8-0bbb517c92e5b1-26031c51-144000-18b6123c42bc8',
    '_lxsdk': '18b6123c42bc8-0bbb517c92e5b1-26031c51-144000-18b6123c42bc8',
    '_lx_utm': '',
    '_hc.v': '46ac3e9f-476a-fe69-7bbe-4a729d947072.1698141816',
    'WEBDFPID': '04w29yxww15250u107vw43vuvw01x98v81y3876x00497958z0vy6742-2013501816250-1698141815504IMIUQIIfd79fef3d01d5e9aadc18ccd4d0c95072977',
    '_lxsdk_s': '18b6123c07d-b3-f5f-a75%7C%7C18',
}

headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    # 'Cookie': '_lxsdk_cuid=18b6123c42bc8-0bbb517c92e5b1-26031c51-144000-18b6123c42bc8; _lxsdk=18b6123c42bc8-0bbb517c92e5b1-26031c51-144000-18b6123c42bc8; _lx_utm=; _hc.v=46ac3e9f-476a-fe69-7bbe-4a729d947072.1698141816; WEBDFPID=04w29yxww15250u107vw43vuvw01x98v81y3876x00497958z0vy6742-2013501816250-1698141815504IMIUQIIfd79fef3d01d5e9aadc18ccd4d0c95072977; _lxsdk_s=18b6123c07d-b3-f5f-a75%7C%7C18',
    'Origin': 'https://h5.dianping.com',
    'Pragma': 'no-cache',
    'Referer': 'https://h5.dianping.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
    'mtgsig': '{"a1":"1.1","a2":1698141976232,"a3":"04w29yxww15250u107vw43vuvw01x98v81y3876x00497958z0vy6742","a5":"MQZZxZrWYoBKATXufXfWSIjtPgYAEwZy","a6":"h1.5pcWMHp7MF3eoE3Ql4ncvF01HxHFNl4pBlnNuvCS6jmwAYuorg1yPgAk4egdGxplzsVgmHpRTkGfW7i3DQzdt4sxAs6LE5+LPJmYCr424jFK5YYrigp9SlKqXZqfvjc7GMGU9bH6qIuXgcGdzGT3SE/LXIJYJJLTkqN1iYI7nCVIxw6pHFXGzMxuMDHyPSOcwO2BvJAFVKglA6Jl9Ar1uy6ebd6PRWGPMl+Jt46SBlMf3vXmGeET3CUg9H4qiTM/yqil4r3RuQNJdFUJJHWrJj49ZLgBw+e2cU0zxX/xlgrtZVrXUZdezOScyPw5VdKEZ9ibET4zTpr9De8Zct5Qlp6hYou3mKH9BoqK2H1TZLG091hiv87TNqFw6F4nYUSOGwgAE2Hipk3N0kyLE3LHkzg==","x0":4,"d1":"40eb8b3923001a5dc723d14d63eb9161"}',
    'sec-ch-ua': '"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

params = {
    'needRank': 'true',
    'labelId': '0',
    'optimus_risk_level': '71',
    'optimus_code': '10',
    'uuid': '62r94ya7oiui39oxuq92pql00wtb96jd2e0gab5z1lk9yqtxa6g5wx02a5k7ykuu',
    'sellChannel': '1',
    'cityId': '299',
    'yodaReady': 'h5',
    'csecplatform': '4',
    'csecversion': '2.2.1',
}

# Request the performance listing and decode the JSON body.
# The ';st=0;p=1;s=20;...' segments are part of the URL path, not query parameters.
# NOTE(review): p / s presumably mean page number / page size — confirm against the site.
http_response = requests.get(
    'https://m.dianping.com/myshow/ajax/performances/1;st=0;p=1;s=20;tft=0;marketLevel=0',
    params=params,
    cookies=cookies,
    headers=headers,
    timeout=10,  # requests never times out by default; do not hang on a stalled connection
)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
http_response.raise_for_status()
response = http_response.json()
print(response)

打印结果看一下呗:

从打印结果来看,返回的数据跟网页上显示的一模一样,这就可以确定它正是我们想要的数据。接下来开始提取数据,这对大家来说再简单不过了~

提取数据,提取的字段包括:演唱会的名称、时间、地址、打折情况、价格区间。

# Collect one row per performance so the rows can be saved afterwards
# (the save step builds its DataFrame from result_lis).
result_lis = []
info_lis = response['data']
for i in info_lis:
    # Map the API field names to the output column names.
    dic = {
        '演唱会名': i['shortName'],
        '演唱时间': i['showTimeRange'],
        '演唱地址': i['address'],
        '打折': i['minDiscount'],
        '价格': i['priceRange'],
    }
    print(dic)
    result_lis.append(dic)

结果:

最后保存数据:

import pandas as pd

# result_lis is the list of row dicts (one per performance) built during extraction.
df = pd.DataFrame(result_lis)
# The context manager closes the workbook handle even if to_excel raises.
with pd.ExcelWriter('演唱会.xlsx') as writer:
    df.to_excel(writer, index=False)

保存结果:

爬取其他栏目时,只需要把 url 中 performances/1 里的 1 换成其他数字即可。

整体代码:   

# -*- coding: UTF-8 -*-

"""
@Time    : 2023/10/24 18:08
@Author  : 蝌蚪啊@
@File    : 音乐会_spider.py
"""
import pandas as pd
import requests
result_lis = []
# Cookies copied from a captured browser request; sent with the listing
# request through the `cookies=` argument of requests.get below.
# NOTE(review): these look like session/device tracking identifiers and will
# presumably expire — re-capture them from the browser if requests start failing.
cookies = {
    '_lxsdk_cuid': '18b6123c42bc8-0bbb517c92e5b1-26031c51-144000-18b6123c42bc8',
    '_lxsdk': '18b6123c42bc8-0bbb517c92e5b1-26031c51-144000-18b6123c42bc8',
    '_lx_utm': '',
    '_hc.v': '46ac3e9f-476a-fe69-7bbe-4a729d947072.1698141816',
    'WEBDFPID': '04w29yxww15250u107vw43vuvw01x98v81y3876x00497958z0vy6742-2013501816250-1698141815504IMIUQIIfd79fef3d01d5e9aadc18ccd4d0c95072977',
    '_lxsdk_s': '18b6123c07d-b3-f5f-a75%7C%7C18',
}

# HTTP request headers replayed from a captured Chrome 115 (Windows) request.
headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    # No 'Cookie' header here: the cookies are passed separately through the
    # `cookies=` argument of requests.get.
    'Origin': 'https://h5.dianping.com',
    'Pragma': 'no-cache',
    'Referer': 'https://h5.dianping.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
    # NOTE(review): the "a2" field inside this value looks like a millisecond
    # timestamp captured with the request, so the signature is presumably
    # time-limited — confirm, and re-capture if the server rejects it.
    'mtgsig': '{"a1":"1.1","a2":1698141976232,"a3":"04w29yxww15250u107vw43vuvw01x98v81y3876x00497958z0vy6742","a5":"MQZZxZrWYoBKATXufXfWSIjtPgYAEwZy","a6":"h1.5pcWMHp7MF3eoE3Ql4ncvF01HxHFNl4pBlnNuvCS6jmwAYuorg1yPgAk4egdGxplzsVgmHpRTkGfW7i3DQzdt4sxAs6LE5+LPJmYCr424jFK5YYrigp9SlKqXZqfvjc7GMGU9bH6qIuXgcGdzGT3SE/LXIJYJJLTkqN1iYI7nCVIxw6pHFXGzMxuMDHyPSOcwO2BvJAFVKglA6Jl9Ar1uy6ebd6PRWGPMl+Jt46SBlMf3vXmGeET3CUg9H4qiTM/yqil4r3RuQNJdFUJJHWrJj49ZLgBw+e2cU0zxX/xlgrtZVrXUZdezOScyPw5VdKEZ9ibET4zTpr9De8Zct5Qlp6hYou3mKH9BoqK2H1TZLG091hiv87TNqFw6F4nYUSOGwgAE2Hipk3N0kyLE3LHkzg==","x0":4,"d1":"40eb8b3923001a5dc723d14d63eb9161"}',
    'sec-ch-ua': '"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# Query-string parameters sent with the listing request.
params = {
    'needRank': 'true',
    'labelId': '0',
    'optimus_risk_level': '71',
    'optimus_code': '10',
    # Looks encrypted, but per the article's packet analysis it stays constant
    # between requests, so the captured value is reused as-is.
    'uuid': '62r94ya7oiui39oxuq92pql00wtb96jd2e0gab5z1lk9yqtxa6g5wx02a5k7ykuu',
    'sellChannel': '1',
    # NOTE(review): presumably selects the city whose shows are listed — confirm.
    'cityId': '299',
    'yodaReady': 'h5',
    'csecplatform': '4',
    'csecversion': '2.2.1',
}

# Listing endpoint; the ';st=0;p=1;s=20;...' segments are part of the URL path.
LIST_URL = 'https://m.dianping.com/myshow/ajax/performances/1;st=0;p=1;s=20;tft=0;marketLevel=0'
# requests has no default timeout, so set one to avoid hanging on a stalled connection.
REQUEST_TIMEOUT = 10  # seconds
OUTPUT_PATH = '演唱会.xlsx'

# Output column name -> key of the corresponding field in each item of the
# API payload's 'data' list.
FIELD_MAP = {
    '演唱会名': 'shortName',
    '演唱时间': 'showTimeRange',
    '演唱地址': 'address',
    '打折': 'minDiscount',
    '价格': 'priceRange',
}


def fetch_performances():
    """Request the performance listing and return the decoded JSON payload.

    Uses the module-level ``params``, ``cookies`` and ``headers``.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.Timeout: no response arrived within REQUEST_TIMEOUT seconds.
        ValueError: the response body is not valid JSON.
    """
    http_response = requests.get(
        LIST_URL,
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly instead of trying to parse an error page as JSON.
    http_response.raise_for_status()
    return http_response.json()


def parse_performances(payload):
    """Turn the API payload into a list of row dicts keyed by output column.

    Args:
        payload: decoded JSON response; must contain a 'data' list of dicts.

    Returns:
        One dict per item in ``payload['data']``, with the columns of
        FIELD_MAP. A field missing from an item becomes ``None`` so a single
        incomplete record does not abort the whole run.

    Raises:
        KeyError: the payload has no 'data' key (e.g. the request was rejected).
    """
    return [
        {column: item.get(key) for column, key in FIELD_MAP.items()}
        for item in payload['data']
    ]


def save_to_excel(rows, path=OUTPUT_PATH):
    """Write the row dicts to an Excel workbook at ``path`` (no index column)."""
    df = pd.DataFrame(rows)
    # The context manager closes the workbook handle even if to_excel raises.
    with pd.ExcelWriter(path) as writer:
        df.to_excel(writer, index=False)


def main():
    """Fetch the listing, print it, collect the rows and save them to Excel."""
    response = fetch_performances()
    print(response)
    for dic in parse_performances(response):
        print(dic)
        # Keep filling the module-level accumulator, as the script always did.
        result_lis.append(dic)
    save_to_excel(result_lis)


if __name__ == '__main__':
    main()

今天就分享到这,祝在座的各位程序员大佬,1024节日快乐,薪水加多多~~

  • 3
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值