抓取一嗨租车

一、分析网站

     抓取网站的主页: https://booking.1hai.cn/?from=Nav&IsBatch=false

     抓取前需要先选择查询参数(取车门店、取还车时间等)。如果不选参数,默认请求返回的数据对我们没有用;只有变更参数后返回的结果随之变化,才能确认网站是否破解成功。

     具体的抓包分析和请求流程会在下面的代码中体现出来。

二、代码

import requests
import datetime
import random
import math
import re
from lxml.html import etree
import execjs
import json


class Yihai:
    """Walk the 1hai.cn booking flow and print the car list for one search.

    The methods are meant to be called in order: ``get_cookies`` ->
    ``first_request`` -> (optional) ``two_bak`` -> ``two_request`` ->
    ``three_request``. Each step forwards the cookies, the anti-forgery
    token and the first half of the ``Request-Id`` header to the next one.
    """

    # Browser User-Agent sent with every XHR-style request.
    USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.base_url = 'https://booking.1hai.cn/?from=Nav&IsBatch=false'
        self.first_url = 'https://booking.1hai.cn/Order/FirstStep/LoadCarTypeData'
        self.two_url = 'https://booking.1hai.cn/'

    def _xhr_headers(self, token, sign_1, content_type=None):
        """Build the headers shared by every POST in the flow.

        :param token: value of the ``__RequestVerificationToken`` hidden input
        :param sign_1: first half of the ``Request-Id`` header
        :param content_type: optional ``Content-Type`` header value
        :return: dict of request headers with a freshly generated second
                 ``Request-Id`` half
        """
        headers = {
            '__RequestVerificationToken': token,
            'Request-Id': '|' + sign_1 + '.' + self.generate_sign(),
        }
        if content_type is not None:
            headers['Content-Type'] = content_type
        headers['Referer'] = 'https://booking.1hai.cn/?from=Nav&IsBatch=false'
        headers['X-Requested-With'] = 'XMLHttpRequest'
        headers['User-Agent'] = self.USER_AGENT
        return headers

    def get_cookies(self):
        """Fetch the landing page and extract the initial cookies and token.

        :return: tuple ``(cookies, token)`` where ``cookies`` is a dict of the
                 needed cookies and ``token`` is the anti-forgery token string
        :raises RuntimeError: if the page does not answer with HTTP 200 or the
                 token input cannot be found in the HTML
        """
        response = requests.get(self.base_url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(f'错误响应码为:{response.status_code}')
            # Returning nothing here would only surface later as an
            # unpacking error in the caller, so fail loudly instead.
            raise RuntimeError(
                f'unexpected status code {response.status_code} from {self.base_url}'
            )

        need_list = ['1010902oday', '1010902oref', '1010902r', 'ASP.NET_SessionId', 'fr_safety']
        cookies = self.parse_setcookie(response.headers.get('Set-Cookie', ''), need_list)

        match = re.search(
            '<input name="__RequestVerificationToken" type="hidden" value="(.*?)" />',
            response.text,
        )
        if match is None:
            raise RuntimeError('__RequestVerificationToken not found in the landing page')
        return cookies, match.group(1)

    def first_request(self, cookies, token):
        """Post the empty filter form once to collect the follow-up cookies.

        :param cookies: cookie dict returned by ``get_cookies``
        :param token: anti-forgery token returned by ``get_cookies``
        :return: tuple ``(cookies, sign_1)``; ``cookies`` is a new dict holding
                 the input cookies plus the generated and server-set ones,
                 ``sign_1`` is the ``Request-Id`` prefix reused by later steps
        """
        print('第一次请求')
        sign_1 = self.generate_sign()
        headers = self._xhr_headers(token, sign_1)

        timestamp, utc_date = self.generate_timestamp()
        cookies = {
            **cookies,
            'ai_session': self.generate_sign() + '|' + timestamp + '|' + timestamp,
            'ai_user': self.generate_sign() + '|' + utc_date,
            'js': 'true',
        }
        data = {'PriceLevel': '', 'Brands': '', 'Gear': '', 'Type': '', 'Seat': '', 'SortBy': '', 'IsEnterprise': 'false'}
        response = requests.post(
            self.first_url, data=data, cookies=cookies, headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )

        # The header may be absent; treat that as "no cookie updates".
        need_list = ['1010902oat', '1010902tk']
        cookies.update(self.parse_setcookie(response.headers.get('Set-Cookie', ''), need_list))

        cookies['sajssdk_2015_cross_new_user'] = '1'
        cookies['sensorsdata2015jssdkcross'] = self.sensorsdata2015jssdkcross()
        print(cookies)
        return cookies, sign_1

    def two_bak(self, cookies, token, sign_1, pick_up_date='2021-01-12', return_date='2021-01-14'):
        """Optional check that the parameters gathered so far are accepted.

        Per the original notes, the normal response is
        ``{"IsSuccess":true,"Message":"000000"}``. The status code and the
        response body are printed; nothing is returned.

        :param cookies: cookie dict from ``first_request``
        :param token: anti-forgery token
        :param sign_1: ``Request-Id`` prefix from ``first_request``
        :param pick_up_date: pick-up date as ``YYYY-MM-DD``
        :param return_date: return date as ``YYYY-MM-DD``
        :return:
        """
        data = {
            'PickUpServiceAddress': '',
            'IsSendService': 'false',
            'ReturnServiceAddress': '',
            'IsReturnService': 'false',
            'PickUpDate': pick_up_date,
            'ReturnDate': return_date,
            'PickUpCityId': '5',
            'PickUpStoreId': '881',
            'ReturnCityId': '5',
            'ReturnStoreId': '881',
            'FlightNumber': '',
            'PickUpServiceDto.Lat': '',
            'PickUpServiceDto.Lng': '',
            'ReturnServiceDto.Lat': '',
            'ReturnServiceDto.Lng': '',
            'PickUpServiceDto.Address': '',
            'ReturnServiceDto.Address': '',
            'PickUpServiceDto.IsFree': 'False',
            'ReturnServiceDto.IsFree': 'False',
            'ReturnHour': '18',
            'PickUpHour': '18',
            'ReturnMinute': '0',
            'PickUpMinute': '0',
        }
        headers = self._xhr_headers(token, sign_1, 'application/json;charset=UTF-8')
        response = requests.post(
            url='https://booking.1hai.cn/Order/Validate/Index',
            data=json.dumps(data), cookies=cookies, headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        print(response.status_code)
        print('校验响应= ', response.text)

    def two_request(self, cookies, token, sign_1, pick_up_date='2021-01-13', return_date='2021-01-15'):
        """Submit the search form (city, store, dates) and refresh the cookies.

        :param cookies: cookie dict from ``first_request``; updated in place
        :param token: anti-forgery token
        :param sign_1: ``Request-Id`` prefix from ``first_request``
        :param pick_up_date: pick-up date as ``YYYY-MM-DD``
        :param return_date: return date as ``YYYY-MM-DD``
        :return: tuple ``(sign_1, cookies)``
        """
        print('第二次请求')
        data = {
            'FlightNumber': '',
            'IsReturnService': 'false',
            'IsSendService': 'false',
            'PickUpCity': '北京',
            'PickUpCityId': '5',
            'PickUpDate': pick_up_date,
            'PickUpHour': '18',
            'PickUpMinute': '0',
            'PickUpServiceAddress': '',
            'PickUpServiceDto.Address': '',
            'PickUpServiceDto.IsFree': 'False',
            'PickUpServiceDto.Lat': '',
            'PickUpServiceDto.Lng': '',
            'PickUpStoreId': '881',
            'PickUpStoreName': '首都机场T1店',
            'ReturnCity': '北京',
            'ReturnCityId': '5',
            'ReturnDate': return_date,
            'ReturnHour': '18',
            'ReturnMinute': '0',
            'ReturnServiceAddress': '',
            'ReturnServiceDto.Address': '',
            'ReturnServiceDto.IsFree': 'False',
            'ReturnServiceDto.Lat': '',
            'ReturnServiceDto.Lng': '',
            'ReturnStoreId': '881',
            'ReturnStoreName': '首都机场T1店',
            'X-Requested-With': 'XMLHttpRequest',
        }
        headers = self._xhr_headers(
            token, sign_1, 'application/x-www-form-urlencoded; charset=UTF-8'
        )
        response = requests.post(
            self.two_url, data=data, cookies=cookies, headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )

        need_list = ['1010902oday', '1010902r', '1010902pr']
        cookies.update(self.parse_setcookie(response.headers.get('Set-Cookie', ''), need_list))
        return sign_1, cookies

    def three_request(self, cookies, sign_1, token):
        """Request the car list again and print the parsed result.

        :param cookies: cookie dict from ``two_request``
        :param sign_1: ``Request-Id`` prefix from ``first_request``
        :param token: anti-forgery token
        :return:
        """
        print('第三次请求')
        data = {'PriceLevel': '', 'Brands': '', 'Gear': '', 'Type': '', 'Seat': '', 'SortBy': '', 'IsEnterprise': 'false'}
        headers = self._xhr_headers(token, sign_1)
        response = requests.post(
            self.first_url, data=data, cookies=cookies, headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        self.parse_html(response.text)

    @staticmethod
    def parse_setcookie(set_cookie, need_list):
        """Pick the wanted cookies out of a combined ``Set-Cookie`` header.

        :param set_cookie: ``Set-Cookie`` header value; several cookies may be
                           joined with commas
        :param need_list: names of the cookies to extract
        :return: dict mapping each found cookie name to its value
        """
        cookie = {}
        if not set_cookie:
            return cookie

        wanted = set(need_list)
        # Commas separate cookies (and also appear inside "expires" dates);
        # turning them into ';' yields a flat list of "name=value" segments.
        for segment in set_cookie.replace(',', ';').split(';'):
            name, sep, value = segment.partition('=')
            name = name.strip(' ')
            # Match on the exact cookie name; attribute flags without '='
            # (e.g. HttpOnly) and unrelated segments are ignored.
            if sep and name in wanted:
                cookie[name] = value.strip(' ')
        return cookie

    @staticmethod
    def generate_timestamp():
        """Return the current time as a millisecond stamp and a UTC string.

        :return: tuple ``(timestamp, utc_date)``; ``timestamp`` is the epoch
                 time in milliseconds rendered with ``str`` (a float literal),
                 ``utc_date`` looks like ``2021-01-12T10:00:00.123Z``
        """
        # One clock reading feeds both values so they describe the same instant.
        now = datetime.datetime.now(datetime.timezone.utc)
        timestamp = str(now.timestamp() * 1000)
        # isoformat always emits exactly three fractional digits here, even
        # when the microsecond part happens to be zero.
        utc_date = now.replace(tzinfo=None).isoformat(timespec='milliseconds') + 'Z'
        return timestamp, utc_date

    @staticmethod
    def generate_sign():
        """Generate a short random id over the base64 alphabet.

        :return: string of at most five characters (empty if the random draw
                 is exactly zero)
        """
        salt = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
        value = 1073741824 * random.random()
        chars = []
        while value > 0:
            # Truncate rather than round up: rounding up can produce index 64,
            # which is past the end of the 64-character alphabet.
            chars.append(salt[int(value % 64)])
            value = int(value // 64)
        return ''.join(chars)

    @staticmethod
    def parse_html(text):
        """Print the car name and price list for every entry in the HTML.

        :param text: HTML fragment returned by the car-list endpoint
        :return:
        """
        if not text or not text.strip():
            return
        html_xpath = etree.HTML(text)
        if html_xpath is None:
            return

        for item in html_xpath.xpath('//div[@class="wraplist"]/div'):
            lis = item.xpath('.//li')
            # The name is read from the second <li>, the price from the third.
            if len(lis) < 3:
                continue
            names = lis[1].xpath('.//p[1]/span/text()')
            if not names:
                continue
            price = lis[2].xpath('.//*[@class="total-price"]/text()')
            print(names[0], price)

    @staticmethod
    def sensorsdata2015jssdkcross():
        """Build the value of the ``sensorsdata2015jssdkcross`` cookie.

        The id has the form ``e-t-r-n-e``: ``e`` and ``t`` come from small
        JavaScript snippets run through execjs, ``r`` is a fixed string and
        ``n`` is the product of a 1080 x 1920 screen size. The id is printed
        and substituted twice into a URL-encoded JSON template.

        :return: URL-encoded cookie value
        """
        height = 1080
        width = 1920
        n = str(height * width)

        # Hex millisecond timestamp followed by a hex spin counter.
        time_ctx = execjs.compile("""var e = function() {
                        for (var e = 1 * new Date, t = 0; e == 1 * new Date; )
                            t++;
                        return e.toString(16) + t.toString(16)
                    }""")
        e = time_ctx.call('e')

        # Random number rendered in base 16 with the dot removed.
        rand_ctx = execjs.compile("""t = function() {
                return Math.random().toString(16).replace(".", "")
            }""")
        t = rand_ctx.call('t')

        r = '3f604900'
        s = e + '-' + t + '-' + r + '-' + n + '-' + e
        print(s)
        # URL-encoded JSON object; the two placeholders are distinct_id and
        # $device_id, followed by a fixed "props" object.
        return "%7B%22distinct_id%22%3A%22{}%22%2C%22%24device_id%22%3A%22{}%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D".format(s, s)



def main():
    """Scrape the 1hai.cn car list for one fixed search.

    Request sequence (from packet capture):

    1. GET  https://booking.1hai.cn/?from=Nav&IsBatch=false
       Obtains the initial cookies and the token.
    2. POST https://booking.1hai.cn/Order/FirstStep/LoadCarTypeData
       The response's Set-Cookie header changes the cookies.
    3. POST https://booking.1hai.cn/Order/Validate/Index (optional)
       Has no effect on later requests; it only tells whether the cookies,
       token and other parameters obtained so far are accepted.
    4. POST https://booking.1hai.cn/
       Important: carries the search parameters (store, time, ...) and
       changes the cookies again.
    5. POST https://booking.1hai.cn/Order/FirstStep/LoadCarTypeData
       Returns the final data.

    Before running, adjust the dates and hours in the parameters of
    ``two_request``. If the sign-generation step raises an error, simply
    run the script again.

    :return:
    """
    yihai = Yihai()

    # get_cookies may yield nothing when the landing page request fails;
    # stop here instead of crashing on tuple unpacking.
    initial = yihai.get_cookies()
    if initial is None:
        return
    cookies, token = initial

    cookies, sign_1 = yihai.first_request(cookies, token)
    yihai.two_bak(cookies, token, sign_1)  # optional validation step; may be commented out
    sign_1, cookies = yihai.two_request(cookies, token, sign_1)
    yihai.three_request(cookies, sign_1, token)


# Run the scraping flow only when this file is executed as a script.
if __name__ == '__main__':
    main()

执行结果:

可以看到,抓取到的结果和网站上展示的一致,说明这个网站破解成功。这里我只给出了破解的规则和流程;如果需要工程化的抓取,还需要自行修改代码和参数,这里就不展示了。

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值