一、分析网站
抓取网站的主页: https://booking.1hai.cn/?from=Nav&IsBatch=false
抓取时需要带上查询参数(取车门店、取还车时间等)。如果不带参数,默认请求返回的数据对我们没有用;只有变更参数后返回结果也随之变化,才能确认网站是否破解成功。
具体的抓包分析和请求流程会在代码中体现。
二、代码
import requests
import datetime
import random
import math
import re
from lxml.html import etree
import execjs
import json
class Yihai:
    """Walk the 1hai.cn booking flow and print the car list for one query.

    The flow is: load the landing page (cookies + anti-forgery token), call
    the car-list endpoint once to collect more cookies, post the search
    parameters (store, dates), then call the car-list endpoint again and
    parse the returned HTML fragment.
    """

    # Values shared by every XHR request sent by this class.
    REFERER = 'https://booking.1hai.cn/?from=Nav&IsBatch=false'
    USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    VALIDATE_URL = 'https://booking.1hai.cn/Order/Validate/Index'
    # Seconds to wait for the server; without it a stalled connection hangs forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.base_url = 'https://booking.1hai.cn/?from=Nav&IsBatch=false'
        self.first_url = 'https://booking.1hai.cn/Order/FirstStep/LoadCarTypeData'
        self.two_url = 'https://booking.1hai.cn/'

    def _build_headers(self, token, sign_1, content_type=None):
        """Build the XHR headers used by every request after the landing page.

        :param token: anti-forgery token scraped by ``get_cookies``
        :param sign_1: first half of the ``Request-Id``; the second half is
            generated fresh for each call
        :param content_type: optional ``Content-Type`` value; omitted when None
        :return: dict of request headers
        """
        headers = {
            '__RequestVerificationToken': token,
            'Request-Id': '|' + sign_1 + '.' + self.generate_sign(),
            'Referer': self.REFERER,
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': self.USER_AGENT,
        }
        if content_type is not None:
            headers['Content-Type'] = content_type
        return headers

    @staticmethod
    def _car_filter():
        """Return the (empty) filter form posted to the car-list endpoint."""
        return {'PriceLevel': '', 'Brands': '', 'Gear': '', 'Type': '', 'Seat': '', 'SortBy': '', 'IsEnterprise': 'false'}

    def get_cookies(self):
        """Fetch the landing page and extract the initial cookies and token.

        :return: ``(cookies, token)`` where ``cookies`` is a dict of the
            wanted cookies and ``token`` is the ``__RequestVerificationToken``
            value found in the page
        :raises RuntimeError: if the response status is not 200 or the token
            input is not present in the page (previously this returned None
            or raised IndexError, which surfaced as a confusing error in the
            caller)
        """
        response = requests.get(self.base_url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            raise RuntimeError(f'错误响应码为:{response.status_code}')
        need_list = ['1010902oday', '1010902oref', '1010902r', 'ASP.NET_SessionId', 'fr_safety']
        cookies = self.parse_setcookie(response.headers.get('Set-Cookie', ''), need_list)
        match = re.search('<input name="__RequestVerificationToken" type="hidden" value="(.*?)" />', response.text)
        if match is None:
            raise RuntimeError('__RequestVerificationToken not found in landing page')
        return cookies, match.group(1)

    def first_request(self, cookies, token):
        """Call the car-list endpoint once to obtain the next set of cookies.

        :param cookies: cookies returned by ``get_cookies`` (not mutated)
        :param token: anti-forgery token returned by ``get_cookies``
        :return: ``(cookies, sign_1)`` - the enlarged cookie dict and the
            first half of the ``Request-Id``, reused by later requests
        """
        print('第一次请求')
        sign_1 = self.generate_sign()
        headers = self._build_headers(token, sign_1)
        timestamp, utc_date = self.generate_timestamp()
        # Build a new dict so the caller's cookies are left untouched.
        cookies = {
            **cookies,
            'ai_session': self.generate_sign() + '|' + timestamp + '|' + timestamp,
            'ai_user': self.generate_sign() + '|' + utc_date,
            'js': 'true',
        }
        response = requests.post(
            self.first_url,
            data=self._car_filter(),
            cookies=cookies,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        need_list = ['1010902oat', '1010902tk']
        cookies.update(self.parse_setcookie(response.headers.get('Set-Cookie', ''), need_list))
        cookies['sajssdk_2015_cross_new_user'] = '1'
        cookies['sensorsdata2015jssdkcross'] = self.sensorsdata2015jssdkcross()
        print(cookies)
        return cookies, sign_1

    def two_bak(self, cookies, token, sign_1, pick_up_date='2021-01-12', return_date='2021-01-14'):
        """Optional check that the cookies and token collected so far are accepted.

        The response body is printed; per the original author a healthy
        response is ``{"IsSuccess":true,"Message":"000000"}``.

        :param cookies: cookie dict from ``first_request``
        :param token: anti-forgery token
        :param sign_1: first half of the ``Request-Id``
        :param pick_up_date: pick-up date as ``YYYY-MM-DD``
        :param return_date: return date as ``YYYY-MM-DD``
        :return: None
        """
        data = {
            'PickUpServiceAddress': '',
            'IsSendService': 'false',
            'ReturnServiceAddress': '',
            'IsReturnService': 'false',
            'PickUpDate': pick_up_date,
            'ReturnDate': return_date,
            'PickUpCityId': '5',
            'PickUpStoreId': '881',
            'ReturnCityId': '5',
            'ReturnStoreId': '881',
            'FlightNumber': '',
            'PickUpServiceDto.Lat': '',
            'PickUpServiceDto.Lng': '',
            'ReturnServiceDto.Lat': '',
            'ReturnServiceDto.Lng': '',
            'PickUpServiceDto.Address': '',
            'ReturnServiceDto.Address': '',
            'PickUpServiceDto.IsFree': 'False',
            'ReturnServiceDto.IsFree': 'False',
            'ReturnHour': '18',
            'PickUpHour': '18',
            'ReturnMinute': '0',
            'PickUpMinute': '0',
        }
        headers = self._build_headers(token, sign_1, 'application/json;charset=UTF-8')
        response = requests.post(
            url=self.VALIDATE_URL,
            data=json.dumps(data),
            cookies=cookies,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        print(response.status_code)
        print('校验响应= ', response.text)

    def two_request(self, cookies, token, sign_1, pick_up_date='2021-01-13', return_date='2021-01-15'):
        """Post the search parameters (store, dates) and refresh the cookies.

        :param cookies: cookie dict from ``first_request``; updated in place
        :param token: anti-forgery token
        :param sign_1: first half of the ``Request-Id``
        :param pick_up_date: pick-up date as ``YYYY-MM-DD``
        :param return_date: return date as ``YYYY-MM-DD``
        :return: ``(sign_1, cookies)`` - note the order differs from
            ``first_request``
        """
        print('第二次请求')
        data = {
            'FlightNumber': '',
            'IsReturnService': 'false',
            'IsSendService': 'false',
            'PickUpCity': '北京',
            'PickUpCityId': '5',
            'PickUpDate': pick_up_date,
            'PickUpHour': '18',
            'PickUpMinute': '0',
            'PickUpServiceAddress': '',
            'PickUpServiceDto.Address': '',
            'PickUpServiceDto.IsFree': 'False',
            'PickUpServiceDto.Lat': '',
            'PickUpServiceDto.Lng': '',
            'PickUpStoreId': '881',
            'PickUpStoreName': '首都机场T1店',
            'ReturnCity': '北京',
            'ReturnCityId': '5',
            'ReturnDate': return_date,
            'ReturnHour': '18',
            'ReturnMinute': '0',
            'ReturnServiceAddress': '',
            'ReturnServiceDto.Address': '',
            'ReturnServiceDto.IsFree': 'False',
            'ReturnServiceDto.Lat': '',
            'ReturnServiceDto.Lng': '',
            'ReturnStoreId': '881',
            'ReturnStoreName': '首都机场T1店',
            # NOTE(review): this looks like a header name sent as a form
            # field; kept because the original payload included it - confirm.
            'X-Requested-With': 'XMLHttpRequest',
        }
        headers = self._build_headers(token, sign_1, 'application/x-www-form-urlencoded; charset=UTF-8')
        response = requests.post(
            self.two_url,
            data=data,
            cookies=cookies,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        need_list = ['1010902oday', '1010902r', '1010902pr']
        cookies.update(self.parse_setcookie(response.headers.get('Set-Cookie', ''), need_list))
        return sign_1, cookies

    def three_request(self, cookies, sign_1, token):
        """Call the car-list endpoint again and parse the returned HTML.

        :param cookies: cookie dict from ``two_request``
        :param sign_1: first half of the ``Request-Id``
        :param token: anti-forgery token
        :return: whatever ``parse_html`` returns for the response body
        """
        print('第三次请求')
        headers = self._build_headers(token, sign_1)
        response = requests.post(
            self.first_url,
            data=self._car_filter(),
            cookies=cookies,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        return self.parse_html(response.text)

    @staticmethod
    def parse_setcookie(set_cookie, need_list):
        """Pick selected cookies out of a combined ``Set-Cookie`` header value.

        The header is split on both ``,`` and ``;``; every ``name=value``
        piece whose name is exactly one of ``need_list`` is kept. Names are
        compared exactly (not by substring) so that attributes such as
        ``path=/`` or flags such as ``HttpOnly`` can never be mistaken for a
        wanted cookie.

        :param set_cookie: raw ``Set-Cookie`` header value (may be empty)
        :param need_list: cookie names to extract
        :return: dict mapping each found cookie name to its value
        """
        wanted = set(need_list)
        cookie = {}
        for piece in set_cookie.replace(',', ';').split(';'):
            name, sep, value = piece.partition('=')
            name = name.strip()
            # Pieces without '=' are flag attributes (HttpOnly, Secure, ...).
            if sep and name in wanted:
                cookie[name] = value.strip()
        return cookie

    @staticmethod
    def generate_timestamp():
        """Return the current time as epoch milliseconds and as a UTC ISO string.

        Both values come from the same clock reading. The timestamp is an
        integer string (a float string was produced before), and the UTC
        string always carries exactly three fractional digits, e.g.
        ``2021-01-12T10:20:30.123Z``.

        :return: ``(timestamp, utc_date)`` - both strings
        """
        now = datetime.datetime.now(datetime.timezone.utc)
        timestamp = str(int(now.timestamp() * 1000))
        # Drop tzinfo so isoformat() does not append "+00:00" before the "Z".
        utc_date = now.replace(tzinfo=None).isoformat(timespec='milliseconds') + 'Z'
        return timestamp, utc_date

    @staticmethod
    def generate_sign():
        """Generate a short random identifier over a base64-style alphabet.

        A random number below 2**30 is written out in base 64, least
        significant digit first, giving at most five characters.

        :return: the identifier string
        """
        salt = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
        t = 1073741824 * random.random()
        chars = []
        while t > 0:
            # Truncate, do not round up: ceil() could yield 64 and index
            # past the end of the 64-character alphabet.
            chars.append(salt[int(t % 64)])
            t = int(t / 64)
        return ''.join(chars)

    @staticmethod
    def parse_html(text):
        """Print and collect car name / price pairs from the car-list HTML.

        Entries that do not have the expected ``<li>`` layout are skipped
        instead of raising IndexError.

        :param text: HTML fragment returned by the car-list endpoint
        :return: list of ``(car_name, price)`` tuples, where ``price`` is the
            list of text nodes matched for the total price
        """
        results = []
        html_xpath = etree.HTML(text)
        if html_xpath is None:
            return results
        for dt in html_xpath.xpath('//div[@class="wraplist"]/div'):
            lis = dt.xpath('.//li')
            if len(lis) < 3:
                continue
            names = lis[1].xpath('.//p[1]/span/text()')
            if not names:
                continue
            car_name = names[0]
            price = lis[2].xpath('.//*[@class="total-price"]/text()')
            print(car_name, price)
            results.append((car_name, price))
        return results

    @staticmethod
    def sensorsdata2015jssdkcross():
        """Build the value of the ``sensorsdata2015jssdkcross`` cookie.

        The value is URL-encoded JSON with ``distinct_id``, ``$device_id``
        (both set to the same generated id) and a ``props`` object holding
        four ``$latest_*`` entries. The id is assembled as
        ``<e>-<t>-<r>-<n>-<e>`` from two small JavaScript snippets run
        through execjs plus two fixed parts.

        :return: the URL-encoded cookie value
        """
        height = 1080
        width = 1920
        n = str(height * width)
        # Hex of the current millisecond time followed by hex of a counter
        # of loop iterations run until the millisecond value changes.
        time_js = execjs.compile(
            "var e = function() {\n"
            "for (var e = 1 * new Date, t = 0; e == 1 * new Date; )\n"
            "t++;\n"
            "return e.toString(16) + t.toString(16)\n"
            "}"
        )
        e = time_js.call('e')
        # Hex rendering of Math.random() with the dot removed.
        random_js = execjs.compile(
            "t = function() {\n"
            "return Math.random().toString(16).replace(\".\", \"\")\n"
            "}"
        )
        t = random_js.call('t')
        # Fixed component; an earlier variant in the original was '3f604900-2073600'.
        r = '3f604900'
        s = e + '-' + t + '-' + r + '-' + n + '-' + e
        print(s)
        sensorsdata2015jssdkcross = "%7B%22distinct_id%22%3A%22{}%22%2C%22%24device_id%22%3A%22{}%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D".format(s, s)
        return sensorsdata2015jssdkcross
def main():
    """Scrape car listings from 1hai.cn (eHi car rental).

    Request sequence observed from packet capture:

    1. GET https://booking.1hai.cn/?from=Nav&IsBatch=false
       Obtains the initial cookies and the anti-forgery token.
    2. POST https://booking.1hai.cn/Order/FirstStep/LoadCarTypeData
       The response sets further cookies.
    3. POST https://booking.1hai.cn/Order/Validate/Index (optional)
       Has no effect on later requests; it only shows whether the cookies
       and token collected so far are accepted.
    4. POST https://booking.1hai.cn/
       The important one: it carries the search parameters (store, dates,
       ...) and changes the cookies again.
    5. POST https://booking.1hai.cn/Order/FirstStep/LoadCarTypeData
       Returns the final data.

    Before running, adjust the dates and hours in the parameters of
    ``two_request``. Per the original author, the signing step can
    occasionally raise; if it does, run the script again.

    :return: None
    """
    yihai = Yihai()
    session = yihai.get_cookies()
    if session is None:
        # Nothing to unpack: the landing page could not be loaded.
        return
    cookies, token = session
    cookies, sign_1 = yihai.first_request(cookies, token)
    yihai.two_bak(cookies, token, sign_1)  # optional check; may be commented out
    sign_1, cookies = yihai.two_request(cookies, token, sign_1)
    yihai.three_request(cookies, sign_1, token)


if __name__ == '__main__':
    main()
执行结果:
可以看到,得到的结果和网站上展示的一致,说明这个网站破解成功。这里只给出了破解的规则和流程;如果需要工程化的抓取,还需要自行修改代码和参数,这里就不展示了。