获取携程机票信息(爬虫)

仅供个人学习使用!

2022-01-01 版

# @author: AIslandX
# @date: 2022-01-01

import hashlib
import json
import logging
import random
import time

import requests
from fake_useragent import UserAgent

# 参考文章:
#   - 机场列表 - 维基百科
#     https://zh.wikipedia.org/wiki/%E4%B8%AD%E5%8D%8E%E4%BA%BA%E6%B0%91%E5%85%B1%E5%92%8C%E5%9B%BD%E6%9C%BA%E5%9C%BA%E5%88%97%E8%A1%A8
#   - 携程国际机票sign破解 https://blog.csdn.net/weixin_38927522/article/details/108214323
#   - 至于前端反反爬虫,看完这篇你就可以毕业了 https://zhuanlan.zhihu.com/p/250176143


# Module-wide fake_useragent generator; its `chrome` attribute supplies the
# User-Agent header sent with the batchSearch request in get_flight_info.
ua = UserAgent()


def get_cookie_bfa():
    """Build a synthetic ``_bfa`` cookie pair for the search request.

    The value consists of eight dot-separated fields: ``1``, the current
    time in milliseconds, a random six-character token drawn from lowercase
    letters and digits, ``1``, the same timestamp twice more, then ``1``
    and ``1``.

    Returns:
        A string such as
        ``_bfa=1.1639722810158.u3jal2.1.1639722810158.1639722810158.1.1``.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz1234567890"
    token = "".join(random.choice(alphabet) for _ in range(6))
    now_ms = str(int(round(time.time() * 1000)))

    fields = ("1", now_ms, token, "1", now_ms, now_ms, "1", "1")
    return "_bfa=" + ".".join(fields)


def get_sign(transaction_id, departure_city_code, arrival_city_code, departure_date):
    """Compute the ``sign`` header value for the Ctrip flight-search API.

    The signature is the hexadecimal MD5 digest of the four arguments
    concatenated in the order given and encoded as UTF-8.

    Args:
        transaction_id: Transaction id string from the flight-list call.
        departure_city_code: Departure city code string.
        arrival_city_code: Arrival city code string.
        departure_date: Departure date string.

    Returns:
        The 32-character lowercase hex digest.
    """
    raw = (transaction_id + departure_city_code + arrival_city_code + departure_date).encode('utf-8')
    return hashlib.md5(raw).hexdigest()


def get_transaction_id(departure_city_code, arrival_city_code, departure_date, cabin, timeout=10):
    """Fetch the transaction id and route payload for a one-way search.

    Issues a GET to Ctrip's ``flightlist`` endpoint and extracts the
    ``data`` object and its ``transactionID`` field from the JSON response.

    Args:
        departure_city_code: Departure city code placed in the URL path.
        arrival_city_code: Arrival city code placed in the URL path.
        departure_date: Departure date, sent as the ``depdate`` query value.
        cabin: Cabin class code, sent as the ``cabin`` query value.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        ``(transaction_id, flight_list_data)`` on success, where
        ``flight_list_data`` is the ``data`` object of the response.
        ``("", None)`` on any failure; the failure is logged.
    """
    flight_list_url = (
        "https://flights.ctrip.com/international/search/api/flightlist"
        "/oneway-{}-{}?_=1&depdate={}&cabin={}&containstax=1"
    ).format(departure_city_code, arrival_city_code, departure_date, cabin)

    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection.
        flight_list_req = requests.get(url=flight_list_url, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors and timeouts follow the same failure contract
        # as a bad status code instead of propagating to the caller.
        logging.error("get transaction id failed, %s", e)
        return "", None

    if flight_list_req.status_code != 200:
        logging.error("get transaction id failed, status code %s", flight_list_req.status_code)
        return "", None

    try:
        flight_list_data = flight_list_req.json()["data"]
        transaction_id = flight_list_data["transactionID"]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError: expected field missing;
        # TypeError: "data" is null or not a mapping.
        logging.error("get transaction id failed, %s", e)
        return "", None

    if not transaction_id:
        # A null/empty id cannot be signed, so report it as a failure here.
        logging.error("get transaction id failed, empty transactionID")
        return "", None

    return transaction_id, flight_list_data


def get_flight_info(departure_city_code, arrival_city_code, departure_date, cabin, timeout=10):
    """Query Ctrip for the itineraries of a one-way route.

    Obtains the transaction id and route payload via ``get_transaction_id``,
    derives the ``sign`` header and a ``_bfa`` cookie, then POSTs the route
    payload to the ``batchSearch`` endpoint.

    Args:
        departure_city_code: Departure city code.
        arrival_city_code: Arrival city code.
        departure_date: Departure date string.
        cabin: Cabin class code.
        timeout: Seconds to wait for the batchSearch response. It applies
            only to the POST issued here; the preliminary lookup is
            delegated to ``get_transaction_id`` unchanged.

    Returns:
        ``(True, itineraries)`` on success, where ``itineraries`` is the
        response's ``flightItineraryList`` (an empty list when that key is
        absent). ``(False, None)`` on any failure; the failure is logged.
    """
    # Fetch the transaction id and the route payload.
    transaction_id, flight_list_data = get_transaction_id(
        departure_city_code, arrival_city_code, departure_date, cabin)
    if not transaction_id or flight_list_data is None:
        return False, None

    try:
        scope = flight_list_data["scope"]
    except (KeyError, TypeError) as e:
        # The "scope" header is built from the payload; without it the
        # request cannot be formed, so fail in the usual way.
        logging.error("get flight info failed, missing scope: %s", e)
        return False, None

    # "sign" header required by the search API.
    sign = get_sign(transaction_id, departure_city_code, arrival_city_code, departure_date)

    # "_bfa" value sent in the cookie header.
    bfa = get_cookie_bfa()

    # Build the request and query the data.
    search_url = "https://flights.ctrip.com/international/search/api/search/batchSearch"
    search_headers = {
        "transactionid": transaction_id,
        "sign": sign,
        "scope": scope,
        "origin": "https://flights.ctrip.com",
        "referer": "https://flights.ctrip.com/online/list/oneway-{}-{}"
                   "?_=1&depdate={}&cabin={}&containstax=1".format(departure_city_code, arrival_city_code,
                                                                   departure_date, cabin),
        "content-type": "application/json;charset=UTF-8",
        "user-agent": ua.chrome,
        "cookie": bfa,
    }

    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection.
        r = requests.post(url=search_url, headers=search_headers,
                          data=json.dumps(flight_list_data), timeout=timeout)
    except requests.RequestException as e:
        logging.error("get flight info failed, %s", e)
        return False, None

    if r.status_code != 200:
        logging.error("get flight info failed, status code %s", r.status_code)
        return False, None

    try:
        result_json = r.json()
        payload = result_json["data"]
        # A non-zero context flag is treated as a failed search.
        if payload["context"]["flag"] != 0:
            logging.error("get flight info failed, %s", result_json)
            return False, None
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        logging.error("get flight info failed, %s", e)
        return False, None

    # A successful response without the key yields an empty list.
    return True, payload.get("flightItineraryList", [])


if __name__ == '__main__':
    # Common logging setup: timestamp, level, then the message.
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

    # Example query: departure city code, arrival city code, departure date
    # and cabin class ("Y" is economy; see
    # https://baike.baidu.com/item/%E9%A3%9E%E6%9C%BA%E8%88%B1%E4%BD%8D/4764328).
    # Another route to try: "GOQ" -> "CGQ" on "2022-01-29".
    departure_city, arrival_city, depart_on, cabin_class = "TNA", "CGQ", "2022-01-29", "Y"

    succeeded, itineraries = get_flight_info(departure_city, arrival_city, depart_on, cabin_class)
    # Dump the itineraries as JSON (non-ASCII kept readable) or report failure.
    print(json.dumps(itineraries, ensure_ascii=False) if succeeded else "获取失败")

  • 4
    点赞
  • 39
    收藏
    觉得还不错? 一键收藏
  • 15
    评论
以下是爬取携程机票信息的Python代码,使用了requests和BeautifulSoup库:

```python
import requests
from bs4 import BeautifulSoup

# 构造请求头
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

# 构造请求参数
params = {
    'DCity1': 'BJS',  # 出发地
    'ACity1': 'SHA',  # 目的地
    'SearchType': 'S',  # 单程
    'DepDate1': '2021-12-01',  # 出发日期
    'IsLowestPrice': 'false',  # 不是最低价
    'Airline': '',  # 航空公司
    'FlightNo': '',  # 航班号
    'IsDirectFlight': 'false',  # 不是直达
    'SortType': 'a',  # 排序方式
    'Catalog': '',  # 目录
    'AllianceID': '',  # 联盟ID
    'SID': '',  # SID
    'OUID': '',  # OUID
    'IsInternal': 'false',  # 不是内部
    'Action': 'Query',  # 查询
    'IsDomestic': 'true'  # 国内
}

# 发送请求
response = requests.get('https://flights.ctrip.com/itinerary/oneway/BJS-SHA', headers=headers, params=params)

# 解析网页
soup = BeautifulSoup(response.text, 'html.parser')

# 获取机票信息
flight_list = soup.find_all('div', class_='flight_item')

# 输出机票信息
for flight in flight_list:
    print('航班号:', flight['data-flight'])
    print('航空公司:', flight['data-airline'])
    print('起飞时间:', flight.find('span', class_='depart_time').text)
    print('到达时间:', flight.find('span', class_='arrive_time').text)
    print('出发机场:', flight.find('span', class_='airport').text)
    print('到达机场:', flight.find_all('span', class_='airport')[1].text)
    print('机型:', flight.find('span', class_='plane').text)
    print('价格:', flight.find('span', class_='base_price02').text)
    print('--------')
```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 15
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值