Python3 爬虫:根据传入的日期、出发地、目的地查询京东国内机票价格

5 篇文章 0 订阅
4 篇文章 1 订阅

话不多说直接上源码:

# coding:utf-8
import json, requests, os, time
from concurrent.futures import ThreadPoolExecutor
def computing_time(dep, arr):
    """Return the minutes elapsed between two ``HHMM`` clock strings.

    Args:
        dep: Departure time as an ``HHMM`` string, e.g. ``"0830"``.
        arr: Arrival time in the same format.

    Returns:
        Minutes from ``dep`` to ``arr``. When the arrival clock time is not
        later than the departure clock time, the arrival is treated as being
        on the following day, so identical times yield 1440.

    Raises:
        ValueError: If the hour or minute part is not a valid integer.
    """
    minutes_per_day = 1440
    dep_minutes = int(dep[:2]) * 60 + int(dep[2:])
    arr_minutes = int(arr[:2]) * 60 + int(arr[2:])
    elapsed = arr_minutes - dep_minutes
    if elapsed <= 0:
        # Arrival is on the next calendar day: wrap around midnight.
        elapsed += minutes_per_day
    return elapsed


def get_index_data(tasks):
    """Poll JD's one-way flight search until the flight count stops changing.

    The search endpoint is requested once per second. A result is returned as
    soon as a poll reports the same number of flights as the previous poll
    (the counter starts at 0, so an empty flight list is returned right away).

    When the reply's ``desc`` says a captcha or a login is required,
    ``Adsl().reconnect()`` is called before the next attempt. ``Adsl`` is not
    defined in this part of the file and must be provided elsewhere.

    Args:
        tasks: Mapping with the keys ``"dep_city"``, ``"arr_city"`` and
            ``"dep_time"`` (e.g. ``"武汉"``, ``"大连"``, ``"2019-08-29"``).

    Returns:
        The parsed JSON reply as a dict, with the extra keys ``"depcity"``,
        ``"arrcity"`` and ``"depdate"`` copied from ``tasks``.

    Note:
        The loop has no retry limit; if every attempt fails it never returns.
    """
    dep_city = tasks.get("dep_city")
    arr_city = tasks.get("arr_city")
    dep_time = tasks.get("dep_time")

    url = 'https://jipiao.jd.com/search/queryFlight.action?depCity={}&arrCity={}&depDate={}&arrDate={}' \
          '&queryModule=1&lineType=OW&queryType=listquery&queryuuid=&uniqueKey=&sourceId=&arrTime=&hasChild=&hasInfant='.format(
        dep_city, arr_city, dep_time, dep_time)

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    }

    # "desc" values the server sends when the client is blocked; such a reply
    # also carries "flights": null and a "captchaInfo" object.
    blocked_descriptions = ("需要验证码校验才能查询航班", "需要用户登录才能查询航班")

    len_flight = 0
    while True:
        try:
            # A fresh session per attempt, closed as soon as the body is read.
            # NOTE(review): verify=False disables TLS certificate checks.
            with requests.Session() as s:
                response = s.get(url, headers=headers, verify=False, timeout=10)
            res = json.loads(response.text)

            if res["data"]["desc"] in blocked_descriptions:
                adsl = Adsl()
                adsl.reconnect()
            elif res["data"]["flights"] is not None:
                l_flight = len(res["data"]["flights"])
                print(l_flight)
                if l_flight == len_flight:
                    res["depcity"] = dep_city
                    res["arrcity"] = arr_city
                    res["depdate"] = dep_time
                    return res
                len_flight = l_flight
        except Exception as exc:
            # Best-effort retry, but let KeyboardInterrupt/SystemExit through
            # and show what actually went wrong.
            print("此航线不通航", repr(exc))
        # Back off after every attempt, including failed ones, so errors do
        # not turn into a tight request loop.
        time.sleep(1)


def get_detail(task_list):
    """Request the cabin details of one flight and return the raw reply body.

    Args:
        task_list: Comma-separated string
            ``"depCity,arrCity,depDate,flightNo,queryuuid"``, for example
            ``"北京,长沙,2019-07-25,HU7635,f26ed61a8a3a4f79a97f0c4dc92162921563756779133"``.

    Returns:
        The response text of the ``queryFlightCabins.action`` request.

    Raises:
        IndexError: If ``task_list`` has fewer than five comma-separated fields.
    """
    fields = task_list.split(',')
    depcity, arrcity, depdate, flightnumber, queryuuid = (
        fields[0], fields[1], fields[2], fields[3], fields[4])

    # The departure date is sent as both depDate and arrDate.
    url = (
        'https://jipiao.jd.com/search/queryFlightCabins.action'
        '?depCity={}&arrCity={}&depDate={}&arrDate={}&queryuuid={}'
        '&uniqueKey=&sourceId=&queryType=jipiaoindexquery&queryModule=1'
        '&lineType=OW&arrTime=&hasChild=&hasInfant=&flightNo={}&seatType='
    ).format(depcity, arrcity, depdate, depdate, queryuuid, flightnumber)

    headers = {
        'accept-encoding': "gzip, deflate, br",
        'accept-language': "zh-CN,zh;q=0.9",
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36",
        'accept': "application/json, text/javascript, */*; q=0.01",
        'authority': "jipiao.jd.com",
        'x-requested-with': "XMLHttpRequest",
        'Cache-Control': "no-cache",
        'Host': "jipiao.jd.com",
        'Connection': "keep-alive",
        'cache-control': "no-cache"
    }

    return requests.get(url, headers=headers).text


def create_thread(data):
    """Fetch the cabin details of every flight in a search result concurrently.

    Args:
        data: Search result dict carrying ``"depcity"``, ``"arrcity"``,
            ``"depdate"`` and a ``"data"`` dict with ``"queryuuid"`` and a
            ``"flights"`` list whose items each have a ``"flightNo"``.

    Returns:
        The iterator produced by ``ThreadPoolExecutor.map``; it yields one
        ``get_detail`` result per flight, in the order of ``flights``.
    """
    dep_city = data["depcity"]
    arr_city = data["arrcity"]
    dep_date = data["depdate"]
    queryuuid = data['data']["queryuuid"]
    flights = data['data']["flights"]

    # get_detail takes its arguments as a single comma-separated string.
    task_list = [
        "{},{},{},{},{}".format(dep_city, arr_city, dep_date, flight["flightNo"], queryuuid)
        for flight in flights
    ]

    pool = ThreadPoolExecutor(20)
    try:
        # map() submits every task right away and returns a lazy iterator.
        thread_gener = pool.map(get_detail, task_list)
    finally:
        # Stop accepting new work without blocking: submitted tasks still
        # run, and the worker threads are released once they have finished.
        pool.shutdown(wait=False)
    return thread_gener


def _seat_count(raw):
    """Convert a ``seatNum`` value to an int; the code ``"A"`` is mapped to 9."""
    return 9 if raw == "A" else int(raw)


def _rate(discount):
    """Scale a ``discount`` value down by ten and round to two decimals."""
    return round(float(discount) / 10, 2)


def _date_time(date_text, hhmm):
    """Join a date string and an ``HHMM`` string into ``"<date> HH:MM"``."""
    return date_text + " " + hhmm[:2] + ":" + hhmm[2:]


def parse_data(res):
    """Convert raw flight-detail replies into the routing JSON document.

    Args:
        res: Iterable of JSON strings, one per flight, each expected to hold
            ``data.flight`` (with a ``bingoClassInfoList`` of cabins) and
            ``data.misFlight``.

    Returns:
        A JSON string with the keys ``msg``, ``sessionId``, ``status`` and
        ``routing``. ``routing`` has one entry per reply that contained a
        flight with at least one cabin; replies without one are skipped
        instead of aborting the whole conversion.

    Raises:
        ValueError: If a reply is not valid JSON or a numeric field cannot be
            converted.
    """
    routing = []
    for raw in res:
        response = json.loads(raw)
        payload = response.get('data') or {}
        flight = payload.get('flight')
        allCabins = flight.get('bingoClassInfoList') if flight else None
        if not allCabins:
            # Nothing to price here (e.g. the reply has no flight data).
            print("skip flight detail without cabin data")
            continue

        first_cabin = allCabins[0]
        # NOTE(review): "publishPrice", "seatCount" and "productType" are
        # hard-coded here rather than read from the reply - confirm intended.
        product = {
            "salePrice": first_cabin.get('adtInfPrice'),
            "cabinGrade": first_cabin.get('classNoCn'),
            "childSalePrice": first_cabin.get('childSalePrice'),
            "productData": "",
            "cabin": "",
            "rule": {"canChange": True, "ruleList": [
                {"returnPre": None, "returnFee": None, "timeText": None, "time": None, "changeFee": None,
                 "changePre": None}], "allowTransfer": False, "canRefund": True, "isAirlineRule": True},
            "publishPrice": 0.0,
            "seatCount": 3,
            "childSeatCount": "",
            "productType": 1,
            "productDes": "\u5b98\u7f51",
            "childCabin": ""
        }

        cabinInfoList = [
            {
                "cabinGrade": perCabin.get('classNoCn'),
                "rate": _rate(perCabin.get('discount')),
                "cabin": perCabin.get('classNo'),
                "seatCount": _seat_count(perCabin.get('seatNum')),
                "adultFare": perCabin.get('adtInfPrice'),
                "luggageDes": perCabin.get('luggageText'),
                "billExpressDes": ""
            }
            for perCabin in allCabins
        ]

        dep_clock = flight.get('depTime')
        arr_clock = flight.get('arrTime')
        # NOTE(review): "publishPrice": 1232.0 is a fixed literal, not a value
        # taken from the reply - looks like a placeholder; confirm.
        segments = {
            "stopCities": "",
            "discount": _rate(first_cabin.get('discount')),
            "chdConstrucFee": first_cabin.get('childBuildFee'),
            "duration": computing_time(dep_clock, arr_clock),
            "stopAirports": "",
            "codeShare": False,
            "chdFuleFee": 0.0,
            "depTerminal": flight.get('depTerminal'),
            "arrAirport": flight.get('arrCity'),
            "construcFee": None,
            "fuleFee": 0.0,
            "operatingFlightNo": "",
            "depTime": _date_time(flight.get('depDate'), dep_clock),
            "publishPrice": 1232.0,
            "depAirport": flight.get('depCity'),
            "flightNumber": flight.get('flightNo'),
            "carrier": flight.get('airways'),
            # A missing "misFlight" yields None rather than an AttributeError.
            "aircraftCode": (payload.get('misFlight') or {}).get('planeStyle'),
            "arrTerminal": flight.get('arrTerminal'),
            "arrTime": _date_time(flight.get('arrDate'), arr_clock),
            "cabinInfo": cabinInfoList
        }

        routing.append({
            "fromRoutings": {"productList": product, "segments": [segments]}
        })

    return json.dumps({
        "msg": "SUCCESS",
        "sessionId": "",
        "status": 0,
        "routing": routing,
    })


def main(tasks):
    """Run the whole pipeline for one query and return the routing JSON.

    Args:
        tasks: Query mapping passed unchanged to ``get_index_data``.

    Returns:
        The JSON string produced by ``parse_data``.
    """
    index_result = get_index_data(tasks)
    # Drain the lazy result iterator so every detail reply is collected.
    detail_bodies = list(create_thread(index_result))
    return parse_data(detail_bodies)


if __name__ == '__main__':
    # Sample query: Beijing to Shanghai on 2019-09-26.
    tasks = dict(dep_city="北京", arr_city="上海", dep_time="2019-09-26")
    # main() performs the same search -> details -> parse sequence.
    print(main(tasks))

此代码仅供参考。
京东的反爬策略是滑块验证码,可以在代码里加入代理 IP 来应对。我之前用的是 Windows 服务器,所以采用切换 ADSL 拨号的方式更换 IP;代码中调用的 Adsl 类(负责重新拨号)没有在本文中给出,运行前需要自行实现,或改用代理 IP。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值