12306 火车票 Python爬虫 车次信息及经停站信息

以下代码用于爬取 12306 官网的车次信息及经停站信息;有兴趣的读者也可以尝试爬取 http://www.jt2345.com/ 。

import requests
import json

def get_City_data(station_version='1.9099', timeout=10):
    '''
    Download the 12306 station list and build a name -> code lookup table.

    Args:
        station_version: Value sent as the ``station_version`` query
            parameter. The site changes this frequently, so pass a newer
            value here instead of editing the URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict mapping the second '|'-separated field of every station record
        to the third one, e.g. '遵义' -> 'ZYE'.

    Raises:
        requests.RequestException: network failure, timeout, or an HTTP
            error status.
        ValueError: the response body contains no '=' separator.
    '''
    url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js'
    # Present ourselves as a regular desktop browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36'
        }
    response = requests.get(
        url,
        params={'station_version': station_version},
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    # The body is expected to be of the form "<name> = '@rec@rec...'";
    # keep everything after the first '=' so a later '=' cannot truncate it.
    _, separator, payload = response.text.partition('=')
    if not separator:
        raise ValueError('unexpected station list format: no "=" found')

    ret = {}
    # A record looks like: 'zyi|遵义|ZYE|zunyi|zy|2844'
    for unit in payload.split('@'):
        fields = unit.split('|')
        # Skip the leading quote fragment and any malformed record.
        if len(fields) < 3:
            continue
        ret[fields[1]] = fields[2]
    return ret

def get_trains_info(date, from_station, to_station, timeout=10):
    '''
    Query the 12306 left-ticket endpoint and return the raw response text.

    Args:
        date: Travel date string, sent as ``leftTicketDTO.train_date``.
        from_station: Departure station code, sent as
            ``leftTicketDTO.from_station``.
        to_station: Arrival station code, sent as
            ``leftTicketDTO.to_station``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text (the caller parses it as JSON).

    Raises:
        requests.RequestException: network failure, timeout, or an HTTP
            error status.
    '''
    url = 'https://kyfw.12306.cn/otn/leftTicket/query'
    # A list of pairs keeps the parameters in the same order as the original
    # hand-built URL while letting requests do the URL-encoding.
    params = [
        ('leftTicketDTO.train_date', date),
        ('leftTicketDTO.from_station', from_station),
        ('leftTicketDTO.to_station', to_station),
        ('purpose_codes', 'ADULT'),
    ]
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3664.3 Safari/537.36'
        }
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Surface HTTP errors here rather than as a JSON decode failure later.
    response.raise_for_status()
    return response.text

def get_stopover_station(train_no, start_station, end_station, date, timeout=10):
    '''
    Query the 12306 stop-list endpoint for one train and return the raw text.

    Args:
        train_no: Internal train identifier, sent as ``train_no``.
        start_station: Station code sent as ``from_station_telecode``.
        end_station: Station code sent as ``to_station_telecode``.
        date: Departure date string, sent as ``depart_date``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text (the caller parses it as JSON).

    Raises:
        requests.RequestException: network failure, timeout, or an HTTP
            error status.
    '''
    url = 'https://kyfw.12306.cn/otn/czxx/queryByTrainNo'
    # A list of pairs keeps the parameters in the same order as the original
    # hand-built URL while letting requests do the URL-encoding.
    params = [
        ('train_no', train_no),
        ('from_station_telecode', start_station),
        ('to_station_telecode', end_station),
        ('depart_date', date),
    ]
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3664.3 Safari/537.36'
        }
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Surface HTTP errors here rather than as a JSON decode failure later.
    response.raise_for_status()
    return response.text

def main(date, startcity, endcity):
    '''
    Print every train between two stations on a date, with its stop list.

    Args:
        date: Travel date string passed through to the query functions.
        startcity: Departure station name; looked up in the station table.
        endcity: Arrival station name; looked up in the station table.

    Raises:
        KeyError: a station name is not in the table, or a response lacks
            the expected JSON keys.
    '''
    # Station name -> station code table for the whole network.
    station_codes = get_City_data()
    # Raw text returned by the ticket query endpoint.
    info = get_trains_info(date, station_codes[startcity], station_codes[endcity])
    # Expected response shape:
    #   {"data": {"map": {code: name, ...}, "result": [record, ...]}, ...}
    # where each record is one '|'-separated string, for example
    #   "|预订|550000Z16420|Z164|SHH|LSO|SHH|LSO|20:08|19:30|47:22|N|..."
    trains = json.loads(info)["data"]["result"]
    separator = '*' * 100

    for raw_train in trains:
        fields = raw_train.split('|')
        # Public train number.
        train_no = fields[3]
        # The station names shown are the ones the user typed in.
        from_station_name = startcity
        to_station_name = endcity
        # Departure time, arrival time and total travel time.
        start_time = fields[8]
        arrive_time = fields[9]
        time_used_up = fields[10]
        # Seat availability; an empty field is displayed as '--'.
        first_class_seat = fields[31] or '--'   # first class
        second_class_seat = fields[30] or '--'  # second class
        soft_sleep = fields[23] or '--'         # soft sleeper
        hard_sleep = fields[28] or '--'         # hard sleeper
        hard_seat = fields[29] or '--'          # hard seat
        no_seat = fields[26] or '--'            # standing ticket

        # Named 'summary' rather than 'list' so the builtin is not shadowed.
        summary = ('车次:{} 出发站:{} 目的地:{} 出发时间:{} 到达时间:{} 火车运行时间:{} 座位情况:\n 一等座:「{}」 二等座:「{}」 软卧:「{}」 硬卧:「{}」 硬座:「{}」 无座:「{}」'.format(train_no, from_station_name, to_station_name, start_time, arrive_time, time_used_up, first_class_seat, second_class_seat, soft_sleep, hard_sleep, hard_seat, no_seat))

        print(separator)
        print(summary)
        print(separator)
        print('经停站信息:')

        # fields[2] is the internal train id; fields[6] and fields[7] are
        # the station codes passed on to the stop-list query.
        stop = get_stopover_station(fields[2], fields[6], fields[7], date)
        stops = json.loads(stop)["data"]["data"]

        # Print one line per leg: each stop paired with the one after it.
        for current, following in zip(stops, stops[1:]):
            print('车次:{} 出发站:{} 到达站:{} 出发时间:{} 到达时间:{} '.format(train_no, current['station_name'], following['station_name'], current['start_time'], following['arrive_time']))


if __name__ == '__main__':
    # Read the query from the console; input() already returns a string.
    date = input('请输入日期(格式“2019-01-01”): ')
    startcity = input('出发站: ')
    endcity = input('到达站:')
    try:
        main(date, startcity, endcity)
    except KeyError:
        # Raised when a lookup inside main() fails, e.g. an unknown station.
        print('您输入的数据有问题,请重新输入')

 效果图如下:

 

评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值