以下代码用于爬取 12306 官网的列车信息;有兴趣的读者也可以尝试爬取 http://www.jt2345.com/ 。
import requests
import json
def get_City_data():
    '''
    Download the 12306 station list and build a name -> telecode table.

    The endpoint answers with a JavaScript assignment whose string value
    holds '@'-separated station records such as
    'zyi|遵义|ZYE|zunyi|zy|2844'; field 1 is the station name and field 2
    is its telecode.

    Returns:
        dict mapping station name to telecode, e.g. {'遵义': 'ZYE'}.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        ValueError: the response body contains no '=' and therefore is not
            the expected assignment.
    '''
    # The 12306 site bumps the station list version often; remember to
    # update station_version when the table looks stale.
    url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9099'
    # Present a browser User-Agent instead of the default requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36'
    }
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    # Keep everything after the FIRST '=' so that a later '=' inside the
    # payload cannot truncate the station list.
    _, separator, payload = response.text.partition('=')
    if not separator:
        raise ValueError('unexpected station list format: no "=" found')

    ret = {}
    for unit in payload.split('@'):
        fields = unit.split('|')
        # Skip fragments that are not full records (e.g. the leading quote
        # that precedes the first '@').
        if len(fields) < 3:
            continue
        ret[fields[1]] = fields[2]
    return ret
def get_trains_info(date, from_station, to_station):
    '''
    Query the 12306 left-ticket endpoint and return the raw response text.

    Args:
        date: travel date, sent as leftTicketDTO.train_date.
        from_station: departure station telecode.
        to_station: arrival station telecode.

    Returns:
        The response body as text; the caller parses it as JSON.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
    '''
    url = 'https://kyfw.12306.cn/otn/leftTicket/query'
    # Passing the values through `params` URL-encodes them, so unexpected
    # characters in user input cannot inject extra query parameters. The
    # insertion order matches the original hand-built query string.
    params = {
        'leftTicketDTO.train_date': date,
        'leftTicketDTO.from_station': from_station,
        'leftTicketDTO.to_station': to_station,
        'purpose_codes': 'ADULT',
    }
    # Present a browser User-Agent instead of the default requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3664.3 Safari/537.36'
    }
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    response.raise_for_status()
    return response.text
def get_stopover_station(train_no, start_station, end_station, date):
    '''
    Query the 12306 queryByTrainNo endpoint and return the raw response text.

    Args:
        train_no: value sent as the train_no query parameter.
        start_station: value sent as from_station_telecode.
        end_station: value sent as to_station_telecode.
        date: value sent as depart_date.

    Returns:
        The response body as text; the caller parses it as JSON.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
    '''
    url = 'https://kyfw.12306.cn/otn/czxx/queryByTrainNo'
    # Passing the values through `params` URL-encodes them, so unexpected
    # characters cannot inject extra query parameters. The insertion order
    # matches the original hand-built query string.
    params = {
        'train_no': train_no,
        'from_station_telecode': start_station,
        'to_station_telecode': end_station,
        'depart_date': date,
    }
    # Present a browser User-Agent instead of the default requests one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3664.3 Safari/537.36'
    }
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    response.raise_for_status()
    return response.text
def main(date, startcity, endcity):
    '''
    Print every train returned for a date and city pair, with its stopovers.

    Args:
        date: travel date string forwarded to both query endpoints.
        startcity: departure station name; must be a key of the station table.
        endcity: arrival station name; must be a key of the station table.

    Raises:
        KeyError: a station name is not in the station table, or a response
            lacks the expected "data" keys.
    '''
    # Station name -> telecode table for all stations.
    data = get_City_data()
    # Raw text returned by the ticket query endpoint.
    info = get_trains_info(date, data[startcity], data[endcity])
    # The response is JSON shaped like
    # {"data": {"map": {...}, "result": ["...|预订|550000D70600|D706|SHH|BJP|...", ...]}, ...};
    # each entry of "result" describes one train as a '|'-separated record.
    hjson = json.loads(info)["data"]["result"]
    banner = '*' * 100
    # Sample raw_train:
    # "|预订|550000Z16420|Z164|SHH|LSO|SHH|LSO|20:08|19:30|47:22|N|...|20190303|3|H2|01|14|0|0||||无|||无||无|无|||||10401030|1413|0|0|null"
    for raw_train in hjson:
        data_list = raw_train.split('|')
        # Train code
        train_no = data_list[3]
        # Departure / destination are reported as the names the user typed.
        from_station_name = startcity
        to_station_name = endcity
        # Departure time, arrival time and total travel time
        start_time = data_list[8]
        arrive_time = data_list[9]
        time_used_up = data_list[10]
        # Seat availability; an empty field is displayed as '--'.
        first_class_seat = data_list[31] or '--'   # first-class seat
        second_class_seat = data_list[30] or '--'  # second-class seat
        soft_sleep = data_list[23] or '--'         # soft sleeper
        hard_sleep = data_list[28] or '--'         # hard sleeper
        hard_seat = data_list[29] or '--'          # hard seat
        no_seat = data_list[26] or '--'            # standing (no seat)
        # Named `summary` rather than `list` so the builtin is not shadowed.
        summary = (
            '车次:{} 出发站:{} 目的地:{} 出发时间:{} 到达时间:{} 火车运行时间:{} 座位情况:\n'
            ' 一等座:「{}」 二等座:「{}」 软卧:「{}」 硬卧:「{}」 硬座:「{}」 无座:「{}」'
        ).format(
            train_no, from_station_name, to_station_name,
            start_time, arrive_time, time_used_up,
            first_class_seat, second_class_seat, soft_sleep,
            hard_sleep, hard_seat, no_seat,
        )
        print(banner)
        print(summary)
        print(banner)
        print('经停站信息:')
        stop = get_stopover_station(data_list[2], data_list[6], data_list[7], date)
        stop_json = json.loads(stop)["data"]["data"]
        # Walk consecutive stop pairs: each line is one leg from a stop to
        # the next one, so the last stop only ever appears as an arrival.
        for current_stop, next_stop in zip(stop_json, stop_json[1:]):
            print('车次:{} 出发站:{} 到达站:{} 出发时间:{} 到达时间:{} '.format(
                train_no,
                current_stop['station_name'],
                next_stop['station_name'],
                current_stop['start_time'],
                next_stop['arrive_time'],
            ))
if __name__ == '__main__':
    # Read the query from the console; input() already returns str.
    date = input('请输入日期(格式“2019-01-01”): ')
    startcity = input('出发站: ')
    endcity = input('到达站:')
    try:
        main(date, startcity, endcity)
    except KeyError:
        # main() raises KeyError, e.g. when a station name is not in the
        # station table; report it instead of showing a traceback.
        print('您输入的数据有问题,请重新输入')
效果图如下: