# 1. Query information for all stations nationwide and save it as JSON (查询全国各车站信息并保存为json格式)
import json
import time

import requests
from requests.exceptions import RequestException
def getResponse(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the response.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection; a timeout is raised as a ``RequestException``
            subclass and is therefore reported as ``None`` like any other
            request failure.

    Returns:
        The ``requests.Response`` when the status code is 200, otherwise
        ``None`` (non-200 status or any ``RequestException``).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    }
    # Keep the try body to the single call that can raise.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response
    return None
def parse_station_names(text):
    """Build a ``{station name: station code}`` mapping from the script text.

    The text is split on ``'@'``; everything before the first ``'@'`` is
    discarded. Each remaining chunk is split on ``'|'`` and contributes
    ``fields[1] -> fields[2]``.

    Args:
        text: Body of the downloaded ``station_name.js`` file.

    Returns:
        A dict mapping field 1 of every chunk to field 2 of the same chunk.
        Chunks with fewer than three ``'|'``-separated fields (for example
        when the server answered with something other than the station
        list) are skipped instead of raising ``IndexError``.
    """
    stations = {}
    for chunk in text.split('@')[1:]:
        fields = chunk.split('|')
        if len(fields) < 3:
            # Not a station entry; ignore it rather than abort the export.
            continue
        stations[fields[1]] = fields[2]
    return stations


if __name__ == "__main__":
    url = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js"
    data = getResponse(url)
    if data is not None:
        dict_data = parse_station_names(data.text)
        # Example lookup: dict_data['深圳北'] returns the code stored for that name.
        with open("stations.json", 'w', encoding='utf-8') as fp:
            # ensure_ascii=False keeps the Chinese station names readable in the file.
            json.dump(dict_data, fp, ensure_ascii=False)
# 2. Scrape train service information (爬取车次信息)
import pandas as pd
import requests
import json
'''
f = open('city.json',mode='r',encoding='utf-8')
text = f.read()
city_json = json.loads(text)
'''
def parse_train_record(record):
    """Turn one ``'|'``-separated entry of the query result into a table row.

    Every ``'有'`` in the record is replaced by ``'Yes'`` and every ``'无'``
    by ``'No'`` before the record is split on ``'|'``.

    Args:
        record: One string from the ``data.result`` list of the response.

    Returns:
        A dict with the keys ``num``, ``start``, ``end``, ``usetime``,
        ``Top``, ``first``, ``second``, ``yz``, ``wz``, ``rw`` and ``yw``
        (in that order). Train numbers containing ``'G'`` fill the three
        seat-class columns and leave the others as ``'-'``; all other trains
        fill the sleeper/seat columns and leave the seat-class ones as ``'-'``.

    Raises:
        IndexError: If the record has fewer fields than the positions read.
    """
    fields = record.replace('有', 'Yes').replace('无', 'No').split('|')
    num = fields[3]  # train number
    row = {
        'num': num,
        'start': fields[8],  # departure time
        'end': fields[9],  # arrival time
        'usetime': fields[10],  # travel duration
        'Top': '-',
        'first': '-',
        'second': '-',
        'yz': '-',
        'wz': '-',
        'rw': '-',
        'yw': '-',
    }
    if 'G' in num:
        row['Top'] = fields[32]  # premier-class seat
        row['first'] = fields[31]  # first-class seat
        row['second'] = fields[30]  # second-class seat
    else:
        row['rw'] = fields[23]  # soft sleeper
        row['yw'] = fields[28]  # hard sleeper
        row['yz'] = fields[29]  # hard seat
        row['wz'] = fields[26]  # no seat (standing)
    return row


if __name__ == "__main__":
    url = 'https://kyfw.12306.cn/otn/leftTicket/query?'
    data = {
        'leftTicketDTO.train_date': '2023-12-09',
        'leftTicketDTO.from_station': 'BJP',
        'leftTicketDTO.to_station': 'CDW',
        'purpose_codes': 'ADULT'
    }
    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        'Cookie':
            'JSESSIONID=075AD1662DDC1E1B4BA1C32399779883; BIGipServerpassport=921174282.50215.0000; guidesStatus=off; highContrastMode=defaltMode; cursorStatus=off; route=9036359bb8a8a461c164a04f8f50b252; BIGipServerotn=2045247754.50210.0000; _jc_save_fromStation=%u5317%u4EAC%2CBJP; _jc_save_toStation=%u6210%u90FD%2CCDW; _jc_save_fromDate=2023-12-09; _jc_save_toDate=2023-12-09; _jc_save_wfdc_flag=dc',
        'Referer':
            'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc&fs=%E5%8C%97%E4%BA%AC,BJP&ts=%E6%88%90%E9%83%BD,CDW&date=2023-12-09&flag=N,N,Y'
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, params=data, headers=headers, timeout=10)
    response.raise_for_status()
    response.encoding = response.apparent_encoding  # auto-detect the text encoding
    try:
        result = response.json()['data']['result']
    except (ValueError, KeyError, TypeError) as err:
        # NOTE(review): presumably happens when the hard-coded Cookie is no
        # longer accepted and the server answers with a non-JSON page - confirm.
        raise RuntimeError(
            "leftTicket/query did not return the expected JSON payload"
        ) from err
    lis = [parse_train_record(index) for index in result]
    content = pd.DataFrame(lis)
    print(content)