现在的抢票软件大多是基于爬虫技术实现的,而12306网站的反爬虫能力也很强。
1、登录12306网站
登录12306网站时,系统会检查登录用户的Cookie信息,所以我们在发送登录请求时要携带有效的Cookie。
12306的登录URL一旦提交成功,就会刷新当前页面,即Network面板上原来的请求记录会被清掉。所以我们可以通过输入错误的密码来阻止页面刷新,这样就可以看到登录的URL:
https://kyfw.12306.cn/passport/web/login
2、从请求的Form Data中的answer参数可以看出,验证码是通过所选图片的坐标来验证的。
https://kyfw.12306.cn/passport/captcha/captcha-image64?login_site=E&module=login&rand=sjrand&1572354630430&callback=jQuery1910009093299883589978_1572354581015&_=1572354581018
通过观察可知,请求上面的URL可以获取验证码图片,但返回的图片数据是经过base64编码的,需要先解码。
import requests
import time
import re,ssl
import base64
import re,datetime
from PIL import Image
import urllib
import json,random
session=requests.session()
# Log in
def login(username,password):
    """Log in to 12306 through the module-level ``session``.

    The flow is: download the base64-encoded captcha image, display it,
    ask the user which tiles to select, verify the answer, post the
    credentials, then exchange the ``newapptk`` token via ``uamauthclient``.
    Every server response is printed for inspection.

    Args:
        username: Account name, posted as the ``username`` form field.
        password: Account password, posted as the ``password`` form field.

    Raises:
        RuntimeError: The captcha response contains no ``"image"`` payload.
        ValueError: The typed tile selection is empty or names an unknown tile.
        KeyError: The ``_passport_ct`` / ``_passport_session`` cookies or the
            ``newapptk`` field are missing from the server responses.
    """
    # Kept from the original code.
    # NOTE(review): this replaces the stdlib default HTTPS context factory
    # with an unverified one for the whole process; confirm it is still
    # needed, since the requests below are issued through ``session``.
    ssl._create_default_https_context = ssl._create_unverified_context

    # Tile number typed by the user -> "x,y" click coordinate sent as answer.
    code_list = {
        '1': "39,39",
        '2': "113,34",
        '3': "192,39",
        '4': "257,36",
        '5': "42,115",
        '6': "119,107",
        '7': "185,124",
        '8': "272,117",
    }
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36')
    headers = {
        'Referer': 'https://kyfw.12306.cn/otn/resources/login.html',
        'User-Agent': user_agent,
    }

    # Fetch the captcha; the image arrives base64-encoded in an "image" field.
    url = ('https://kyfw.12306.cn/passport/captcha/captcha-image64'
           '?login_site=E&module=login&rand=sjrand&1547952303645'
           '&callback=jQuery19101759934201076836_1547951452422&_=1547951452436')
    r = session.get(url, headers=headers, verify=True)
    match = re.search(r'"image":"(.*?)"', r.text, re.S)
    if match is None:
        raise RuntimeError('captcha response did not contain an "image" field')
    with open('a.png', 'wb') as f:
        f.write(base64.b64decode(match.group(1)))
    im = Image.open('a.png')
    im.show()
    im.close()

    # Accept ASCII commas, full-width commas and whitespace as separators.
    raw_answer = input("请输入验证码:")
    picks = [p for p in re.split(r'[,，\s]+', raw_answer) if p]
    unknown = [p for p in picks if p not in code_list]
    if not picks or unknown:
        raise ValueError('captcha selection must be tile numbers 1-8, got %r' % raw_answer)
    code = ','.join(code_list[p] for p in picks)

    _passport_ct = session.cookies['_passport_ct']
    _passport_session = session.cookies['_passport_session']

    # Verify the captcha answer.
    url = ('https://kyfw.12306.cn/passport/captcha/captcha-check'
           '?callback=jQuery191013743861189559858_' + str(int(time.time() * 1000))
           + '&answer=' + urllib.parse.quote(code)
           + '&rand=sjrand&login_site=E&_=' + str(int(time.time() * 1000)))
    r = session.get(url)
    print(r.text)

    # Post the credentials. The cookie is assembled from parts so that the
    # RAIL_EXPIRATION timestamp is actually substituted: in the previous
    # single-expression form, ``.format`` bound only to the last string
    # literal of the ``+`` chain and the ``{}`` placeholder was sent as-is.
    cookie_parts = [
        '_passport_ct=' + _passport_ct,
        '_passport_session=' + _passport_session,
        '__guid=14023341.2869285705716891600.1569225014329.1348',
        '_jc_save_fromStation=%u5E7F%u5DDE%2CGZQ',
        '_jc_save_wfdc_flag=dc',
        '_jc_save_toStation=%u8302%u540D%u897F%2CMMZ',
        '_jc_save_fromDate=2019-06-07',
        '_jc_save_toDate=2019-05-31',
        'RAIL_EXPIRATION=' + str(int(time.time() * 1000)),
        'RAIL_DEVICEID=gq9n6rQjncP1U1_xm6IS-Qmcgx2jIxMvto4kJER5olMpnr4BM-qJaBZSbP7rFEfEv799ixJwPJfZ3Mr_LZ3ImK2jQca9VX-OKwT6IZKEfpabmiKGJGd2_EroOrUiyAesTkQaFGUuFTzep1u-GXobHn9jtnK_Twej',
        'BIGipServerpool_passport=283968010.50215.0000',
        'route=9036359bb8a8a461c164a04f8f50b252',
        'BIGipServerotn=1206911498.24610.0000',
        'monitor_count=6',
    ]
    url_login = 'https://kyfw.12306.cn/passport/web/login'
    login_headers = {
        'Cookie': '; '.join(cookie_parts),
        'Referer': 'https://kyfw.12306.cn/otn/resources/login.html',
        'User-Agent': user_agent,
    }
    data = {
        'username': username,
        'password': password,
        'appid': 'otn',
        'answer': code,
    }
    r = session.post(url_login, headers=login_headers, data=data)
    print(r.text)

    # Obtain the new app token ...
    url = 'https://kyfw.12306.cn/passport/web/auth/uamtk-static'
    r = session.post(url, headers=headers, data={'appid': 'otn'})
    print(r.text)
    newapptk = r.json()['newapptk']

    # ... and hand it to the otn client endpoint.
    url = 'https://kyfw.12306.cn/otn/uamauthclient'
    r = session.post(url, data={'tk': newapptk})
    print(r.text)
2、获得城市编号
# Fetch the station lookup table
def city_name():
    """Download ``station_name.js`` and build a lookup dict from it.

    The response text is split on ``'|'``; for every field that contains
    ``'@'``, the field right after it becomes a key and the one after that
    becomes its value (callers in this file use it as station name -> code).

    Returns:
        dict: The mapping described above; later entries overwrite earlier
        ones that share the same key.
    """
    response = session.get(
        "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9090"
    )
    fields = response.text.split('|')
    return {
        fields[pos + 1]: fields[pos + 2]
        for pos, field in enumerate(fields)
        if '@' in field
    }
3、获得车次信息
https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2019-10-29&leftTicketDTO.from_station=GZQ&leftTicketDTO.to_station=MMZ&purpose_codes=ADULT
def train_info(date,start_station,end_station):
    """Poll the left-ticket query until a bookable train is found.

    The station arguments are translated through ``city_name()`` and the
    query is repeated every 2 seconds until a result row has a non-empty
    first field and ``'有'`` in field 29.

    Args:
        date: Travel date string placed into ``leftTicketDTO.train_date``.
        start_station: Departure station key as found in ``city_name()``.
        end_station: Arrival station key as found in ``city_name()``.

    Returns:
        dict: ``start_station`` / ``end_station`` (translated values) plus
        ``secretStr``, ``train_no``, ``stationTrainCode``,
        ``fromStationTelecode``, ``toStationTelecode``, ``leftTicket`` and
        ``train_location`` taken from the first matching row.

    Raises:
        KeyError: A station argument is not present in ``city_name()``.
    """
    # Download the station table once instead of once per lookup.
    station_codes = city_name()
    start_station = station_codes[start_station]
    end_station = station_codes[end_station]

    headers = {
        'Referer': ('https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
                    '&fs=%E5%B9%BF%E5%B7%9E%E5%8D%97,{}&ts=%E8%8C%82%E5%90%8D,{}'
                    '&date={}&flag=N,N,Y').format(start_station, end_station, date),
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    }
    # The query URL does not change between polls, so build it once.
    url = ('https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date={}'
           '&leftTicketDTO.from_station={}&leftTicketDTO.to_station={}'
           '&purpose_codes=ADULT').format(str(date), str(start_station), str(end_station))

    while True:
        # The headers were previously built but never sent with the request.
        r = session.get(url, headers=headers)
        try:
            rows = r.json()['data']['result']
        except (ValueError, KeyError, TypeError):
            # Non-JSON or unexpected payload: report it and keep polling
            # rather than crashing or hiding the problem.
            print('unexpected left-ticket response, retrying')
            rows = []
        if not isinstance(rows, list):
            rows = []

        for row in rows:
            if not isinstance(row, str):
                continue
            fields = row.split('|')
            if len(fields) <= 29:
                continue  # malformed row; skip it
            if fields[0] != '' and fields[29] == '有':
                return {
                    'start_station': start_station,
                    'end_station': end_station,
                    'secretStr': fields[0],
                    'train_no': fields[2],
                    'stationTrainCode': fields[3],
                    'fromStationTelecode': fields[4],
                    'toStationTelecode': fields[7],
                    'leftTicket': fields[12],
                    'train_location': fields[15],
                }

        # Nothing bookable yet; wait before asking again.
        time.sleep(2)
4、检验登录
在我们进行预订车票操作时,系统会先检查用户是否处于登录状态。
def train_order(secretStr,train_date,quety_from_station_name,query_to_station_name):
    """Check the login state, then post the order request for one train.

    Args:
        secretStr: URL-quoted train token; it is unquoted before posting.
        train_date: Travel date string sent as ``train_date``.
        quety_from_station_name: Value sent as ``query_from_station_name``.
        query_to_station_name: Value sent as ``query_to_station_name``.
    """
    # Today's date doubles as the return-trip date field.
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    print(today)

    # Both requests present the same browser-like headers.
    browser_headers = {
        'Referer': 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    }

    # Step 1: user login check.
    session.post(
        'https://kyfw.12306.cn/otn/login/checkUser',
        headers=browser_headers,
        data={'_json_att': ''},
    )

    # Step 2: submit the order request.
    order_form = {
        'secretStr': urllib.parse.unquote(secretStr),
        'train_date': train_date,
        'back_train_date': today,
        'tour_flag': 'dc',
        'purpose_codes': 'ADULT',
        'query_from_station_name': quety_from_station_name,
        'query_to_station_name': query_to_station_name,
        'undefined': ''
    }
    session.post(
        'https://kyfw.12306.cn/otn/leftTicket/submitOrderRequest',
        headers=browser_headers,
        data=order_form,
    )
5、提交订单
1、请求下面两个URL之一,可以从response内容中获得key_check_isChange,globalRepeatSubmitToken,seat_type参数,后面的URL提交需要携带这些参数。由于系统内部实际请求的是哪一个链接是随机的,我们事先无法确定,所以可以对两个URL都发起请求,看哪个返回了所需的内容。
https://kyfw.12306.cn/otn/confirmPassenger/initDc
https://kyfw.12306.cn/otn/confirmPassenger/initWc
2、获得allEncStr,passenger_id_no参数信息。
3、检查用户
返回成功结果:
4、检查是否还有车票
其中leftTicket参数是进行加密编码的
5、订单提交
其中leftTicket参数是进行加密编码的
# Submit the order request
def train_order(secretStr,train_date,quety_from_station_name,query_to_station_name):
    """Check the login state, then post the order request for one train.

    The ``submitOrderRequest`` response body is printed.

    Args:
        secretStr: URL-quoted train token; it is unquoted before posting.
        train_date: Travel date string sent as ``train_date``.
        quety_from_station_name: Value sent as ``query_from_station_name``
            (parameter spelling kept for backward compatibility).
        query_to_station_name: Value sent as ``query_to_station_name``.
    """
    # Today's date is sent as the return-trip date field.
    back_train_date = datetime.datetime.now().strftime('%Y-%m-%d')
    print(back_train_date)

    # The same headers serve both requests, so define them once.
    headers = {
        'Referer': 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    }

    # User login check.
    url = 'https://kyfw.12306.cn/otn/login/checkUser'
    session.post(url, headers=headers, data={'_json_att': ''})

    # Order request for the selected train.
    url = 'https://kyfw.12306.cn/otn/leftTicket/submitOrderRequest'
    data = {
        'secretStr': urllib.parse.unquote(secretStr),
        'train_date': train_date,
        'back_train_date': back_train_date,
        'tour_flag': 'dc',
        'purpose_codes': 'ADULT',
        'query_from_station_name': quety_from_station_name,
        'query_to_station_name': query_to_station_name,
        'undefined': ''
    }
    r = session.post(url, headers=headers, data=data)
    print(r.text)
# Create the order
def create_order(name,phone_number,train_date,train_info_dict):
    """Walk through the confirm-passenger flow and place the order.

    Steps: load the ``initWc`` / ``initDc`` page to scrape the submit tokens,
    fetch passenger data, post ``checkOrderInfo``, poll ``getQueueCount``
    until the server is no longer busy, ask the user for confirmation and
    finally post ``confirmSingleForQueue``. Server responses are printed.

    Args:
        name: Passenger name placed into the passenger strings.
        phone_number: Phone number placed into ``passengerTicketStr``.
        train_date: Travel date in ``%Y-%m-%d`` format.
        train_info_dict: Mapping providing ``train_no``, ``stationTrainCode``,
            ``fromStationTelecode``, ``toStationTelecode``, ``leftTicket``
            and ``train_location``.

    Raises:
        RuntimeError: The init page or the passenger response does not
            contain the fields this flow needs.
    """
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36')

    # Try both init pages and keep the first one that exposes a seat type.
    init_urls = ['https://kyfw.12306.cn/otn/confirmPassenger/initWc',
                 'https://kyfw.12306.cn/otn/confirmPassenger/initDc']
    tour_flag = ["wc", "dc"]  # parallel to init_urls
    for num, a in enumerate(init_urls):
        init_headers = {
            'Referer': 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=wc',
            'User-Agent': user_agent,
        }
        r = session.post(a, headers=init_headers, data={'_json_att': ''})
        r.encoding = r.apparent_encoding
        globalRepeatSubmitToken = re.findall(r"globalRepeatSubmitToken = '(.*?)'", r.text)
        key_check_isChange = re.findall(r"'key_check_isChange':'(.+?)'", r.text)
        seat_type = re.findall(r"init_seatTypes=.+?'id':'(.+?)'", r.text)
        if seat_type:
            break
    print(key_check_isChange, globalRepeatSubmitToken, seat_type)
    if not (globalRepeatSubmitToken and key_check_isChange and seat_type):
        # Fail early with a clear message instead of an IndexError later on.
        raise RuntimeError('confirmPassenger init page did not contain the expected tokens')
    token = globalRepeatSubmitToken[0]

    # From here on the chosen init page is used as the Referer.
    headers = {'Referer': a, 'User-Agent': user_agent}

    # Passenger data: the encrypted passenger string and the ID number.
    url = "https://kyfw.12306.cn/otn/confirmPassenger/getPassengerDTOs"
    data = {'_json_att': '', 'REPEAT_SUBMIT_TOKEN': token}
    print(data)
    r = session.post(url, headers=headers, data=data)
    allEncStr = re.findall(r'"allEncStr":"(.*?)"', r.text)
    identity_card = re.findall(r'"passenger_id_no":"(.*?)"', r.text)
    print(allEncStr, identity_card)
    if not (allEncStr and identity_card):
        raise RuntimeError('getPassengerDTOs response did not contain passenger data')

    # NOTE(review): the first match in the response is used regardless of
    # ``name``; confirm the account's first passenger is the intended one.
    passengerTicketStr = ('1,0,1,' + name + ',1,' + identity_card[0] + ','
                          + phone_number + ',N,' + allEncStr[0])
    oldPassengerStr = name + ',1,' + identity_card[0] + ',1_'

    # Check the order information.
    url = 'https://kyfw.12306.cn/otn/confirmPassenger/checkOrderInfo'
    data = {
        'cancel_flag': '2',
        'bed_level_order_num': '000000000000000000000000000000',
        'passengerTicketStr': passengerTicketStr,
        'oldPassengerStr': oldPassengerStr,
        'tour_flag': tour_flag[num],
        'randCode': '',
        'whatsSelect': '1',
        '_json_att': '',
        'REPEAT_SUBMIT_TOKEN': token
    }
    r = session.post(url, headers=headers, data=data)
    print(r.text)

    # Queue count: the payload is identical on every retry, so build it once.
    url = 'https://kyfw.12306.cn/otn/confirmPassenger/getQueueCount'
    parsed_date = time.strptime(train_date + ' 00:00:00', "%Y-%m-%d %H:%M:%S")
    date = time.strftime("%a %b %d %Y", parsed_date)
    data = {
        'train_date': date + ' 00:00:00 GMT+0800 (中国标准时间)',
        'train_no': train_info_dict['train_no'],
        'stationTrainCode': train_info_dict['stationTrainCode'],
        'seatType': seat_type[0],
        'fromStationTelecode': train_info_dict['fromStationTelecode'],
        'toStationTelecode': train_info_dict['toStationTelecode'],
        # leftTicket arrives URL-quoted and is unquoted before posting.
        'leftTicket': urllib.parse.unquote(train_info_dict['leftTicket']),
        'purpose_codes': '00',
        'train_location': train_info_dict['train_location'],
        '_json_att': '',
        'REPEAT_SUBMIT_TOKEN': token
    }
    while True:
        # The headers were previously built here but never sent.
        r = session.post(url, headers=headers, data=data)
        print(r.text)
        # Retry every 5 seconds while the server reports it is busy.
        if '系统繁忙,请稍后重试' not in str(r.text):
            break
        time.sleep(5)

    # Give the user a chance to back out before the order is created.
    if 'n' == input('是否生成订单:'):
        return

    url = 'https://kyfw.12306.cn/otn/confirmPassenger/confirmSingleForQueue'
    data = {
        'passengerTicketStr': passengerTicketStr,
        'oldPassengerStr': oldPassengerStr,
        'randCode': '',
        'purpose_codes': '00',
        'key_check_isChange': key_check_isChange[0],
        'leftTicketStr': urllib.parse.unquote(train_info_dict['leftTicket']),
        'train_location': train_info_dict['train_location'],
        'choose_seats': '',
        'seatDetailType': '000',
        'roomType': '00',
        'dwAll': 'N',
        '_json_att': '',
        'REPEAT_SUBMIT_TOKEN': token
    }
    r = session.post(url, data=data)
    print(r.text)

    result = r.json()
    result_data = result['data']
    if 'errMsg' in r.text:
        # The message may sit at the top level or under "data"; look in both
        # places instead of assuming a top-level key.
        err = result.get('errMsg')
        if err is None and isinstance(result_data, dict):
            err = result_data.get('errMsg')
        print(err if err is not None else result_data)
    elif result_data is not None:
        print(result_data)
    else:
        print("订单生成成功,请及时去付款!")
程序入口:
# Script entry point: log in, find a train, request the order, then create it.
# The string arguments are placeholders to be replaced with real values.
if __name__ == "__main__":
    login(username='登陆账号', password='登陆密码')

    # Poll until a bookable train is found for the given date and stations.
    train_info_dict = train_info(date='出发时间', start_station='出发点', end_station='抵达点')

    train_order(
        train_info_dict['secretStr'],
        '出发时间',
        '出发点',
        '抵达点',
    )

    # Passenger details used for the final order.
    name, phone_number = '乘客名', '电话号码'
    create_order(
        name=name,
        phone_number=phone_number,
        train_date='出发时间',
        train_info_dict=train_info_dict,
    )
代码实现效果:
最后,12306网站的URL请求经常会发生变化,该代码仅供参考。12306网站为了防止抢票软件,会不定期在一些URL请求的细节上做出改动,这些需要我们自己去发现。另外,请求也不要过于频繁,因为12306一旦发现访问过于频繁,就会禁止该用户登录。