提示:以下是本篇文章正文内容,下面案例可供参考
一、cookie–反反爬案例
发现问题
如果返回的内容不是预期的数据,而是一段乱码(或一整页 HTML/JS 代码),通常就说明请求被反爬机制拦截了
import requests

# Target URL. Kept free of trailing whitespace: a trailing space inside the
# literal would be sent as part of the last query value (purpose_codes).
url = "https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2022-05-08&leftTicketDTO.from_station=SSH&leftTicketDTO.to_station=NVH&purpose_codes=ADULT"
# Request headers: send a browser-like User-Agent. No Cookie is sent here,
# which is the case this snippet demonstrates.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36 ",
}
# Pass an explicit timeout so the script cannot hang forever on a stalled
# connection (requests does not time out by default).
res = requests.get(url, headers=header, timeout=10)
# Dump the raw body to inspect what the server actually returned.
print(res.text)
部分结果:
var isnMonths = new initArray(“1月”, “2月”, “3月”, “4月”, “5月”, “6月”, “7月”, “8月”, “9月”, “10月”, “11月”, “12月”);
var isnDays = new initArray(“星期日”, “星期一”, “星期二”, “星期三”, “星期四”, “星期五”, “星期å
”, “星期日”);
完整案例演示
import requests
import json

# Target URL
url = "https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2022-05-08&leftTicketDTO.from_station=SSH&leftTicketDTO.to_station=NVH&purpose_codes=ADULT"
# Request headers: browser-like User-Agent plus a Cookie header. The Cookie
# is the only header added compared with the cookie-less request.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36 ",
    "Cookie": "_uab_collina=165115681613984897106036; JSESSIONID=2DC8E498A7C43F899A46596AF5A25724; highContrastMode=defaltMode; guidesStatus=off; cursorStatus=off; _jc_save_wfdc_flag=dc; _jc_save_fromStation=%u7ECD%u5174%u4E1C%2CSSH; _jc_save_toStation=%u5B81%u6CE2%u4E1C%2CNVH; route=9036359bb8a8a461c164a04f8f50b252; RAIL_EXPIRATION=1652220388497; RAIL_DEVICEID=JhjchnUmC1JrycFCWU6gVtuXbgBaAW_AwOMp9dKuHbfUG3XEWD2S9pbLlZdPA4EiDZWz03kX0fItt-c4atCddzmvSrgrPYf-uHqGqFwgFIjzrYAdSKwYgGyvvrz2gEKIwAZCz_atJWRHWKU8Ql9gPzqQf37CwWz4; _jc_save_fromDate=2022-05-08; _jc_save_toDate=2022-05-07; BIGipServerotn=501743882.50210.0000",
    # "Referer": "https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc",
    # "Host": "kyfw.12306.cn"
}
# Pass an explicit timeout so the script cannot hang forever on a stalled
# connection (requests does not time out by default).
res = requests.get(url, headers=header, timeout=10)
# print(type(res.text), res.text)

# Option 1: turn the body text into a dict with the json module, then drill
# down step by step.
# result_dict = json.loads(res.text)
# print(type(result_dict), result_dict)

# Option 2: let requests decode the JSON body itself. This only works when
# the response body really is JSON (e.g. dynamically loaded data).
# print(res.json()['data']['result'])
result_list = res.json()['data']['result']

for result in result_list:
    # Each entry is one "|"-separated string; split it once and index the
    # pieces. List indexes start at 0: index 3 holds the train number and
    # index 30 holds the second-class seat availability.
    fields = result.split("|")
    train_code = fields[3]
    second_class = fields[30]
    # print(train_code, second_class)

    # Report availability: '无' ("none") or an empty field means no tickets.
    if second_class != '无' and second_class != '':
        print(train_code, '有票', second_class)
    else:
        print(train_code, '无票')
二、cookie–模拟登陆案例(QQ空间)
案例代码
import requests
import json

# Target URL
url = "https://user.qzone.qq.com/1411901876?ADUIN=1411901876&ADSESSION=1651896468&ADTAG=CLIENT.QQ.5887_MyTip.0&ADPUBNO=27211&source=namecardhoverstar"
# Request headers: browser-like User-Agent plus the cookie string that
# carries the session, sent so the request is treated as logged in.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36 ",
    "cookie": "RK=c1Jl2wzDNO; ptcz=cb0a1bdbf9832b42bea33ebbdd145842a9ff547bb313957db49ff76d7aed9ccf; pgv_pvi=7368728576; pac_uid=0_f7a942507b0af; iip=0; tvfe_boss_uuid=d5355185a42bcfc8; pgv_pvid=1077870710; LW_uid=u1p613z1m775T7S8Y9k0K6S6I3; eas_sid=41q6f3U1t7z5D7I8g9I0j6O7x0; sd_userid=4531632236022001; sd_cookie_crttime=1632236022001; o_cookie=1411901876; Qs_lvt_323937=1631757887%2C1631758583%2C1645502055; Qs_pv_323937=3766673236515106300%2C3658028716656174000%2C2560220660252052000; ptui_loginuin=1411901876; __Q_w_s_hat_seed=1; LW_sid=01i6k4f8o8w090f5J7o665e7n3; uin=o1411901876; skey=@92Xby2oqr; p_uin=o1411901876; Loading=Yes; qz_screen=1536x864; 1411901876_todaycount=0; 1411901876_totalcount=5056; pgv_info=ssid=s2255134658; QZ_FE_WEBP_SUPPORT=1; cpu_performance_v8=0; _qpsvr_localtk=0.4724953313716327; qzone_check=1411901876_1651904513; pt4_token=MnwAod0V7Vgkaa*fyYW0rsKiop1NSFHSlqSVy3pG4jA_; p_skey=TBrKvDMNFKbJsZs866zLm92oARoBF7o4aFvZJuenseQ_"
}
# Pass an explicit timeout so the script cannot hang forever on a stalled
# connection (requests does not time out by default).
res = requests.get(url, headers=header, timeout=10)
# Dump the returned page to check whether the cookie was accepted.
print(res.text)