import re
import time
from urllib.parse import urljoin

import execjs
import requests
# Fetch a page that is protected by the "YunSuo" JavaScript verification
# challenge: the first response is a stub page whose script computes a
# verification URL; visiting that URL with the same cookie jar unlocks the
# real page.

# Test URL.
url = 'http://gaj.chifeng.gov.cn/default.php?mod=article&fid=230&s63642044_start=0'
# Seconds to wait per HTTP request; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

session = requests.session()  # keeps cookies across all requests below
response = session.get(url, timeout=REQUEST_TIMEOUT)  # first request
print(response.text)  # either the challenge HTML/JS or the real page
text = response.text

# re.S lets the capture span line breaks inside the <script> body.
challenge_scripts = re.findall("javascript\">(.*?)</script>", text, re.S)
if challenge_scripts:
    f_js = challenge_scripts[0]
    # Patch out the browser-only globals so the script runs in a plain JS
    # runtime, and turn the final redirect into a return value.
    f_js = (
        f_js.replace('screen.width', '500')
        .replace('screen.height', '500')
        .replace('window.location.href', '"' + url + '"')
        .replace('document.', '')
        .replace('self.location =', ' return ')
    )
    # NOTE(security): this executes JavaScript supplied by the remote server
    # in the local JS runtime; only use it against hosts you trust.
    ctx = execjs.compile(f_js)
    location = ctx.call("YunSuoAutoJump")  # redirect target computed by the challenge
    # Resolve the redirect target against the page URL like a browser would;
    # plain concatenation produced "http://host//default.php...".
    second_url = urljoin(url, location)
    time.sleep(0.2)
    # Second request: visit the verification URL so the session gets cleared.
    _ = session.get(second_url, timeout=REQUEST_TIMEOUT)
    # Third request: the original URL now returns the real content.
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    print(response.text)
# If no challenge script was found, the first response (already printed
# above) is the page itself and no further requests are needed.
第一次请求后的结果
<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/><meta http-equiv="Cache-Control" content="no-store, no-cache, must-revalidate, post-check=0, pre-check=0"/><meta http-equiv="Connection" content="Close"/><script type="text/javascript">function stringToHex(str){var val="";for(var i = 0; i < str.length; i++){if(val == "")val = str.charCodeAt(i).toString(16);else val += str.charCodeAt(i).toString(16);}return val;}function YunSuoAutoJump(){ var width =screen.width; var height=screen.height; var screendate = width + "," + height;var curlocation = window.location.href;if(-1 == curlocation.indexOf("security_verify_")){ document.cookie="srcurl=" + stringToHex(window.location.href) + ";path=/;";}self.location = "/default.php?mod=article&security_verify_data=" + stringToHex(screendate);}</script><script>setTimeout("YunSuoAutoJump()", 50);</script></head><!--2019-08-21 18:55:50--></html>
验证后的URL [second_url]
http://gaj.chifeng.gov.cn//default.php?mod=article&fid=230&s63642044_start=0&security_verify_data=3530302c353030
第三次请求返回的才是我们想要的结果!