套路:
加速乐产品分析:会发起3次请求
第一次请求:设置cookie : __jsluid_h
返回一段js,执行js可以获取到第一个__jsl_clearance
状态:400
第二次请求:设置cookie : __jsluid_h 和 第一个__jsl_clearance
返回一段经过OB(obfuscator)混淆的js代码,代码中go函数的入参包含一种随机选取的哈希算法类型(md5/sha1/sha256);按该算法执行js中对应的哈希计算(枚举两个字符,直到哈希值与入参中的ct一致),即可获取到第二个__jsl_clearance
状态:521
第三次请求:设置cookie : __jsluid_h 和 第二个__jsl_clearance
返回正确的网页html文本数据
状态:200
下面以这个网址 http://www.scio.gov.cn/xwfb/gwyxwbgsxwfbh/fbh/ 为例
代码:
这里提供Java和Python两种方案的代码:
Java代码:
import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.util.Cookie;
import org.json.JSONObject;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Fetches a page that sits behind a three-step cookie challenge using HtmlUnit.
 *
 * <p>The flow implemented here is:
 * <ol>
 *   <li>request the page, evaluate the JavaScript expression found between {@code cookie=} and
 *       {@code ;location} in the response, and store the resulting {@code name=value} cookie;</li>
 *   <li>request the page again, parse the JSON argument of the {@code go({...})} call in the
 *       response, brute-force the two missing characters whose digest equals {@code ct}, and
 *       store the result under the cookie name given by {@code tn};</li>
 *   <li>request the page a third time and print the body.</li>
 * </ol>
 */
public class SetJslCookie {
    private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36";

    /** Captures the JavaScript expression located between {@code cookie=} and {@code ;location}. */
    private static final Pattern FIRST_CHALLENGE_PATTERN = Pattern.compile("cookie=(.*?);location");

    /** Captures the inside of the object literal passed to {@code go({...})}. */
    private static final Pattern GO_ARGUMENT_PATTERN = Pattern.compile("go\\(\\{(.*?)\\}\\)");

    /** Captures the host name of a URL, with optional scheme, port and path. */
    private static final Pattern DOMAIN_PATTERN = Pattern.compile(
            "^(?:https?://)?([a-zA-Z0-9-]+(?:\\.[a-zA-Z0-9-]+)+)(?::\\d+)?(?:/.*)?$");

    /**
     * Runs the three requests against the sample URL and prints the final response body.
     *
     * @param args unused
     * @throws Exception if a request fails or a challenge cannot be solved
     */
    public static void main(String[] args) throws Exception {
        String url = "http://www.scio.gov.cn/xwfb/gwyxwbgsxwfbh/fbh/";
        // try-with-resources closes the client even when one of the steps throws.
        try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setRedirectEnabled(true);
            webClient.getOptions().setUseInsecureSSL(true);
            webClient.addRequestHeader("User-Agent", USER_AGENT);

            setFirstCookie(webClient, url);
            setSecondCookie(webClient, url);

            WebRequest request = new WebRequest(new java.net.URL(url), HttpMethod.GET);
            WebResponse response = webClient.loadWebResponse(request);
            System.out.println(response.getContentAsString());
        }
    }

    /**
     * Performs the first request and stores the cookie produced by the returned script.
     * Does nothing when the response does not contain the expected {@code cookie=...;location}
     * fragment.
     *
     * @param webClient client whose cookie manager receives the cookie
     * @param url       page to request; also used to derive the cookie domain
     * @throws IOException           if the request fails
     * @throws IllegalStateException if the evaluated script does not yield {@code name=value}
     */
    private static void setFirstCookie(WebClient webClient, String url) throws IOException {
        WebRequest request = new WebRequest(new java.net.URL(url), HttpMethod.GET);
        String content = webClient.loadWebResponse(request).getContentAsString();

        Matcher matcher = FIRST_CHALLENGE_PATTERN.matcher(content);
        if (!matcher.find()) {
            return;
        }
        // Only the first "name=value" segment matters; attributes after ';' are dropped.
        String cookiePair = executeJavaScript(matcher.group(1)).split(";")[0];
        // Limit of 2 keeps any '=' that is part of the value itself.
        String[] parts = cookiePair.split("=", 2);
        if (parts.length < 2) {
            throw new IllegalStateException("Unexpected cookie format in first challenge: " + cookiePair);
        }
        webClient.getCookieManager().addCookie(new Cookie(getDomain(url), parts[0], parts[1]));
    }

    /**
     * Performs the second request (with a {@code Referer} header), solves the hash challenge
     * passed to {@code go({...})} and stores the solution as a cookie. Does nothing when the
     * response does not contain a {@code go({...})} call.
     *
     * @param webClient client whose cookie manager receives the cookie
     * @param url       page to request; also used as Referer and to derive the cookie domain
     * @throws IOException           if the request fails
     * @throws IllegalStateException if no candidate matches the expected digest
     */
    private static void setSecondCookie(WebClient webClient, String url) throws IOException {
        WebRequest request = new WebRequest(new java.net.URL(url), HttpMethod.GET);
        request.setAdditionalHeader("Referer", url);
        String content = webClient.loadWebResponse(request).getContentAsString();

        Matcher matcher = GO_ARGUMENT_PATTERN.matcher(content);
        if (!matcher.find()) {
            return;
        }
        JSONObject challenge = new JSONObject("{" + matcher.group(1) + "}");
        String cookieName = challenge.getString("tn");
        String cookieValue = setJslClearanceS(challenge);
        if (cookieValue == null) {
            // Storing a null value would only hide the failure until the third request.
            throw new IllegalStateException("No candidate matched the expected digest for cookie " + cookieName);
        }
        webClient.getCookieManager().addCookie(new Cookie(getDomain(url), cookieName, cookieValue));
    }

    /**
     * Brute-forces the challenge: tries every ordered pair of characters from {@code chars}
     * between {@code bts[0]} and {@code bts[1]} until the {@code ha} digest equals {@code ct}.
     *
     * @param jsonObject challenge with keys {@code bts}, {@code ha}, {@code ct} and {@code chars}
     * @return the matching text, or {@code null} if no pair matches
     */
    private static String setJslClearanceS(JSONObject jsonObject) {
        String prefix = jsonObject.getJSONArray("bts").getString(0);
        String suffix = jsonObject.getJSONArray("bts").getString(1);
        String algorithm = jsonObject.getString("ha");
        String expected = jsonObject.getString("ct");
        String chars = jsonObject.getString("chars");

        for (int i = 0; i < chars.length(); i++) {
            for (int j = 0; j < chars.length(); j++) {
                String candidate = prefix + chars.charAt(i) + chars.charAt(j) + suffix;
                if (getHash(candidate, algorithm).equals(expected)) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Returns the lowercase hex digest of {@code string} (UTF-8 bytes).
     *
     * @param string text to hash
     * @param mode   algorithm name as it appears in the challenge, e.g. {@code md5}, {@code sha1}
     * @return hex-encoded digest
     * @throws RuntimeException if the algorithm is not available
     */
    private static String getHash(String string, String mode) {
        try {
            MessageDigest digest = MessageDigest.getInstance(toStandardAlgorithmName(mode));
            byte[] hashBytes = digest.digest(string.getBytes(StandardCharsets.UTF_8));
            return toHexString(hashBytes);
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException("Error getting hash: " + e.getMessage(), e);
        }
    }

    /**
     * Maps the challenge's algorithm label to the standard JCA name so the lookup does not
     * depend on provider-specific aliases. Unknown labels are passed through upper-cased.
     */
    private static String toStandardAlgorithmName(String mode) {
        // Locale.ROOT keeps the result independent of the default locale.
        String upper = mode.toUpperCase(java.util.Locale.ROOT);
        switch (upper) {
            case "SHA1":
                return "SHA-1";
            case "SHA256":
                return "SHA-256";
            default:
                return upper;
        }
    }

    /** Encodes bytes as two lowercase hex digits each. */
    private static String toHexString(byte[] bytes) {
        StringBuilder hexString = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            String hex = Integer.toHexString(0xff & b);
            if (hex.length() == 1) {
                hexString.append('0');
            }
            hexString.append(hex);
        }
        return hexString.toString();
    }

    /**
     * Evaluates a JavaScript expression with the JSR-223 engine named "JavaScript" and returns
     * the result as a string.
     *
     * <p>NOTE(review): the script comes from the remote server's response and is evaluated
     * as-is; only use this against hosts you trust.
     *
     * @param script JavaScript source to evaluate
     * @return string form of the evaluation result
     * @throws IllegalStateException if no JavaScript engine is available or the result is null
     * @throws RuntimeException      if evaluation fails
     */
    private static String executeJavaScript(String script) {
        ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");
        if (engine == null) {
            // getEngineByName returns null when no matching engine is registered
            // (e.g. JDK 15+ no longer bundles Nashorn).
            throw new IllegalStateException("No JSR-223 engine named \"JavaScript\" is available");
        }
        try {
            Object result = engine.eval(script);
            if (result == null) {
                throw new IllegalStateException("JavaScript evaluation produced no value");
            }
            return result.toString();
        } catch (javax.script.ScriptException e) {
            throw new RuntimeException("Error executing JavaScript: " + e.getMessage(), e);
        }
    }

    /**
     * Extracts the host name from a URL string.
     *
     * @param url URL with optional {@code http(s)://} scheme, port and path
     * @return the host part
     * @throws RuntimeException if the URL does not match the expected shape
     */
    private static String getDomain(String url) {
        Matcher matcher = DOMAIN_PATTERN.matcher(url);
        if (!matcher.find()) {
            throw new RuntimeException("无效的url,解析域名失败");
        }
        return matcher.group(1);
    }
}
Python代码:
import re
import hashlib
import json
import execjs
import requests
# Browser-like request headers sent with every request in this script.
headers = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.7'
    ),
    'Accept-Language': 'zh,zh-CN;q=0.9',
    # Both cache headers ask for an uncached response.
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/125.0.0.0 Safari/537.36'
    ),
}
class Set_Jsl_Coolie(object):
    """Prepare a ``requests`` session that carries the two challenge cookies.

    Constructing the object performs two GET requests against ``url``:

    1. the first response's ``cookie=...;location`` JavaScript expression is
       evaluated with ``execjs`` and the resulting ``name=value`` pair is stored;
    2. the second response's ``go({...})`` argument is parsed as JSON, the two
       missing characters are brute-forced against the digest in ``ct`` and the
       solution is stored under the cookie name given by ``tn``.

    A step is skipped when its marker is absent from the response.
    Call :meth:`get_session` to obtain the prepared session.
    """

    def __init__(self, url):
        """Run both challenge steps for ``url``.

        :param url: page to request.
        :raises RuntimeError: if the second challenge is present but unsolvable.
        """
        self.url = url
        # Copy so that adding Referer below does not mutate the shared module-level dict.
        self.headers = dict(headers)
        self.session = requests.session()
        self.set_first_cookie()
        self.set_second_cookie()

    def set_first_cookie(self):
        """Request the page and store the cookie computed by the returned script."""
        first_response = self.session.get(self.url, headers=self.headers)
        matches = re.findall('cookie=(.*?);location', first_response.text)
        if not matches:
            return
        # NOTE(review): this evaluates JavaScript taken from the remote response;
        # only use against hosts you trust.
        cookie_pair = execjs.eval(matches[0]).split(';')[0]
        # partition keeps any '=' that belongs to the value itself.
        name, _, value = cookie_pair.partition('=')
        self.session.cookies[name] = value

    def set_second_cookie(self):
        """Request the page with a Referer, solve the hash challenge, store the cookie.

        :raises RuntimeError: if no candidate matches the expected digest.
        """
        self.headers['Referer'] = self.url
        # Also keep Referer on the session so later requests made through the
        # returned session still send it without relying on a mutated global dict.
        self.session.headers['Referer'] = self.url
        second_response = self.session.get(self.url, headers=self.headers)
        matches = re.findall(r"go\({(.*?)}\)", second_response.text)
        if not matches:
            return
        challenge = json.loads("{" + matches[0] + "}")
        clearance = self._solve_clearance(challenge)
        if clearance is None:
            raise RuntimeError(
                'no candidate matched the expected digest for cookie %r' % challenge["tn"]
            )
        self.session.cookies[challenge["tn"]] = clearance

    @staticmethod
    def _get_hash(string, mode):
        """Return the hex digest of ``string`` (UTF-8) for ``mode``.

        ``sha1`` and ``sha256`` are recognised case-insensitively; any other
        value falls back to MD5.
        """
        hasher = {
            'sha1': hashlib.sha1,
            'sha256': hashlib.sha256,
        }.get(mode.lower(), hashlib.md5)
        return hasher(string.encode('utf-8')).hexdigest()

    @staticmethod
    def _solve_clearance(data):
        """Brute-force the two characters between ``bts[0]`` and ``bts[1]``.

        :param data: mapping with keys ``bts``, ``chars``, ``ha`` and ``ct``.
        :return: the text whose digest equals ``ct``, or ``None`` if none matches.
        """
        prefix, suffix = data["bts"][0], data["bts"][1]
        for first in data["chars"]:
            for second in data["chars"]:
                text = prefix + first + second + suffix
                if Set_Jsl_Coolie._get_hash(text, data['ha']) == data['ct']:
                    return text
        return None

    def get_session(self):
        """Return the session holding the challenge cookies."""
        return self.session
# Example run: solve the challenge for the sample URL, then fetch and print the page.
url = 'http://www.scio.gov.cn/xwfb/gwyxwbgsxwfbh/fbh/'
# Keep the instance: the session with the challenge cookies is read from it below.
a = Set_Jsl_Coolie(url)
session = a.get_session()
response = session.get(url, headers=headers)
# Force UTF-8 decoding before reading the body as text.
response.encoding = 'utf-8'
print(response.text)