目标网址
‘aHR0cHM6Ly93d3cueW91emhpY2FpLmNvbS9udjEvMDEwMTAxMDAwMTAxMDEwMS5odG1s’
分析
打开控制台抓包,第一次状态码203,会生成cookie,第二次访问会带上该cookie
知道了是cookie加密,使用hook定位到加密位置
// Hook document.cookie: pause in the debugger on every cookie write so the
// call stack shows which code produced the value. Reads and writes are
// forwarded to the browser's native accessor, so the page keeps seeing and
// storing real cookies while the hook is installed.
(function hookDocumentCookie() {
  // The native accessor lives on a prototype, not on the document instance.
  const nativeCookie =
    Object.getOwnPropertyDescriptor(Document.prototype, "cookie") ||
    Object.getOwnPropertyDescriptor(HTMLDocument.prototype, "cookie");

  Object.defineProperty(document, "cookie", {
    configurable: true, // allow the hook to be removed or replaced later
    get() {
      return nativeCookie.get.call(document);
    },
    set(val) {
      debugger; // inspect the call stack here to find the cookie generator
      nativeCookie.set.call(document, val);
    },
  });
})();
定位到位置,点击上层堆栈
参数b就是我们想要的值
黑盒调用
把整个js代码复制出来,在代理器下运行,看它需要哪些环境
不了解代理器的可以看这几篇文章
https://zhuanlan.zhihu.com/p/30299114
https://www.cnblogs.com/tugenhua0707/p/10291909.html#_labe0
https://zhuanlan.zhihu.com/p/60791215
proxy代理器我放到最后面
然后加句打印b的值
因为js代码是动态加载的,需要把每一次返回的js代码进行替换执行拿到b的值
把补的环境单独拿出来
直接上代码
import requests
import execjs
class spider:
    """Fetch the protected page by executing the site's cookie-challenge script.

    The first response is treated as JavaScript wrapped in ``<script>`` tags.
    That script is run through ``execjs`` inside a small fake browser
    environment, the value of its variable ``b`` is read back, and the page is
    requested again with that value set as the ``spvrscode`` cookie.
    """

    # Page protected by the cookie challenge.
    TARGET_URL = 'https://www.youzhicai.com/nv1/0101010001010101.html'

    # Minimal browser environment prepended to the challenge script.
    # `location.pathname` is the path component of `href` (it previously held
    # the host name, which no browser would report).
    JS_ENV = """
    var CryptoJS = require("crypto-js");
    let navigator = {
        userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
        platform: "Win32",
        appCodeName: "Mozilla",
        language: "zh-CN",
        webdriver: false,
        cookieEnabled: true,
        appVersion: "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
    };
    Object.defineProperties(navigator,{
        [Symbol.toStringTag]: {
            value:"Navigator"
        }
    })
    location = {
        pathname: '/nv1/0101010001010101.html',
        href: 'https://www.youzhicai.com/nv1/0101010001010101.html',
        host: 'www.youzhicai.com',
        reload: function (){},
    }
    document = {}
    window = {
        navigator: navigator,
        location: location,
        document: document,
    };
    """

    # Appended after the challenge script; exposes its variable `b`.
    # Assumes the challenge script leaves `b` reachable from this function.
    GET_COOKIE_JS = """
    function getcookie(){
        return b
    }
    """

    def __init__(self):
        """Create the HTTP session and the browser-like request headers."""
        self.session = requests.Session()
        self.headers = {
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }
        # JS environment (kept as instance attributes for existing users).
        self.js_env = self.JS_ENV
        # JS accessor for the cookie value.
        self.get_cookie = self.GET_COOKIE_JS

    @staticmethod
    def _strip_script_tags(html):
        """Return ``html`` with every ``<script>`` / ``</script>`` tag removed."""
        return html.replace("<script>", "").replace("</script>", "")

    def run(self):
        """Solve the challenge once and print the page fetched with the cookie."""
        response = self.session.get(self.TARGET_URL, headers=self.headers)
        js_code = self._strip_script_tags(response.text)
        # Environment + challenge script + accessor form one program.
        all_code = self.js_env + js_code + self.get_cookie
        context = execjs.compile(all_code)
        # Read the cookie value computed by the challenge script.
        cookie = context.call("getcookie")
        print("第一次的cookie:", cookie)
        # Attach the cookie to the session before the second request.
        requests.utils.add_dict_to_cookiejar(self.session.cookies, {"spvrscode": cookie})
        response = self.session.get(self.TARGET_URL, headers=self.headers)
        print(response.text)
if __name__ == '__main__':
    # Use a separate name for the instance so the class `spider` is not
    # rebound to one of its own objects.
    crawler = spider()
    crawler.run()
成功拿到页面
proxy代理器
// Keep the native implementation so the logging wrapper can delegate to it.
let rawindexof = String.prototype.indexOf;

/**
 * Logging wrapper around String.prototype.indexOf.
 *
 * Every argument is forwarded to the native method (including the optional
 * start position), so the observed code gets the same result it would get
 * without the wrapper.
 *
 * @param {string} str - Substring to search for.
 * @param {...*} rest - Remaining native arguments (the start position).
 * @returns {number} Index of the first match, or -1.
 */
String.prototype.indexOf = function (str, ...rest) {
  const res = rawindexof.call(this, str, ...rest);
  console.log(`[String] "${this}" is indexof "${str}", res is ${res}`);
  return res;
};
// Plain-object stand-in for the browser's `navigator`, holding the fields
// the fake environment exposes.
let mynavigator = {
  userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
  platform: "Win32",
  appCodeName: "Mozilla",
  language: "zh-CN",
  webdriver: false,
  cookieEnabled: true,
  appVersion: "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
};

// Tag the stub so Object.prototype.toString reports "[object Navigator]".
Object.defineProperty(mynavigator, Symbol.toStringTag, { value: "Navigator" });
// Stand-in for `history`; no members are stubbed.
let myhistory = {};

// Stand-in for `screen` with fixed dimensions and colour depth.
let myscreen = {
  height: 768,
  width: 1366,
  colorDepth: 24,
};

// Stand-in for `location`. `pathname` is the path component of `href`
// (it previously held the host name, which a browser never reports there).
let mylocation = {
  pathname: '/nv1/0101010001010101.html',
  href: 'https://www.youzhicai.com/nv1/0101010001010101.html',
  host: 'www.youzhicai.com',
};
// Constructor stubs mirroring the DOM chain HTMLDocument -> Document.
let Document = function Document() {};
let HTMLDocument = function HTMLDocument() {};

// Link the prototypes (instances of HTMLDocument are also Documents) and the
// constructors themselves. Setting the *constructor's* prototype to
// Document.prototype, as before, left `instanceof Document` false and
// stripped Function methods such as `.call` from HTMLDocument.
Object.setPrototypeOf(HTMLDocument.prototype, Document.prototype);
Object.setPrototypeOf(HTMLDocument, Document);

// Make Object.prototype.toString report "[object HTMLDocument]".
Object.defineProperties(HTMLDocument.prototype, {
  [Symbol.toStringTag]: {
    value: "HTMLDocument",
  },
});

// Stand-in for `document` with the two methods the fake environment provides.
let mydocument = {
  // Every created element is an empty object.
  createElement: function () {
    return {};
  },
  /**
   * Log the requested tag name and return a collection for it.
   * For "meta" the collection carries a "meta-pro" entry whose
   * `content.length` is 6 (presumably what the target script reads; confirm
   * against the site). Any other tag yields an empty collection instead of
   * undefined, since the DOM method always returns a collection.
   */
  getElementsByTagName: function (str) {
    console.log(str);
    if (str === "meta") {
      let metaRes = [];
      metaRes["meta-pro"] = {
        content: {
          length: 6,
        },
      };
      return metaRes;
    }
    return [];
  },
};

// Give the document stub the HTMLDocument prototype chain.
Object.setPrototypeOf(mydocument, HTMLDocument.prototype);
// Empty constructor standing in for the browser's Image; exported by this module.
let Image = function (){}
// Window-level stubs. These properties are later copied onto Node's `global`
// with Object.assign, so the observed script sees them as window members.
let mywindow = {
// No-op constructors / functions: they exist so lookups and calls succeed.
XMLHttpRequest: function () {},
sessionStorage: {},
localStorage: {},
navigator: mynavigator,
scrollTo: function (){},
addEventListener: function () {},
attachEvent: function () {},
screen: myscreen,
location: mylocation,
// Empty object standing in for the `chrome` global.
chrome: {},
document: mydocument,
history: myhistory
};
// Make Object.prototype.toString.call(global) report "[object Window]".
Object.defineProperty(global, Symbol.toStringTag, { value: "Window" });
// Keep the native implementation so the override can delegate to it.
let rawstringify = JSON.stringify;

/**
 * JSON.stringify override that short-circuits on the global object.
 *
 * When the argument is `global`, or an object whose `value` property is
 * `global`, the literal string "global" is returned. Everything else is
 * delegated to the native implementation with all three arguments, so
 * `replacer` and `space` keep working.
 *
 * @param {*} value - Value to serialise.
 * @param {Function|Array|null} [replacer] - Native replacer argument.
 * @param {string|number} [space] - Native indentation argument.
 * @returns {string|undefined} Serialised text, as the native method returns.
 */
JSON.stringify = function (value, replacer, space) {
  if ((value?.value ?? value) === global) {
    return "global";
  }
  return rawstringify(value, replacer, space);
};
/**
 * Build Proxy traps that log every call and every construction of a wrapped
 * function, then pass the real outcome through unchanged.
 *
 * @param {string} WatchName - Label printed at the start of each log line.
 * @returns {{apply: Function, construct: Function}} Handler for `new Proxy(fn, ...)`.
 */
function getMethodHandler(WatchName) {
  return {
    // Plain invocation: run the target first, then report what happened.
    apply(target, thisArg, argArray) {
      const returned = Reflect.apply(target, thisArg, argArray);
      console.log(`[${WatchName}] apply function name is [${target.name}], argArray is [${argArray}], result is [${returned}].`);
      return returned;
    },
    // `new` invocation: build the instance first, then report it.
    construct(target, argArray, newTarget) {
      const instance = Reflect.construct(target, argArray, newTarget);
      console.log(`[${WatchName}] construct function name is [${target.name}], argArray is [${argArray}], result is [${instance}].`);
      return instance;
    },
  };
}
/**
 * Render a property key for a log line. Symbols cannot be interpolated into
 * a template literal, so they are shown by description (or by their
 * `Symbol(...)` form when they have none).
 */
function formatKey(propKey) {
  if (typeof propKey === "symbol") {
    return propKey.description ?? String(propKey);
  }
  return propKey;
}

/**
 * Render any value for a log line without throwing. `String()` matches
 * template-literal conversion but also accepts symbols; values that still
 * cannot be converted fall back to their Object.prototype.toString tag.
 */
function formatValue(value) {
  try {
    return String(value);
  } catch {
    // Logging must never break the code being observed.
    return Object.prototype.toString.call(value);
  }
}

/** True when `propKey` is an own, non-writable, non-configurable data property of `target`. */
function isReadOnlyDataProperty(target, propKey) {
  const descriptor = Reflect.getOwnPropertyDescriptor(target, propKey);
  return (
    descriptor !== undefined &&
    descriptor.configurable === false &&
    descriptor.writable === false
  );
}

/**
 * Build a Proxy handler that logs every operation performed on the wrapped
 * object and forwards it to the target through `Reflect`.
 *
 * Object and function values read through the `get` trap are themselves
 * wrapped with a handler labelled `<WatchName>.<key>`, so nested access is
 * logged too.
 *
 * @param {string} WatchName - Label printed at the start of each log line.
 * @returns {object} Handler for `new Proxy(target, ...)`.
 */
function getObjhandler(WatchName) {
  return {
    get(target, propKey, receiver) {
      const result = Reflect.get(target, propKey, receiver);
      const key = formatKey(propKey);
      if (result instanceof Object) {
        if (typeof result === "function") {
          console.log(`[${WatchName}] getting propKey is [${key}] , it is function`);
        } else {
          console.log(`[${WatchName}] getting propKey is [${key}], result is [${formatValue(result)}]`);
        }
        // Proxy invariant: a read-only, non-configurable data property must
        // be returned as-is (e.g. the `prototype` of a class); wrapping it
        // would make the engine throw a TypeError.
        if (isReadOnlyDataProperty(target, propKey)) {
          return result;
        }
        return new Proxy(result, getObjhandler(`${WatchName}.${key}`));
      }
      console.log(`[${WatchName}] getting propKey is [${key}], result is [${formatValue(result)}]`);
      return result;
    },
    set(target, propKey, value, receiver) {
      console.log(`[${WatchName}] setting propKey is [${formatKey(propKey)}], value is [${formatValue(value)}]`);
      return Reflect.set(target, propKey, value, receiver);
    },
    has(target, propKey) {
      const result = Reflect.has(target, propKey);
      console.log(`[${WatchName}] has propKey [${formatKey(propKey)}], result is [${result}]`);
      return result;
    },
    deleteProperty(target, propKey) {
      const result = Reflect.deleteProperty(target, propKey);
      console.log(`[${WatchName}] delete propKey [${formatKey(propKey)}], result is [${result}]`);
      return result;
    },
    getOwnPropertyDescriptor(target, propKey) {
      const result = Reflect.getOwnPropertyDescriptor(target, propKey);
      console.log(`[${WatchName}] getOwnPropertyDescriptor propKey [${formatKey(propKey)}] result is [${formatValue(result)}]`);
      return result;
    },
    defineProperty(target, propKey, attributes) {
      const result = Reflect.defineProperty(target, propKey, attributes);
      console.log(`[${WatchName}] defineProperty propKey [${formatKey(propKey)}] attributes is [${formatValue(attributes)}], result is [${result}]`);
      return result;
    },
    getPrototypeOf(target) {
      const result = Reflect.getPrototypeOf(target);
      console.log(`[${WatchName}] getPrototypeOf result is [${formatValue(result)}]`);
      return result;
    },
    setPrototypeOf(target, proto) {
      console.log(`[${WatchName}] setPrototypeOf proto is [${formatValue(proto)}]`);
      return Reflect.setPrototypeOf(target, proto);
    },
    preventExtensions(target) {
      console.log(`[${WatchName}] preventExtensions`);
      return Reflect.preventExtensions(target);
    },
    isExtensible(target) {
      const result = Reflect.isExtensible(target);
      console.log(`[${WatchName}] isExtensible, result is [${result}]`);
      return result;
    },
    ownKeys(target) {
      const result = Reflect.ownKeys(target);
      // Keys may include symbols, which a plain array-to-string join rejects.
      console.log(`[${WatchName}] invoke ownkeys, result is [${result.map(formatKey).join(",")}]`);
      return result;
    },
    apply(target, thisArg, argArray) {
      const result = Reflect.apply(target, thisArg, argArray);
      console.log(`[${WatchName}] apply function name is [${target.name}], argArray is [${formatValue(argArray)}], result is [${formatValue(result)}].`);
      return result;
    },
    construct(target, argArray, newTarget) {
      const result = Reflect.construct(target, argArray, newTarget);
      console.log(`[${WatchName}] construct function name is [${target.name}], argArray is [${formatValue(argArray)}], result is [${formatValue(result)}].`);
      return result;
    },
  };
}
const navigator = new Proxy(Object.create(mynavigator), getObjhandler("navigator"));
const history = new Proxy(Object.create(myhistory), getObjhandler("history"))
const screen = new Proxy(Object.create(myscreen), getObjhandler("screen"));
const location = new Proxy(mylocation, getObjhandler("location"));
const document = new Proxy(mydocument, getObjhandler("document"));
const window = new Proxy(Object.assign(global, mywindow), getObjhandler("window"));
//checkproxy()
module.exports = {
window,
navigator,
screen,
location,
Image,
document,
history,
Document
}
在要运行的js文件最前面添加导入
// Pull the logging browser stubs into the script under analysis.
// The "./" prefix matters: a bare 'Proxy.js' is looked up in node_modules
// rather than next to this file. `let` is kept so the analysed script may
// reassign these names.
let {
  window,
  navigator,
  location,
  screen,
  Image,
  document,
  history,
  Document,
} = require('./Proxy.js');
算法还原
分析把这段代码拿出来,一点点还原
可以在上面找到它的密钥和需要加密的字符串
结果是一样的
可以看到这是一个DES加密算法
流程是第一次请求拿到js代码,从中拿到密钥和需要加密的字符串,再由python加密后进行请求
代码
from pyDes import des, ECB, PAD_PKCS5
import binascii
import requests
import re
class spider:
    """Fetch the protected page by reproducing the cookie challenge in Python.

    The first response is expected to embed two JavaScript string variables,
    ``a`` (used as the DES key) and ``b`` (the text to encrypt). The
    hex-encoded DES result is sent back as the ``spvrscode`` cookie on the
    second request.
    """

    # Page protected by the cookie challenge.
    TARGET_URL = 'https://www.youzhicai.com/nv1/0101010001010101.html'

    def __init__(self):
        """Create the HTTP session and the browser-like request headers."""
        self.session = requests.Session()
        self.headers = {
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'sec-ch-ua': '"Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }

    def des_encrypt(self, KEY, s):
        """
        Encrypt ``s`` with DES (ECB mode, PKCS5 padding).

        :param KEY: DES key taken from the challenge script
        :param s: plain text to encrypt
        :return: cipher text as hex-encoded bytes
        """
        secret_key = KEY  # key
        # The key is also passed as IV; pyDes documents the IV as a CBC-mode
        # input, so it presumably has no effect in ECB mode (confirm).
        iv = secret_key
        # secret_key: encryption key, ECB: cipher mode, iv: initial value,
        # padmode: padding scheme
        des_obj = des(secret_key, ECB, iv, pad=None, padmode=PAD_PKCS5)
        # encrypt() returns raw bytes
        secret_bytes = des_obj.encrypt(s, padmode=PAD_PKCS5)
        # convert to hex
        return binascii.b2a_hex(secret_bytes)

    @staticmethod
    def _extract_key_and_plaintext(html):
        """Return ``(a, b)``: the two string variables of the challenge script.

        :param html: body of the first response
        :raises IndexError: when either variable is missing, e.g. because the
            response is not the challenge page. The type matches the failed
            ``[0]`` lookup this replaces; only the message is new.
        """
        key_match = re.search(";var a= '(.*?)';", html)
        text_match = re.search(";var b = '(.*?)';", html)
        if key_match is None or text_match is None:
            raise IndexError(
                "challenge variables 'a'/'b' not found in the response; "
                "the page may not be the cookie challenge"
            )
        return key_match.group(1), text_match.group(1)

    def run(self):
        """Solve the challenge once and print the page fetched with the cookie."""
        response = self.session.get(self.TARGET_URL, headers=self.headers)
        a, b = self._extract_key_and_plaintext(response.text)
        print(a)
        print(b)
        cookie = self.des_encrypt(a, b).decode()
        print("第一次的cookie:", cookie)
        # Attach the cookie to the session before the second request.
        requests.utils.add_dict_to_cookiejar(self.session.cookies, {"spvrscode": cookie})
        response = self.session.get(self.TARGET_URL, headers=self.headers)
        print(response.text)
if __name__ == '__main__':
    # Use a separate name for the instance so the class `spider` is not
    # rebound to one of its own objects.
    crawler = spider()
    crawler.run()
请求成功