Python爬虫进阶必备，某著名人均百万问答社区header参数加密逻辑分析。(1)

最新推荐文章于 2024-06-14 09:32:55 发布

Ndk开发校长

最新推荐文章于 2024-06-14 09:32:55 发布

阅读量668

点赞数 19

分类专栏：程序员文章标签： python 爬虫 javascript

本文链接：https://blog.csdn.net/m0_61041374/article/details/138349102

版权

程序员专栏收录该内容

150 篇文章 0 订阅

订阅专栏

是可以正常得到结果的，那我们要把这个代码在 node 中运行看看

加密改写

为了在 node 里运行，我对代码做了一些修改，保证它可以运行不报错

首先直接将代码复制过来运行是会报错的

简单修改下，声明 window ，并把最后的 exports 修改为 window.exports

修改后调用发现报错 atob 未定义

这个应该大家都会吧，其实就是 base64，补的方法有很多种

方法 1 ：

// Base64 alphabet used by xazxBase64: indices 0-63 are the data characters,
// index 64 ("=") is the padding marker.
const _keyStr = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

/**
 * Encode a JavaScript string as a "binary string" of UTF-8 bytes.
 *
 * CRLF pairs are normalised to LF first. Every UTF-16 code unit is then
 * encoded on its own into one, two or three characters whose char codes are
 * the UTF-8 byte values, so surrogate halves are each emitted as a 3-byte
 * sequence rather than being combined into a 4-byte one.
 *
 * @param {string} string - Text to encode.
 * @returns {string} One character (char code 0-255) per encoded byte.
 */
function _utf8_encode(string) {
  const text = string.replace(/\r\n/g, "\n");
  let utftext = "";

  for (let n = 0; n < text.length; n++) {
    const c = text.charCodeAt(n);

    if (c < 128) {
      // 1 byte: 0xxxxxxx
      utftext += String.fromCharCode(c);
    } else if (c < 2048) {
      // 2 bytes: 110xxxxx 10xxxxxx
      utftext += String.fromCharCode((c >> 6) | 192);
      utftext += String.fromCharCode((c & 63) | 128);
    } else {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      utftext += String.fromCharCode((c >> 12) | 224);
      utftext += String.fromCharCode(((c >> 6) & 63) | 128);
      utftext += String.fromCharCode((c & 63) | 128);
    }
  }

  // Returned only after the whole input has been consumed.
  return utftext;
}

/**
 * Decode a "binary string" of UTF-8 bytes back into a JavaScript string.
 *
 * The lead byte decides how many bytes are consumed: below 128 is a single
 * byte, 192-223 starts a 2-byte sequence, and anything else is treated as the
 * start of a 3-byte sequence. Continuation bytes are not validated.
 *
 * @param {string} utftext - String whose char codes are UTF-8 byte values.
 * @returns {string} The decoded text.
 */
function _utf8_decode(utftext) {
  let string = "";
  let i = 0;

  while (i < utftext.length) {
    const c = utftext.charCodeAt(i);

    if (c < 128) {
      string += String.fromCharCode(c);
      i++;
    } else if (c > 191 && c < 224) {
      const c2 = utftext.charCodeAt(i + 1);
      string += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
      i += 2;
    } else {
      const c2 = utftext.charCodeAt(i + 1);
      const c3 = utftext.charCodeAt(i + 2);
      string += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
      i += 3;
    }
  }

  // Returned only after the whole input has been consumed.
  return string;
}

/**
 * Hand-rolled Base64 codec for hosts without atob/btoa.
 *
 * Relies on three names defined alongside it: the alphabet `_keyStr` (65
 * characters, "=" at index 64 marks padding) and the helpers `_utf8_encode` /
 * `_utf8_decode`.
 */
const xazxBase64 = {
  /**
   * Decode a Base64 string into text.
   *
   * Characters outside the Base64 alphabet are stripped before decoding, and
   * the resulting byte string is passed through `_utf8_decode`.
   *
   * @param {string} input - Base64 text.
   * @returns {string} The decoded string.
   */
  decode(input) {
    const cleaned = input.replace(/[^A-Za-z0-9+\/=]/g, "");
    let output = "";
    let i = 0;

    while (i < cleaned.length) {
      // Four 6-bit values are regrouped into three 8-bit values.
      const enc1 = _keyStr.indexOf(cleaned.charAt(i++));
      const enc2 = _keyStr.indexOf(cleaned.charAt(i++));
      const enc3 = _keyStr.indexOf(cleaned.charAt(i++));
      const enc4 = _keyStr.indexOf(cleaned.charAt(i++));

      const chr1 = (enc1 << 2) | (enc2 >> 4);
      const chr2 = ((enc2 & 15) << 4) | (enc3 >> 2);
      const chr3 = ((enc3 & 3) << 6) | enc4;

      output += String.fromCharCode(chr1);
      // Index 64 is "=": the corresponding byte is padding, not data.
      if (enc3 !== 64) {
        output += String.fromCharCode(chr2);
      }
      if (enc4 !== 64) {
        output += String.fromCharCode(chr3);
      }
    }

    return _utf8_decode(output);
  },

  /**
   * Encode text as Base64.
   *
   * The input is first converted with `_utf8_encode`, then every group of up
   * to three bytes becomes four alphabet characters, padded with "=".
   *
   * @param {string} input - Text to encode.
   * @returns {string} Base64 text.
   */
  encode(input) {
    const bytes = _utf8_encode(input);
    let output = "";
    let i = 0;

    while (i < bytes.length) {
      // charCodeAt past the end yields NaN, which marks a short final group.
      const chr1 = bytes.charCodeAt(i++);
      const chr2 = bytes.charCodeAt(i++);
      const chr3 = bytes.charCodeAt(i++);

      const enc1 = chr1 >> 2;
      const enc2 = ((chr1 & 3) << 4) | (chr2 >> 4);
      let enc3 = ((chr2 & 15) << 2) | (chr3 >> 6);
      let enc4 = chr3 & 63;

      if (Number.isNaN(chr2)) {
        enc3 = 64;
        enc4 = 64;
      } else if (Number.isNaN(chr3)) {
        enc4 = 64;
      }

      output +=
        _keyStr.charAt(enc1) + _keyStr.charAt(enc2) +
        _keyStr.charAt(enc3) + _keyStr.charAt(enc4);
    }

    return output;
  },
};

方法 2 ：

// Make sure a Buffer constructor is reachable on the global object; fall back
// to the 'buffer' module only when the host does not already expose one.
global.Buffer = global.Buffer || require('buffer').Buffer;

/**
 * Buffer-backed stand-in for the browser's btoa().
 *
 * @param {string} str - Binary string (one character per byte).
 * @returns {string} Base64 encoding of those bytes.
 */
function btoaPolyfill(str) {
  // Buffer.from is a static factory, so it is called without `new`.
  return Buffer.from(str, 'binary').toString('base64');
}

/**
 * Buffer-backed stand-in for the browser's atob().
 *
 * @param {string} b64Encoded - Base64 text.
 * @returns {string} Binary string (one character per decoded byte).
 */
function atobPolyfill(b64Encoded) {
  return Buffer.from(b64Encoded, 'base64').toString('binary');
}

// Install the stand-ins only where the host has no implementation of its own.
if (typeof btoa === 'undefined') {
  global.btoa = btoaPolyfill;
}

if (typeof atob === 'undefined') {
  global.atob = atobPolyfill;
}

方法 3 ：

/**
 * Pure-JavaScript atob(): decode Base64 text into a binary string.
 *
 * Trailing "=" padding is removed first; characters that are not in the
 * alphabet are skipped while decoding.
 *
 * Kept as a `var` function expression so the binding hoists and can be
 * redeclared exactly like the original snippet.
 *
 * @param {*} r - Value to decode; it is converted with String() first.
 * @returns {string} One character (char code 0-255) per decoded byte.
 * @throws {Error} If the unpadded length leaves a single dangling character
 *   (length % 4 === 1), which cannot encode a whole byte.
 */
var atob = function (r) {
  const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
  const encoded = String(r).replace(/=+$/, "");

  if (encoded.length % 4 === 1) {
    throw new Error("'atob' failed: The string to be decoded is not correctly encoded.");
  }

  let decoded = "";
  let accumulator = 0; // 6-bit groups collected for the current quartet
  let groupCount = 0;  // number of alphabet characters consumed so far

  for (let position = 0; position < encoded.length; position++) {
    const value = alphabet.indexOf(encoded.charAt(position));
    if (value === -1) {
      continue; // not an alphabet character: ignore it
    }

    // The first character of a quartet restarts the accumulator; the others
    // shift the previous bits up by six and append their own.
    accumulator = groupCount % 4 ? 64 * accumulator + value : value;

    // Every character except the first of a quartet completes one more byte.
    // The shift (4, 2 or 0) selects that byte from the accumulated bits.
    if (groupCount++ % 4) {
      decoded += String.fromCharCode(255 & (accumulator >> ((-2 * groupCount) & 6)));
    }
  }

  return decoded;
};

jsdom 版生成正确加密值

这个是网上流传最多的版本，其实也没有毛病，直接用 jsdom 套个环境就完事了

使用方法也非常简单

npm install jsdom

在代码开头加上下面的代码

// Build a minimal browser-like environment with jsdom (install it first with
// `npm install jsdom`).
const jsdom = require("jsdom");

const { JSDOM } = jsdom;

// The markup must be passed as a string; a template literal keeps it readable.
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);

// Deliberately plain assignments (no const/let): they update whatever
// `window` / `document` / `XMLHttpRequest` bindings the rest of the script
// already resolves, or create globals when none exist.
window = dom.window;

document = window.document;

XMLHttpRequest = window.XMLHttpRequest;

直接运行可以得到下面的结果

输入值

127927b6d4c1814afa22cdea9c7d7be9

正确结果

aHt0c6Lyn9Ox28S8K0OqNJuqb0FYoXYBG8F0b7uySRYf

jsdom的结果

aHt0c6Lyn9Ox28S8K0OqNJuqb0FYoXYBG8F0b7uySRYf

node 版生成正确的加密值

如果要使用 node 生成正确的加密结果

推荐采用方法 2，可以直接得到近似结果，但是结果是多了最后的 4 位，偷懒一点直接截掉后四位就行了

输入值

c06829267e17d3941f5c4cf33db9d509

正确结果

aHt0c6Lyn9Ox28S8K0OqNJuqb0FYoXYBG8F0b7uySRYf

我们自己的结果

aHt0c6Lyn9Ox28S8K0OqNJuqb0FYoXYBG8F0b7uySRYf9Tuw

截掉后四位就完事了

想知道一步到位的方法就需要一点点分析分析他的加密了

如果不想分析，接下来的部分可以跳过

主要内容已经全部讲完了，剩下的内容没有代码

接下来主要会告诉你分析插桩的点在哪里

先看加密的入口

__g._encrypt(encodeURIComponent(e))

这里的 __g._encrypt 是 r()

r 是在下面这里调用的

这里用到了 o.v 这里的 o.v 是由 new G.v 生成的

就是代码里的一长串 base64 编码

传入这一串编码之后，代码就在 G.prototype.D 和 G.prototype.v 之间来回跳转，并在这两个方法里做一些判断、移位的操作，最终生成结果

能插桩看到信息的点在哪里呢？

全局检索 var k

在这里把 charCodeAt 的结果打印出来，得到的结果如下

__g

_encrypt

window

undefined

window

navigator

Object

name

nodejs

userAgent

headless

userAgent

toLowerCase

indexOf

callPhantom

_phantom

__phantomas

buffer

Buffer

emit

spawn

webdriver

domAutomation

domAutomationController

getOwnPropertyDescriptor

userAgent

getOwnPropertyDescriptor

webdriver

getOwnPropertyDescriptor

[native code]

getOwnPropertyDescriptor

Function

prototype

toString

call

indexOf

length

RuPtXwxpThIZ0qyz_9fYLCOV8B1mMGKs7UnFHgN3iDaWAJE-Qrk2ecSo6bjd4vl5

length

charCodeAt

…

charAt

…

charCodeAt

…

charAt

…

charCodeAt

…

charAt

…

在这里插入图片描述

感谢每一个认真阅读我文章的人，看着粉丝一路的上涨和关注，礼尚往来总是要有的：

①　2000多本Python电子书（主流和经典的书籍应该都有了）

②　Python标准库资料（最全中文版）

③　项目源码（四五十个有趣且经典的练手项目及源码）

④　Python基础入门、爬虫、web开发、大数据分析方面的视频（适合小白学习）

⑤ Python学习路线图（告别不入流的学习）

网上学习资料一大堆，但如果学到的知识不成体系，遇到问题时只是浅尝辄止，不再深入研究，那么很难做到真正的技术提升。

需要这份系统化学习资料的朋友，可以戳这里无偿获取

一个人可以走得很快，但一群人才能走得更远！不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人，都欢迎加入我们的圈子（技术交流、学习资源、职场吐槽、大厂内推、面试辅导），让我们一起学习成长！

Ndk开发校长

关注

19
点赞
踩
29

收藏

觉得还不错? 一键收藏
0
评论
Python爬虫进阶必备，某著名人均百万问答社区header参数加密逻辑分析。(1)

是可以正常得到结果的，那我们要把这个代码在 node 中运行看看。
复制链接

扫一扫

专栏目录

Python爬虫进阶必备，某著名人均百万问答社区header参数加密逻辑分析。(1)

加密改写

输入值

正确结果

jsdom的结果

node 版生成正确的加密值

输入值

正确结果

我们自己的结果

截掉后四位就完事了

“相关推荐”对你有帮助么？