How to compute acw_sc__v2 when an anti-crawler returns a piece of setCookie JavaScript
Handling code

When the anti-bot layer is triggered, the server does not return the real page; instead it returns an obfuscated script that derives the acw_sc__v2 cookie from an embedded variable arg1 (an index-table shuffle, unsbox, followed by a byte-wise hex XOR against a fixed key, hexXor) and reloads the page. The Python below reimplements those two functions, computes the cookie, and retries the request:
import requests
import re
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                  ' (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
}
def get_script_data(base_url):
    '''
    Fetch the page and pull the arg1 value out of the setCookie JS.
    '''
    response = requests.get(base_url, headers=headers)
    arg1 = re.search("arg1='([^']+)'", response.text).group(1)
    return arg1
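# The challenge page body contains a line of the form
#     arg1='XXXX...'
# (a 40-character string, matching the 40-slot table in unsbox and the
# 40-character key below), which the regex above captures.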
def hexXor(_0x4e08d8, _0x23a392):
    '''
    XOR the two hex strings byte by byte (mirrors the obfuscated JS hexXor).
    '''
    _0x5a5d3b = ''
    _0xe89588 = 0x0
    while _0xe89588 < len(_0x23a392) and _0xe89588 < len(_0x4e08d8):
        # Take one byte (two hex digits) from each string and XOR them.
        _0x401af1 = int(_0x23a392[_0xe89588: _0xe89588 + 0x2], 16)
        _0x105f59 = int(_0x4e08d8[_0xe89588: _0xe89588 + 0x2], 16)
        # '02x' keeps the leading zero the JS adds when the XOR result is a
        # single hex digit (the original len(hex(...)) == 0x1 check never
        # fired, because hex() always returns at least '0x0').
        _0x5a5d3b += format(_0x401af1 ^ _0x105f59, '02x')
        _0xe89588 += 0x2
    return _0x5a5d3b
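# Optional sanity checks (illustrative values, not from the original post):
# 0x12 ^ 0x30 = 0x22 and 0x34 ^ 0x00 = 0x34, and a byte that XORs to a
# single digit, 0x1f ^ 0x10 = 0xf, must come out zero-padded as '0f'.
assert hexXor('3000', '1234') == '2234'
assert hexXor('10', '1f') == '0f'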
def unsbox(arg):
    '''
    Shuffle the 40-character arg1 with a fixed index table (mirrors the JS
    unsbox): input character p lands in every slot whose entry is p + 1.
    '''
    _0x4b082b = [0xf, 0x23, 0x1d, 0x18, 0x21, 0x10, 0x1, 0x26, 0xa, 0x9, 0x13, 0x1f, 0x28, 0x1b, 0x16, 0x17, 0x19, 0xd,
                 0x6, 0xb, 0x27, 0x12, 0x14, 0x8, 0xe, 0x15, 0x20, 0x1a, 0x2, 0x1e, 0x7, 0x4, 0x11, 0x5, 0x3, 0x1c,
                 0x22, 0x25, 0xc, 0x24]
    _0x4da0dc = [''] * 40
    for _0x20a7bf in range(0, len(arg)):
        _0x385ee3 = arg[_0x20a7bf]
        for _0x217721 in range(0, len(_0x4b082b)):
            if _0x4b082b[_0x217721] == _0x20a7bf + 0x1:
                _0x4da0dc[_0x217721] = _0x385ee3
    # Join once after the loops; the original rebuilt the string every pass.
    return ''.join(_0x4da0dc)
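# Optional sanity check: unsbox is a pure permutation. _0x4b082b[6] == 0x1,
# so input character 0 must land at output position 6; any 40-character
# string demonstrates this (the sample is arbitrary):
assert unsbox('0123456789' * 4)[6] == '0'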
if __name__ == '__main__':
    base_url = 'URL of the page that requires acw_sc__v2'  # fill in the target
    arg1 = get_script_data(base_url)
    # Fixed key embedded in the challenge JS, XORed against the shuffled arg1.
    key = '3000176000856006061501533003690027800375'
    _0x23a392 = unsbox(arg1)
    arg2 = 'acw_sc__v2=' + hexXor(key, _0x23a392)
    headers['Cookie'] = arg2
    # Retry the request with the computed cookie attached.
    response = requests.get(base_url, headers=headers)
    print(response.text)
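If the site rotates arg1 per request, it can be more robust to run the whole exchange through one requests.Session, so the computed cookie travels together with any other cookies the server sets. A minimal sketch of that variant (fetch_with_acw_sc_v2 is a name invented here, not part of the original post):

def fetch_with_acw_sc_v2(url):
    '''Fetch url, solving the acw_sc__v2 challenge when the page returns one.'''
    session = requests.Session()
    session.headers.update(headers)
    response = session.get(url)
    match = re.search("arg1='([^']+)'", response.text)
    if match is None:
        # No challenge script in the body: the first response is the real page.
        return response
    key = '3000176000856006061501533003690027800375'
    session.cookies.set('acw_sc__v2', hexXor(key, unsbox(match.group(1))))
    return session.get(url)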