关于淘宝爬虫记录

1.点击搜索结果 通过抓包工具拿到参数

在源码中搜索 sign:,可以找到其加密参数 j:j = h(d.token + "&" + i + "&" + g + "&" + c.data)

在控制台刷新测试后发现 d.token 是定值,取的是 cookie 中 _m_h5_tk 值里下划线 "_" 之前的部分

# First request: made only to obtain the cookies set by the server.
response = requests.get(url=url,data=data,headers=headers)
cookies = response.cookies
_m_h5_tk = response.cookies.get('_m_h5_tk')
_m_h5_tk_enc = response.cookies.get('_m_h5_tk_enc')
# The signing token is the part of _m_h5_tk before the first underscore.
token=_m_h5_tk.split('_')[0]

 接下来测试参数 i, 根据其格式可见其为时间戳

第一次

刷新后变化

JS:

i = (new Date).getTime()

Py:

timetap=str(round(time.time() * 1000))

可获取其时间戳

接下来测试参数g

第一次 

第二次

发现其为固定值,是参数中的AppKey

g = c.appKey || ("waptest" === d.subDomain ? "4272" : "12574478")

接下来对c.data进行测试

发现其是参数中data那一串

至此 MD5加密参数已经确定

JS://j = h(d.token + "&" + i + "&" + g + "&" + c.data)

// Reproduces j = h(d.token + "&" + i + "&" + g + "&" + c.data) with MD5.
// Fill in d.token, i, g and c.data with the values captured above.
const CryptoJS = require('crypto-js');
console.log(CryptoJS.MD5(d.token + "&" + i + "&" + g + "&" + c.data).toString());

Py:

import hashlib
def funtion_md5(md5_token):
    """Return the hexadecimal MD5 digest of ``md5_token`` encoded as UTF-8.

    :param md5_token: text to hash (a ``str``).
    :return: 32-character lowercase hex digest.
    """
    payload = md5_token.encode('UTF-8')
    return hashlib.md5(payload).hexdigest()

# Raw string: the backslashes before the inner quotes are part of the payload
# (variableMap is a JSON document embedded as a JSON string), so they must not
# be consumed as Python escape sequences.
md5_data=r'{"pNum":0,"pSize":"60","refpid":"mm_26632258_3504122_32538762","variableMap":"{\"q\":\"阿尔宙斯\",\"navigator\":false,\"clk1\":\"a57d3ed53fe20cb070c04c487d3db2b4\",\"union_lens\":\"recoveryid:201_33.44.33.230_17255766_1646190106531;prepvid:201_33.51.64.107_16190356_1646192584941\",\"recoveryId\":\"201_33.51.64.129_16190587_1646192627554\"}","qieId":"36308","spm":"a2e0b.20350158.31919782","app_pvid":"201_33.51.64.129_16190587_1646192627554","ctm":"spm-url:a2e0b.20350158.search.1;page_url:https%3A%2F%2Fuland.taobao.com%2Fsem%2Ftbsearch%3Frefpid%3Dmm_26632258_3504122_32538762%26keyword%3D%25E9%2598%25BF%25E5%25B0%2594%25E5%25AE%2599%25E6%2596%25AF%26clk1%3Da57d3ed53fe20cb070c04c487d3db2b4%26upsId%3Da57d3ed53fe20cb070c04c487d3db2b4%26spm%3Da2e0b.20350158.search.1%26pid%3Dmm_26632258_3504122_32538762%26union_lens%3Drecoveryid%253A201_33.44.33.230_17255766_1646190106531%253Bprepvid%253A201_33.51.64.107_16190356_1646192584941"}'

# String to sign, joined with "&":
#   token    - first part of the _m_h5_tk cookie value (obtained above)
#   timetap  - millisecond timestamp string (obtained above)
#   AppKey   - the fixed appKey request parameter
#   md5_data - the data payload string
md5_token = token + "&" + timetap + "&" + AppKey + "&" + md5_data

至此 param中的sign就等于

# The request signature is the MD5 hex digest of the joined token string.
sign = funtion_md5(md5_token)

# Query parameters of the signed request; 't' and 'sign' must correspond to
# the same timestamp and data string that were hashed into the signature.
params = dict(
    jsv='2.5.1',
    appKey=AppKey,
    t=timetap,
    sign=sign,
    api='mtop.alimama.union.xt.en.api.entry',
    v='1.0',
    AntiCreep='true',
    timeout='20000',
    AntiFlood='true',
    type='jsonp',
    dataType='jsonp',
    callback='mtopjsonp2',
    data=md5_data,
)

 第二次请求中,cookie需要进行拼接:

# Headers for the second request. The cookie entry carries the two values
# obtained from the first response.
# NOTE(review): many of these entries (date, server, set-cookie, s_*, x-*)
# look like captured response headers rather than request headers - confirm
# which ones the endpoint actually needs.
headers = {
    'cache-control': 'no-cache',
    'content-type': "application/json;charset=UTF-8",
    'date': 'Sat, 26 Feb 2022 08:46:31 GMT',
    'eagleeye-traceid': '212c2ff716458651912284954ebb15',
    'mtop-x-provider': 'f99e8e27f6c6a8b7ddd9398c7c7eae3c6a76561ae25bd2bb16cfe1fa6bf1c89ea46681b75da502bee029d66fcd3fd2c6',
    'p3p': "CP='CURa ADMa DEVa PSAo PSDo OUR BUS UNI PUR INT DEM STA PRE COM NAV OTC NOI DSP COR'",
    'pragma': 'no-cache',
    's-rt': '269',
    's_group': 'tao-session',
    's_host': '5765485370756c5765584e6b3532444f45523452486361756238336b314c7841615a4c6f4762596135623130',
    's_ip': '457079564a2b6e6772542b5238336d41',
    's_status': 'STATUS_NOT_EXISTED',
    's_tag': '285873024335892|134217728^|^^',
    's_tid': '212c2ff716458651912284954ebb15',
    's_ucode': 'CN:CENTER',
    's_v': '4.0.1.4',
    'server': 'Tengine/Aserver',
    'set-cookie': 'cookie2=16c32f4a83d0819a80b490b834f2f902;Path=/;Domain=.taobao.com;Max-Age=-1;HttpOnly',
    'timing-allow-origin': '*',
    'vary': 'Accept-Encoding',
    'x-aserver-sret': 'SUCCESS',
    'x-eagleeye-id': '212c2ff716458651912284954ebb15',
    'x-node': '70f38c2a7fdeeda2e75f69a1a718a84b',
    'x-powered-by': 'm.taobao.com',
    # Both token cookies, joined as "name=value; name=value".
    "cookie": "; ".join(("_m_h5_tk=" + _m_h5_tk, "_m_h5_tk_enc=" + _m_h5_tk_enc)),
}

 求得所有参数后,发送请求获取即可;

 

 

 

 

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值