前程无忧 (51job): Alibaba 227 Slider Captcha Analysis

Disclaimer:
Everything in this article is for learning and exchange only and must not be used for any other purpose. Captured traffic, sensitive URLs, data interfaces, etc. have all been desensitized. Commercial and illegal use is strictly prohibited; the author bears no responsibility for any consequences arising from such use!

Main code
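The overall flow, as implemented below: request the search API with a sign computed in JS via execjs; when the response comes back intercepted by the slider, an HTML page embedding a requestInfo object is returned instead of JSON. The script extracts the token and args from it, obtains the n parameter from a local service, calls the verification endpoint to obtain csessionid and value, then replays the search request with u_atoken, u_asession and u_asig attached. Per the disclaimer, request URLs, headers and cookies are redacted.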

import json
import re
import time
from lxml import etree
import execjs
import requests
from urllib.parse import quote
from urllib.parse import parse_qs

session = requests.session()
cookies = {
    # cookies redacted
}
requestId = ""
for i in range(100):
    page = i + 1
    keyword = '数据分析'  # search keyword ("data analysis")
    cp = execjs.compile(open('./前程无忧.js', 'r', encoding='utf-8').read())
    headers = {
        # headers redacted
    }
    ts = round(time.time()) * 1000  # millisecond timestamp
    # the first page has no requestId yet; later pages reuse the one returned by the API
    if page == 1:
        sign = cp.call('sign', ts, page, "no", None, keyword)
    else:
        sign = cp.call('sign', ts, page, requestId, None, keyword)
    headers['sign'] = sign
    params = {
        'api_key': '51job',
        'timestamp': ts,
        'keyword': keyword,
        'searchType': '2',
        'function': '',
        'industry': '',
        'jobArea': '030200',
        'jobArea2': '',
        'landmark': '',
        'metro': '',
        'salary': '',
        'workYear': '',
        'degree': '',
        'companyType': '',
        'companySize': '',
        'jobType': '',
        'issueDate': '',
        'sortType': '0',
        'pageNum': page,
        'requestId': '',
        'keywordType': 'guess_exp_tag6',
        'pageSize': '20',
        'source': '1',
        'accountId': '',
        'pageCode': 'sou|sou|soulb',
    }
    # request URL redacted; this is the job-search API
    response = session.get('/search-pc', params=params, cookies=cookies, headers=headers)
    # when the slider intercepts the request, the response embeds a requestInfo object
    request_info_pattern = re.compile(r"var requestInfo = ({.*?});", re.DOTALL)
    match = request_info_pattern.search(response.text)
    if match:
        request_info_str = match.group(1)
        # requestInfo is an object literal, so it could also be parsed with json.loads:
        # request_info = json.loads(request_info_str)
        # token = re.findall('token:.*,', request_info_str)[0].split("token: ")[1][1:-2]
        token = "0b72f618-4c1-4aba-9a78-f" + str(ts - 6666) + "ba"
        print(token)
        refer = re.findall('refer:.*,', request_info_str)[0].split("refer: ")[1][1:-2]
        args = "/api/job/search-pc?" + re.findall('args:.*,', request_info_str)[0].split("args: ")[1][1:-2]
        args_dict = parse_qs(re.findall('args:.*,', request_info_str)[0].split("args: ")[1][1:-2])
        # recompute the sign from the intercepted args
        sign = cp.call('sign', "", "", "", args)
        url1 = "/analyze.jsonp"  # verification endpoint, redacted
        # n is generated by the slider's JS; here it comes from a local service (see the sketch below)
        n = json.loads(requests.get("http://localhost:3000/get227").text)['n']
        print(n)
        params = {
            "a": "CF_APP_WAF",
            "t": token,
            "n": n,
            "p": "",
            "scene": "register",
            "asyn": "0",
            "lang": "cn",
            "v": "1",
            "callback": f"jsonp_{int(time.time() * 1000)}"
        }
        response = session.get(url1, headers=headers, params=params)
        # strip the jsonp wrapper and extract the verification result
        data = json.loads(re.findall("{.*}", response.text)[0])['result']
        csessionid = data['csessionid']
        value = data['value']
        headers = {
            # headers redacted
        }
        params = {
            'api_key': args_dict['api_key'][0],
            'timestamp': args_dict['timestamp'][0],
            'keyword': args_dict['keyword'][0],
            'searchType': '2',
            'function': '',
            'industry': '',
            'jobArea': args_dict['jobArea'][0],
            'jobArea2': '',
            'landmark': '',
            'metro': '',
            'salary': '',
            'workYear': '',
            'degree': '',
            'companyType': '',
            'companySize': '',
            'jobType': '',
            'issueDate': '',
            'sortType': '0',
            'pageNum': args_dict['pageNum'][0],
            'requestId': '',
            'keywordType': args_dict['keywordType'][0],
            'pageSize': args_dict['pageSize'][0],
            'source': '1',
            'accountId': '',
            'pageCode': args_dict['pageCode'][0],
            # verification parameters obtained above
            'u_atoken': token,
            'u_asession': csessionid,
            'u_asig': value,
            'u_aref': "123",
        }

        # replay the search request with the verification parameters attached
        response = session.get('/search-pc', cookies=cookies,
                               headers=headers, params=params)
        print(response.text)
        data = json.loads(response.text)
        requestId = data['resultbody']['requestId']
        print('slider passed')
        continue
    else:
        print("No requestInfo object found.")
    data = json.loads(response.text)
    if data['resultbody']['requestId']:
        requestId = data['resultbody']['requestId']
        print(str(data)[0:1000])
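The n value above comes from a local helper at http://localhost:3000/get227, which this article does not show. Below is a minimal sketch of what such a service could look like, assuming the slider's n-generation logic has been extracted into a local JS file; the file name 227.js and the function name get_n are both hypothetical, not the author's actual names.

# minimal sketch, not the author's actual service
import execjs
from flask import Flask, jsonify

app = Flask(__name__)
# hypothetical: 227.js holds the extracted n-generation JS and exposes get_n()
ctx = execjs.compile(open('./227.js', 'r', encoding='utf-8').read())

@app.route('/get227')
def get227():
    # produce a fresh n value on every request
    return jsonify({'n': ctx.call('get_n')})

if __name__ == '__main__':
    app.run(port=3000)

Run it alongside the crawler; the main script only assumes the response body is JSON with an 'n' field.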


Results

Summary

1. For security reasons, this article does not provide the complete workflow; most of the debugging steps are omitted and only the general approach is given. The specific details are for you to reconstruct yourself; I am sure you can debug it out as well.
