js逆向入门

中国土地市场网

1.分析请求过程

在这里插入图片描述
在控制台中获取相应的参数
在这里插入图片描述

2.分析js并修改参数

/**
 * Hex-encode a string by concatenating the base-16 form of each UTF-16
 * code unit. Digits are not zero-padded, so a code unit below 0x10
 * contributes a single character.
 *
 * @param {string} str Text to encode.
 * @returns {string} Concatenated hexadecimal digits ("" for an empty string).
 */
function stringToHex(str) {
    var pieces = [];
    for (var pos = 0; pos < str.length; pos += 1) {
        pieces.push(str.charCodeAt(pos).toString(16));
    }
    return pieces.join("");
}

/**
 * Build the relative verification URL whose security_verify_data query
 * parameter carries a hex-encoded value.
 *
 * @param {?string} text Value to encode; when null or undefined, the fixed
 *     screen size "1920,1080" is encoded instead.
 * @returns {string} Path and query string ending in the encoded value.
 */
function YunSuoAutoJump(text) {
    // Fixed stand-ins for screen.width / screen.height, which the original
    // browser code read from the environment.
    var screenWidth = "1920";
    var screenHeight = "1080";

    // Loose comparison on purpose: it matches both null and undefined.
    var payload = text == null ? screenWidth + "," + screenHeight : text;

    // The original browser code also stored stringToHex(window.location.href)
    // in an "srcurl" cookie when the URL lacked "security_verify_"; that step
    // is not performed here.
    return "/default.aspx?tabid=226&security_verify_data=" + stringToHex(payload);
}

// Take the first command-line argument after the script path (undefined when
// none is given) and print the URL generated from it.
var firstArg = process.argv.splice(2)[0];
console.log(YunSuoAutoJump(firstArg));

3.构建相应的爬虫代码

# -*- coding: utf-8 -*-
# @Time    : 2019/11/1 9:45
# @Author  :

import os
import re

import requests
from lxml import etree


def generate_signature(value):
    """
    Run ``landchina.js`` with node and return the first line it prints.

    The script is looked up in the directory that contains this file.

    :param value: text handed to the script as its single command-line
        argument; an empty value runs the script without any argument
    :return: first line of the script's standard output, including its
        trailing newline when one was printed
    :raises subprocess.CalledProcessError: if node exits with a non-zero status
    :raises RuntimeError: if the script prints nothing
    """
    import subprocess

    # abspath first: dirname(__file__) alone can be "" when the script is
    # started from its own directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # An argv list keeps characters such as "&", "?" or spaces in ``value``
    # (and in the directory path) away from any shell.
    command = ['node', 'landchina.js']
    argument = str(value)
    if argument:
        # Only append a non-empty argument, so that an empty value still
        # starts the script with no argument at all.
        command.append(argument)

    output = subprocess.check_output(
        command, cwd=script_dir, universal_newlines=True)
    if not output:
        raise RuntimeError('node landchina.js produced no output')

    first_line, newline, _ = output.partition('\n')
    return first_line + newline


def landchina():
    """
    Fetch the tabid=226 listing page of www.landchina.com and print its links.

    Three GET requests are sent through one session:

    1. The plain page URL; the ``security_session_verify`` cookie pair is read
       from the response's ``Set-Cookie`` header.
    2. The same URL plus the ``security_verify_data`` query produced by
       ``generate_signature("")``; the ``security_session_mid_verify`` cookie
       pair is read from the response's ``Set-Cookie`` header.
    3. The page URL again, sending both cookie pairs; the HTML is parsed and a
       dict with ``title`` and ``c_url`` is printed for every link found in
       the ``TAB_contentTable`` element.

    :return: None
    :raises KeyError: if the first or second response has no ``Set-Cookie``
        header
    """
    url = "https://www.landchina.com/default.aspx?tabid=226"

    # Header values shared by all three requests.
    accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3'
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'

    # The context manager closes the session's connections on exit.
    with requests.Session() as s:
        s.headers = {
            'Accept': accept,
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Host': 'www.landchina.com',
            'Pragma': 'no-cache',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': user_agent,
        }

        # First request
        resp1 = s.get(url=url)

        cookie1 = resp1.headers["Set-Cookie"]
        security_session_verify = "".join(
            re.findall(r"(security_session_verify=\w+;)", cookie1))

        # Passing the page URL through the script yields its hex encoding as
        # the text after the last "="; that text is sent as the srcurl cookie.
        encoded_page_url = generate_signature(url).strip().rpartition("=")[2]

        s.headers = {
            'Accept': accept,
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Cookie': security_session_verify + " srcurl=" + encoded_page_url,
            'Host': 'www.landchina.com',
            'Pragma': 'no-cache',
            'Referer': url,
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': user_agent,
        }
        # With an empty value the script is started without an argument.
        var = "".join(
            re.findall(r"(security_verify_data.*)", generate_signature("")))

        # Second request
        url2 = url + "&" + var
        resp2 = s.get(url2)
        cookie2 = resp2.headers["Set-Cookie"]
        security_session_mid_verify = "".join(
            re.findall(r"(security_session_mid_verify=\w+;)", cookie2))

        # NOTE(review): 'br' is advertised below; requests only decodes
        # Brotli bodies when a brotli package is installed - confirm.
        s.headers = {
            'Accept': accept,
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Cookie': '%s; %s' % (security_session_verify, security_session_mid_verify),
            'Host': 'www.landchina.com',
            'Pragma': 'no-cache',
            'Referer': url2,
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': user_agent,
        }

        # Third request
        rep = s.get(url=url)

    mytree = etree.HTML(rep.text)

    links = mytree.xpath('//*[@id="TAB_contentTable"]//tr/td[@class="queryCellBordy"]/a')
    for link in links:
        data = {
            "title": "".join(link.xpath(".//text()")),
            "c_url": "https://www.landchina.com/" + "".join(link.xpath("./@href")),
        }
        print(data)


# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    landchina()

已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 像素格子 设计师:CSDN官方博客 返回首页