python-css伪类元素反爬(二)

目标网站为http://js-crack-course-14-2.crawler-lab.com/

观察源码可以发现,部分文字藏在CSS伪类元素里,而这些CSS又是由JS动态生成的,所以需要把相关的JS代码扣出来。

扣代码时我踩了一个坑,为此折腾了一整天,最后才在调试中恍然大悟。

这个坑是:当某个变量由一大段很长的混淆表达式计算得到时(如下),如果按照"缺什么补什么"的原则把依赖逐个补全,补完之后反而会报出一个莫名其妙的错误,让人百思不得其解。其实这个变量是一个固定值:多刷新、多调试几遍,确认它不变之后,直接把这个值赋给变量即可。

        _0x177677 = _0x350191()['' + _0x1573d3() + _0x106d9b() + _0x4ce581() + _0x398f4b() + function () {
            'return\x20jN_';
            return 'ICo';
        }() + _0xb2755c + _0x51ef06() + _0x2ada77() + function () {
            'return\x20wV_';
            return 'n';
        }() + _0x5a39ba()]('' + _0x25e7cb() + '中价' + _0x594a95() + _0x3bf458() + _0x5c4049() + _0x34cbde() + _0x51e8c4() + _0x1374f3() + _0x1a2d89() + _0x2a7b8e() + '号合' + _0x684929 + function (_0x5a77e6) {
            'return\x20yc_';
            return _0x5a77e6;
        }('后商') + _0x3177d6() + function (_0x25ce2b) {
            'return\x20IN_';
            return _0x25ce2b;
        }('型增') + _0x296fca() + _0x196ff1() + _0x2b7913() + _0x3ffdfe() + _0x3e2116 + _0x49ef41() + _0x5b5d21() + _0x2ce7f1() + _0x40c965() + _0x2e8d7f() + function () {
            'return\x20NT_';
            return '或';
        }() + _0x196e52 + _0x97a1e2() + _0x1ee14f() + _0x30bf64() + _0x299605() + function () {
            'return\x20sn_';
            return '机';
        }() + '杆构' + _0x180141() + _0x4d3d70() + _0x337624() + function (_0x319521) {
            'return\x20Ye_';
            return _0x319521;
        }('测涡') + function () {
            'return\x20HB_';
            return '燃片版';
        }() + _0x3a8b92() + _0x262cdf() + _0x340e48 + _0x44fead() + _0x457a31 + _0x30724d() + _0xf80c64() + _0x22b83e() + _0x3c4dd0() + function (_0x3d9b39) {
            'return\x20Cy_';
            return _0x3d9b39;
        }('耗胎') + _0x2226a2 + _0x14bdb4() + _0x28fc49() + _0x27c309() + _0x12128e() + _0x3cefef() + function () {
            'return\x20Vs_';
            return '适';
        }() + '通速' + _0x169493() + _0x28e027 + _0x21fea1() + _0x544ee9() + function (_0x3a7a6f) {
            'return\x20aZ_';
            return _0x3a7a6f;
        }('风马') + _0x2351f2() + _0xf6e1b6(''));

/

以下是扣好的代码:  JS逆向第14课-2.js

/**
 * Supplies the fragment 'c', the first piece of the method name that
 * `_0x13ff1b` concatenates ('c' + 'h' + 'a' + 'rA' + 't').
 *
 * @returns {string} Always 'c'.
 */
var _0x1dbeb3 = function () {
    // Inert string statement kept from the obfuscated original; it has no effect.
    'return\x20mt_';
    var fragment = 'c';
    return fragment;
};

/**
 * Supplies the fragment 'h', the second piece of the method name that
 * `_0x13ff1b` concatenates.
 *
 * @returns {string} Always 'h'.
 */
function _0x265117() {
    // Inert string statement kept from the obfuscated original; it has no effect.
    'return\x20Ei_';
    var fragment = 'h';
    return fragment;
}

/**
 * Supplies the fragment 'a', the third piece of the method name that
 * `_0x13ff1b` concatenates.
 *
 * @returns {string} Always 'a'.
 */
var _0x4f0639 = function () {
    // Inert string statement kept from the obfuscated original; it has no effect.
    'return\x20LN_';
    var fragment = 'a';
    return fragment;
};

/**
 * Supplies the fragment 'rA', the fourth piece of the method name that
 * `_0x13ff1b` concatenates.
 *
 * @returns {string} Always 'rA'.
 */
function _0x1867d8() {
    // Inert string statement kept from the obfuscated original; it has no effect.
    'return\x20Mp_';
    var fragment = 'rA';
    return fragment;
}

/**
 * Supplies the fragment 't', the last piece of the method name that
 * `_0x13ff1b` concatenates.
 *
 * @returns {string} Always 't'.
 */
function _0x1b5075() {
    // Inert string statement kept from the obfuscated original; it has no effect.
    'return\x20KT_';
    var fragment = 't';
    return fragment;
}

/**
 * Returns one character of the word table `_0x177677`.
 *
 * The method name is built from the fragment helpers
 * ('c' + 'h' + 'a' + 'rA' + 't', i.e. 'charAt') and invoked on `_0x177677`.
 *
 * @param {string|number} _0x451167 - Decimal position into `_0x177677`,
 *     normally one comma-separated token taken from `_0x44edbf`.
 * @returns {string} The character at that position ('' when the position is
 *     past the end of the table).
 */
function _0x13ff1b(_0x451167) {
    var methodName = '' + _0x1dbeb3() + _0x265117() + _0x4f0639() + _0x1867d8() + _0x1b5075();
    // Always parse as base 10: without an explicit radix a token such as
    // "0x3" would be interpreted as hexadecimal.
    var position = parseInt(_0x451167, 10);
    return _0x177677[methodName](position);
}

/**
 * Supplies the fragment 's', the first piece of the method name that
 * `_0x59c0a4` concatenates ('s' + 'pl' + 'i' + 't').
 *
 * The comparison is obfuscator padding: both outcomes yield 's'.
 *
 * @returns {string} Always 's'.
 */
function _0x3e9934() {
    var produce = function () {
        return 's';
    };

    return produce() === 's' ? 's' : produce();
}

/**
 * Supplies the fragment 'pl', the second piece of the method name that
 * `_0x59c0a4` concatenates.
 *
 * The inner helper returns 'pl', which never equals 'pl,', so the 'uN_'
 * outcome is not reached and the helper's own value is returned.
 *
 * @returns {string} 'pl'.
 */
function _0x43bfd9() {
    var produce = function () {
        return 'pl';
    };

    return produce() === 'pl,' ? 'uN_' : produce();
}

/**
 * Supplies the fragment 'i', the third piece of the method name that
 * `_0x59c0a4` concatenates.
 *
 * @returns {string} Always 'i'.
 */
var _0x4c9e32 = function () {
    // Inert string statement kept from the obfuscated original; it has no effect.
    'uD_';
    function produce() {
        return 'i';
    }
    return produce();
};
/**
 * Supplies the fragment 't', the last piece of the method name that
 * `_0x59c0a4` concatenates.
 *
 * @returns {string} Always 't'.
 */
var _0x3e81f0 = function () {
    // Inert string statement kept from the obfuscated original; it has no effect.
    'return\x20vy_';
    var fragment = 't';
    return fragment;
};

/**
 * Splits a value on a separator.
 *
 * The method name is built from the fragment helpers
 * ('s' + 'pl' + 'i' + 't', i.e. 'split') and invoked on the first argument.
 *
 * @param {string} _0xe1921 - Value to split; any falsy value short-circuits.
 * @param {string} _0x5993bb - Separator handed to the split call.
 * @returns {string[]|string} The pieces, or '' when `_0xe1921` is falsy.
 */
function _0x59c0a4(_0xe1921, _0x5993bb) {
    if (!_0xe1921) {
        return '';
    }

    var methodName = '' + _0x3e9934() + _0x43bfd9() + _0x4c9e32() + _0x3e81f0();
    return _0xe1921[methodName](_0x5993bb);
}

// Index table consumed by `_0x359c64`: element N describes decoded word N.
// Each element is a comma-separated list of decimal positions; every position
// is resolved to one character of `_0x177677` via `_0x13ff1b`.
var _0x44edbf = ["93", "39,77", "0", "71,23,102,103", "59,76", "49,26", "88,95", "36,43", "19,106", "51,28", "58,101,48", "87,101,48", "64", "107,87", "19,41,55", "30,105", "27,63", "47,97", "45,91,38", "59,81", "4,59", "96,58", "9,89,86", "33,37", "42,11", "78,17", "41,55", "79,67", "9,8,12,22", "27,92,53", "25,15", "1,29", "26,82", "50,102", "11,66", "17,98", "47,7", "9,80", "9,41,55", "108,37", "99", "19,89,82", "84,56", "2", "90,86", "46", "44,70", "79,3", "100,37", "1,24,87", "13,10", "83,74", "68,38", "6", "79,40", "20", "69,21", "14", "65,75", "62,59", "32,72", "31,60", "9,89,82", "52,54", "71,17,22", "18,73", "85,5", "58,79", "9,57", "85,97", "19,8,12,22", "61,89", "35,95,22", "94,104", "19,89,86", "16", "34"]
// Character table indexed by `_0x13ff1b`. Hard-coded here; the write-up above
// reports that the page's obfuscated expression always yields this same value.
var _0x177677 = "万中价体供保元列制前力功动助华压号合名后商喷器地型增备多大央宝实容宽导差年度式弹径悬成或扭承指排数整时最机杆构架格桥气油测涡燃片版独率盖盘直矩离积称程立箱簧综缸置耗胎行规质距车转轮轴载连适通速配量金铝长门间隙风马驱驻高"

/**
 * Decodes every entry of `_0x44edbf` into the word it stands for.
 *
 * Each entry is split on ',' with `_0x59c0a4`; every resulting position is
 * turned into one character by `_0x13ff1b`, and the characters are joined.
 *
 * @returns {Object<number, string>} Map from the entry's index in
 *     `_0x44edbf` to its decoded word.
 */
function _0x359c64() {
    // Counters are declared locally: the original assigned to undeclared
    // names, which leaks globals and throws a ReferenceError in strict/module
    // code. `var` is kept so the script still runs on ES5-only runtimes.
    var decoded = {};
    var index;
    var itemIndex;
    var positions;
    var word;

    for (index = 0; index < _0x44edbf.length; index++) {
        positions = _0x59c0a4(_0x44edbf[index], ',');
        word = '';
        for (itemIndex = 0; itemIndex < positions.length; itemIndex++) {
            word += _0x13ff1b(positions[itemIndex]) + '';
        }
        decoded[index] = word;
    }

    return decoded;
}

/

以下为py程序

import requests
import execjs
import re
from parsel import Selector

def get_data():
    """Run the extracted JS file and return its decoded word table.

    Reads 'JS逆向第14课-2.js' as UTF-8, compiles it with execjs and returns
    whatever the script's `_0x359c64` function produces.
    """
    script_path = 'JS逆向第14课-2.js'
    with open(script_path, 'r', encoding='utf-8') as script_file:
        js_source = script_file.read()
    runtime = execjs.compile(js_source)
    return runtime.call('_0x359c64')

# Placeholder-number -> hidden-word table used by spdier() to fill in the
# empty `hs_kw<N>_configCP` spans. The same mapping can be produced by
# get_data() above; it is a fixed value, so it is inlined here.
data = {
        '0': '适', '1': '弹簧', '2': '万', '3': '离地间隙', '4': '油箱', '5': '整备', '6': '转速',
        '7': '年或', '8': '后驱', '9': '最大', '10': '气门数', '11': '车门数', '12': '版', '13': '驻车',
        '14': '后悬架', '15': '宝马', '16': '多片', '17': '排量', '18': '承载式', '19': '油耗', '20': '供油',
        '21': '配气', '22': '前轮距', '23': '宽度', '24': '成功', '25': '综合', '26': '悬架', '27': '缸盖',
        '28': '前制动器', '29': '多连杆', '30': '增压', '31': '中央', '32': '备胎', '33': '时间', '34': '功率',
        '35': '合金', '36': '排列', '37': '前置', '38': '前悬架', '39': '高度', '40': '铝', '41': '后轮胎',
        '42': '规格', '43': '价', '44': '轴距', '45': '指', '46': '扭矩', '47': '缸体', '48': '长度',
        '49': '中型车', '50': '助力', '51': '行程', '52': '盘式', '53': '元', '54': '缸径', '55': '商',
        '56': '直喷', '57': '华', '58': '独立', '59': '燃油', '60': '容积', '61': '实测', '62': '前轮胎',
        '63': '机构', '64': '离合器', '65': '名称', '66': '质保', '67': '气缸', '68': '前桥', '69': '质量',
        '70': '后制动器', '71': '涡轮', '72': '差速器', '73': '通风', '74': '后轮距', '75': '号', '76': '导'
}


def spdier():
    """Fetch the target page, restore the hidden words and print each table row.

    The page leaves empty ``<span class="hs_kw<N>_configCP"></span>``
    placeholders where words should be; each placeholder is replaced with
    ``data[N]`` before the HTML is parsed. For every ``tbody tr`` except the
    first, the header text (``th div a``) and the value text (``td div``) are
    printed.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'http://js-crack-course-14-2.crawler-lab.com/'
    headers = {
        # NOTE(review): hard-coded session token tied to one account; it has
        # to be replaced once it expires and should not live in source control.
        'Cookie': 'crawlerlab_token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE1ODY5NjI1ODYsImlkIjoxNjA1LCJuYW1lIjoiMTU4Njg4MTU2ODcifQ.Fhx2VXHieFT0Sg7KZElTLqdUucgDEgZIhTDn7YaubhY'
    }
    # A timeout keeps the script from hanging forever on a dead connection;
    # raise_for_status() avoids silently parsing an error page.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    resp_text = response.content.decode()

    def restore_word(match):
        # Leave the placeholder untouched when its number is not in `data`;
        # str.replace() with a missing (None) value would raise TypeError.
        return data.get(match.group(1), match.group(0))

    # Replace every placeholder in the raw HTML in a single pass.
    resp_text = re.sub(r'<span class="hs_kw(\d+)_configCP"></span>', restore_word, resp_text)

    # CSS selectors are used here; the XPath equivalent for the row selection
    # is '//tbody/tr[position()>1]'.
    sel = Selector(resp_text)
    # Slicing skips the first row (presumably a header row - confirm against
    # the page) without raising when no rows were found.
    for tr in sel.css('tbody tr')[1:]:
        # default='' guards against rows that have no header link text.
        th = tr.css('th div a::text').extract_first(default='').replace('\n', '').replace(' ', '')
        td = tr.css('td div::text').extract_first()
        print(th, td)


# Script entry: run the fetch-and-print routine as soon as this module is executed.
spdier()
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值