中国裁判网-爬虫-2018.09.28

Request URL: http://wenshu.court.gov.cn/List/ListContent

主要是后面3个参数的获取

先找到对应ajax代码

1、获取guid

创建js文件,getKey.js

在控制台console输入createGuid,找到这个

var createGuid = function () {
        return (((1 + Math.random()) * 0x10000) | 0).toString(16).substring(1);
    };
guid1 = createGuid() + createGuid() + "-" + createGuid() + "-" + createGuid() + createGuid() + "-" + createGuid() + createGuid() + createGuid();


建立python代码

def get_guid(js=''):
    node = execjs.get()
    file = 'getKey.js'
    content = open(file,encoding='utf-8-sig',errors='ignore').read()
    ctx = node.compile(content)    
    guid = ctx.eval(js)
    return guid

输出:28f0a99ff4f2355014d183a2d4ce461bf7b8805f

2、通过刚才的guid得到number值

# 将之前获得的参数‘guid’传入get_number
def get_number(guid):
    url = 'http://wenshu.court.gov.cn/ValiCode/GetCode?guid=' + guid
    headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            }
    res = requests.get(url,headers=headers)
    return res.text

用到requests模块,自行pip,还有一定要加User-Agent,不然会认为是非法请求

输出:TUN9A843

3、获取vl5x

有点繁琐,这个值是一个cookies经过getKey()计算得到的

对应的函数位置在这里

看到,,,一脸懵逼,。。。。

参考https://blog.csdn.net/feilong_86/article/details/81675138

百度了一下js反混淆,在线转化地址:https://tool.lu/js/

强烈推荐。其他的 网站没有换行,这个看的更清楚

来解析第一句

    eval(de("eval(_fxxx('e n(7){9 d=0;j(9 i=0;i<7.k;i++){d+=(7.g(i)<<(i%m))}f d}e p(7){9 d=0;j(9 i=0;i<7.k;i++){d+=(7.g(i)<<(i%m))+i}f d}e E(7,o){9 d=0;j(9 i=0;i<7.k;i++){d+=(7.g(i)<<(i%m))+(i*o)}f d}e x(7,o){9 d=0;j(9 i=0;i<7.k;i++){d+=(7.g(i)<<(i%m))+(i+o-7.g(i))}f d}e z(7){9 7=7.8(5,5*5)+7.8((5+1)*(5+1),3);9 a=7.8(5)+7.8(-4);9 b=7.8(4)+a.8(-6);f h(7).8(4,l)}e w(7){9 7=7.8(5,5*5)+\"5\"+7.8(1,2)+\"1\"+7.8((5+1)*(5+1),3);9 a=7.8(5)+7.8(4);9 b=7.8(t)+a.8(-6);9 c=7.8(4)+a.8(6);f h(c).8(4,l)}e A(7){9 7=7.8(5,5*5)+\"r\"+7.8(1,2)+7.8((5+1)*(5+1),3);9 a=n(7.8(5))+7.8(4);9 b=n(7.8(5))+7.8(4);9 c=7.8(4)+b.8(5);f h(c).8(1,l)}e y(7){9 7=7.8(5,5*5)+\"r\"+7.8(1,2)+7.8((5+1)*(5+1),3);9 a=p(7.8(5))+7.8(4);9 b=7.8(4)+a.8(5);9 c=n(7.8(5))+7.8(4);f h(b).8(3,l)}e B(7){9 7=7.8(5,5*5)+\"2\"+7.8(1,2)+7.8((5+1)*(5+1),3);9 d=0;j(9 i=0;i<7.8(1).k;i++){d+=(7.g(i)<<(i%m))}9 s=d+7.8(4);9 d=0;9 a=7.8(5);j(9 i=0;i<a.k;i++){d+=(a.g(i)<<(i%m))+i}a=d+\"\"+7.8(4);9 b=h(7.8(1))+n(a.8(5));f h(b).8(3,l)}e v(7){9 q=u C();9 7=q.F(7.8(5,5*5)+7.8(1,2)+\"1\")+7.8((5+1)*(5+1),3);9 a=p(7.8(4,D))+7.8(-4);9 b=h(7.8(4))+a.8(2);9 a=7.8(3);9 c=n(7.8(5))+7.8(4);9 s=d+7.8(4);9 d=0;j(9 i=0;i<a.k;i++){d+=(a.g(i)<<(i%t))+i}a=d+\"\"+7.8(4);f h(7).8(4,l)}', 42, 42, '|||||||str|substr|var||||long|{0}|return|charCodeAt|hex_md5||for|length|24|16|strToLong|step|strToLongEn|base|15|aa|12|new|{1}5|{1}1|strToLongEn3|{1}3|{1}0|{1}2|{1}4|Base64|10|strToLongEn2|encode'.split('|'), 0, {}))", 4, "function|makeKey_|(k(0)+|(c(0)+"))

将黑色显示的de替换为

function de(str, count, strReplace) {
    var arrReplace = strReplace.split('|');
    for (var i = 0; i < count; i++) {
        str = str.replace(new RegExp('\\{' + i + '\\}','g'), arrReplace[i]);
    }
    return str;
}

结果为:

再将黑色的_fxxx替换为

var _fxxx = function(p, a, c, k, e, d) {
    e = function(c) {
        return (c < a ? "" : e(parseInt(c / a))) + ((c = c % a) > 35 ? String.fromCharCode(c + 29) : c.toString(36))
    }
    ;
    if (!''.replace(/^/, String)) {
        while (c--)
            d[e(c)] = k[c] || e(c);
        k = [function(e) {
            return d[e]
        }
        ];
        e = function() {
            return '\\w+'
        }
        ;
        c = 1;
    }
    ;while (c--)
        if (k[c])
            p = p.replace(new RegExp('\\b' + e(c) + '\\b','g'), k[c]);
    return p;
};

如图:

结果图下:

_fxxx和funciton 的 都在getKey对应的js里面,其他几条也是如此

观察一下,解析出来的函数包括strToLong,strToLongEn0-3,makeKey_0-399

这些函数用到下面的js

全部复制粘贴到getKey.js文件里面

下面是剩下的:

var cookie = getCookie('vjkl5');
var arrFun = [makeKey_0, makeKey_1, makeKey_2, makeKey_3, makeKey_4, makeKey_5, makeKey_6, makeKey_7, makeKey_8, makeKey_9, makeKey_10, makeKey_11, makeKey_12, makeKey_13, makeKey_14, makeKey_15, makeKey_16, makeKey_17, makeKey_18, makeKey_19, makeKey_20, makeKey_21, makeKey_22, makeKey_23, makeKey_24, makeKey_25, makeKey_26, makeKey_27, makeKey_28, makeKey_29, makeKey_30, makeKey_31, makeKey_32, makeKey_33, makeKey_34, makeKey_35, makeKey_36, makeKey_37, makeKey_38, makeKey_39, makeKey_40, makeKey_41, makeKey_42, makeKey_43, makeKey_44, makeKey_45, makeKey_46, makeKey_47, makeKey_48, makeKey_49, makeKey_50, makeKey_51, makeKey_52, makeKey_53, makeKey_54, makeKey_55, makeKey_56, makeKey_57, makeKey_58, makeKey_59, makeKey_60, makeKey_61, makeKey_62, makeKey_63, makeKey_64, makeKey_65, makeKey_66, makeKey_67, makeKey_68, makeKey_69, makeKey_70, makeKey_71, makeKey_72, makeKey_73, makeKey_74, makeKey_75, makeKey_76, makeKey_77, makeKey_78, makeKey_79, makeKey_80, makeKey_81, makeKey_82, makeKey_83, makeKey_84, makeKey_85, makeKey_86, makeKey_87, makeKey_88, makeKey_89, makeKey_90, makeKey_91, makeKey_92, makeKey_93, makeKey_94, makeKey_95, makeKey_96, makeKey_97, makeKey_98, makeKey_99, makeKey_100, makeKey_101, makeKey_102, makeKey_103, makeKey_104, makeKey_105, makeKey_106, makeKey_107, makeKey_108, makeKey_109, makeKey_110, makeKey_111, makeKey_112, makeKey_113, makeKey_114, makeKey_115, makeKey_116, makeKey_117, makeKey_118, makeKey_119, makeKey_120, makeKey_121, makeKey_122, makeKey_123, makeKey_124, makeKey_125, makeKey_126, makeKey_127, makeKey_128, makeKey_129, makeKey_130, makeKey_131, makeKey_132, makeKey_133, makeKey_134, makeKey_135, makeKey_136, makeKey_137, makeKey_138, makeKey_139, makeKey_140, makeKey_141, makeKey_142, makeKey_143, makeKey_144, makeKey_145, makeKey_146, makeKey_147, makeKey_148, makeKey_149, makeKey_150, makeKey_151, makeKey_152, makeKey_153, makeKey_154, makeKey_155, makeKey_156, makeKey_157, makeKey_158, makeKey_159, makeKey_160, makeKey_161, makeKey_162, makeKey_163, makeKey_164, makeKey_165, makeKey_166, makeKey_167, makeKey_168, makeKey_169, makeKey_170, makeKey_171, makeKey_172, makeKey_173, makeKey_174, makeKey_175, makeKey_176, makeKey_177, makeKey_178, makeKey_179, makeKey_180, makeKey_181, makeKey_182, makeKey_183, makeKey_184, makeKey_185, makeKey_186, makeKey_187, makeKey_188, makeKey_189, makeKey_190, makeKey_191, makeKey_192, makeKey_193, makeKey_194, makeKey_195, makeKey_196, makeKey_197, makeKey_198, makeKey_199, makeKey_200, makeKey_201, makeKey_202, makeKey_203, makeKey_204, makeKey_205, makeKey_206, makeKey_207, makeKey_208, makeKey_209, makeKey_210, makeKey_211, makeKey_212, makeKey_213, makeKey_214, makeKey_215, makeKey_216, makeKey_217, makeKey_218, makeKey_219, makeKey_220, makeKey_221, makeKey_222, makeKey_223, makeKey_224, makeKey_225, makeKey_226, makeKey_227, makeKey_228, makeKey_229, makeKey_230, makeKey_231, makeKey_232, makeKey_233, makeKey_234, makeKey_235, makeKey_236, makeKey_237, makeKey_238, makeKey_239, makeKey_240, makeKey_241, makeKey_242, makeKey_243, makeKey_244, makeKey_245, makeKey_246, makeKey_247, makeKey_248, makeKey_249, makeKey_250, makeKey_251, makeKey_252, makeKey_253, makeKey_254, makeKey_255, makeKey_256, makeKey_257, makeKey_258, makeKey_259, makeKey_260, makeKey_261, makeKey_262, makeKey_263, makeKey_264, makeKey_265, makeKey_266, makeKey_267, makeKey_268, makeKey_269, makeKey_270, makeKey_271, makeKey_272, makeKey_273, makeKey_274, makeKey_275, makeKey_276, makeKey_277, makeKey_278, makeKey_279, makeKey_280, makeKey_281, makeKey_282, makeKey_283, makeKey_284, makeKey_285, makeKey_286, makeKey_287, makeKey_288, makeKey_289, makeKey_290, makeKey_291, makeKey_292, makeKey_293, makeKey_294, makeKey_295, makeKey_296, makeKey_297, makeKey_298, makeKey_299, makeKey_300, makeKey_301, makeKey_302, makeKey_303, makeKey_304, makeKey_305, makeKey_306, makeKey_307, makeKey_308, makeKey_309, makeKey_310, makeKey_311, makeKey_312, makeKey_313, makeKey_314, makeKey_315, makeKey_316, makeKey_317, makeKey_318, makeKey_319, makeKey_320, makeKey_321, makeKey_322, makeKey_323, makeKey_324, makeKey_325, makeKey_326, makeKey_327, makeKey_328, makeKey_329, makeKey_330, makeKey_331, makeKey_332, makeKey_333, makeKey_334, makeKey_335, makeKey_336, makeKey_337, makeKey_338, makeKey_339, makeKey_340, makeKey_341, makeKey_342, makeKey_343, makeKey_344, makeKey_345, makeKey_346, makeKey_347, makeKey_348, makeKey_349, makeKey_350, makeKey_351, makeKey_352, makeKey_353, makeKey_354, makeKey_355, makeKey_356, makeKey_357, makeKey_358, makeKey_359, makeKey_360, makeKey_361, makeKey_362, makeKey_363, makeKey_364, makeKey_365, makeKey_366, makeKey_367, makeKey_368, makeKey_369, makeKey_370, makeKey_371, makeKey_372, makeKey_373, makeKey_374, makeKey_375, makeKey_376, makeKey_377, makeKey_378, makeKey_379, makeKey_380, makeKey_381, makeKey_382, makeKey_383, makeKey_384, makeKey_385, makeKey_386, makeKey_387, makeKey_388, makeKey_389, makeKey_390, makeKey_391, makeKey_392, makeKey_393, makeKey_394, makeKey_395, makeKey_396, makeKey_397, makeKey_398, makeKey_399];
var funIndex = strToLong(cookie) % arrFun.length;
var fun = arrFun[funIndex];
var result = fun(cookie);
return result;

result来自fun(cookie),而cookie是从vjkl5值得到的,那vjkl5是从哪来的呢?

回到network

在request header里面,那先访问网站获取即可,这个coookie是保存在后台的,因此有时长限制,其他文章说是10分钟,我没验证。反正我计划一天才跑2-3次。

获取代码如下:

# 获取Cookie
def get_vjkl5(number,guid,search_content):
    url='http://wenshu.court.gov.cn/list/list/?sorttype=1&number={}&guid={}&conditions=searchWord QWJS   全文检索:{}'.format(number,guid,search_content)
    res = requests.get(url,headers=headers)
    return res.headers['Set-Cookie']

这里获取整个cookie,之后再解析出vjkl5。后续再看。

回到getKey,里面有个getCookie函数,在console里面可以看到,本质就是从cookie中解析出vjkl5,所以直接被我干掉了,代码如下:

var cookie = vjkl5;
var arrFun = [makeKey_0, makeKey_1, makeKey_2, makeKey_3, makeKey_4, makeKey_5, makeKey_6, makeKey_7, makeKey_8, makeKey_9, makeKey_10, makeKey_11, makeKey_12, makeKey_13, makeKey_14, makeKey_15, makeKey_16, makeKey_17, makeKey_18, makeKey_19, makeKey_20, makeKey_21, makeKey_22, makeKey_23, makeKey_24, makeKey_25, makeKey_26, makeKey_27, makeKey_28, makeKey_29, makeKey_30, makeKey_31, makeKey_32, makeKey_33, makeKey_34, makeKey_35, makeKey_36, makeKey_37, makeKey_38, makeKey_39, makeKey_40, makeKey_41, makeKey_42, makeKey_43, makeKey_44, makeKey_45, makeKey_46, makeKey_47, makeKey_48, makeKey_49, makeKey_50, makeKey_51, makeKey_52, makeKey_53, makeKey_54, makeKey_55, makeKey_56, makeKey_57, makeKey_58, makeKey_59, makeKey_60, makeKey_61, makeKey_62, makeKey_63, makeKey_64, makeKey_65, makeKey_66, makeKey_67, makeKey_68, makeKey_69, makeKey_70, makeKey_71, makeKey_72, makeKey_73, makeKey_74, makeKey_75, makeKey_76, makeKey_77, makeKey_78, makeKey_79, makeKey_80, makeKey_81, makeKey_82, makeKey_83, makeKey_84, makeKey_85, makeKey_86, makeKey_87, makeKey_88, makeKey_89, makeKey_90, makeKey_91, makeKey_92, makeKey_93, makeKey_94, makeKey_95, makeKey_96, makeKey_97, makeKey_98, makeKey_99, makeKey_100, makeKey_101, makeKey_102, makeKey_103, makeKey_104, makeKey_105, makeKey_106, makeKey_107, makeKey_108, makeKey_109, makeKey_110, makeKey_111, makeKey_112, makeKey_113, makeKey_114, makeKey_115, makeKey_116, makeKey_117, makeKey_118, makeKey_119, makeKey_120, makeKey_121, makeKey_122, makeKey_123, makeKey_124, makeKey_125, makeKey_126, makeKey_127, makeKey_128, makeKey_129, makeKey_130, makeKey_131, makeKey_132, makeKey_133, makeKey_134, makeKey_135, makeKey_136, makeKey_137, makeKey_138, makeKey_139, makeKey_140, makeKey_141, makeKey_142, makeKey_143, makeKey_144, makeKey_145, makeKey_146, makeKey_147, makeKey_148, makeKey_149, makeKey_150, makeKey_151, makeKey_152, makeKey_153, makeKey_154, makeKey_155, makeKey_156, makeKey_157, makeKey_158, makeKey_159, makeKey_160, makeKey_161, makeKey_162, makeKey_163, makeKey_164, makeKey_165, makeKey_166, makeKey_167, makeKey_168, makeKey_169, makeKey_170, makeKey_171, makeKey_172, makeKey_173, makeKey_174, makeKey_175, makeKey_176, makeKey_177, makeKey_178, makeKey_179, makeKey_180, makeKey_181, makeKey_182, makeKey_183, makeKey_184, makeKey_185, makeKey_186, makeKey_187, makeKey_188, makeKey_189, makeKey_190, makeKey_191, makeKey_192, makeKey_193, makeKey_194, makeKey_195, makeKey_196, makeKey_197, makeKey_198, makeKey_199, makeKey_200, makeKey_201, makeKey_202, makeKey_203, makeKey_204, makeKey_205, makeKey_206, makeKey_207, makeKey_208, makeKey_209, makeKey_210, makeKey_211, makeKey_212, makeKey_213, makeKey_214, makeKey_215, makeKey_216, makeKey_217, makeKey_218, makeKey_219, makeKey_220, makeKey_221, makeKey_222, makeKey_223, makeKey_224, makeKey_225, makeKey_226, makeKey_227, makeKey_228, makeKey_229, makeKey_230, makeKey_231, makeKey_232, makeKey_233, makeKey_234, makeKey_235, makeKey_236, makeKey_237, makeKey_238, makeKey_239, makeKey_240, makeKey_241, makeKey_242, makeKey_243, makeKey_244, makeKey_245, makeKey_246, makeKey_247, makeKey_248, makeKey_249, makeKey_250, makeKey_251, makeKey_252, makeKey_253, makeKey_254, makeKey_255, makeKey_256, makeKey_257, makeKey_258, makeKey_259, makeKey_260, makeKey_261, makeKey_262, makeKey_263, makeKey_264, makeKey_265, makeKey_266, makeKey_267, makeKey_268, makeKey_269, makeKey_270, makeKey_271, makeKey_272, makeKey_273, makeKey_274, makeKey_275, makeKey_276, makeKey_277, makeKey_278, makeKey_279, makeKey_280, makeKey_281, makeKey_282, makeKey_283, makeKey_284, makeKey_285, makeKey_286, makeKey_287, makeKey_288, makeKey_289, makeKey_290, makeKey_291, makeKey_292, makeKey_293, makeKey_294, makeKey_295, makeKey_296, makeKey_297, makeKey_298, makeKey_299, makeKey_300, makeKey_301, makeKey_302, makeKey_303, makeKey_304, makeKey_305, makeKey_306, makeKey_307, makeKey_308, makeKey_309, makeKey_310, makeKey_311, makeKey_312, makeKey_313, makeKey_314, makeKey_315, makeKey_316, makeKey_317, makeKey_318, makeKey_319, makeKey_320, makeKey_321, makeKey_322, makeKey_323, makeKey_324, makeKey_325, makeKey_326, makeKey_327, makeKey_328, makeKey_329, makeKey_330, makeKey_331, makeKey_332, makeKey_333, makeKey_334, makeKey_335, makeKey_336, makeKey_337, makeKey_338, makeKey_339, makeKey_340, makeKey_341, makeKey_342, makeKey_343, makeKey_344, makeKey_345, makeKey_346, makeKey_347, makeKey_348, makeKey_349, makeKey_350, makeKey_351, makeKey_352, makeKey_353, makeKey_354, makeKey_355, makeKey_356, makeKey_357, makeKey_358, makeKey_359, makeKey_360, makeKey_361, makeKey_362, makeKey_363, makeKey_364, makeKey_365, makeKey_366, makeKey_367, makeKey_368, makeKey_369, makeKey_370, makeKey_371, makeKey_372, makeKey_373, makeKey_374, makeKey_375, makeKey_376, makeKey_377, makeKey_378, makeKey_379, makeKey_380, makeKey_381, makeKey_382, makeKey_383, makeKey_384, makeKey_385, makeKey_386, makeKey_387, makeKey_388, makeKey_389, makeKey_390, makeKey_391, makeKey_392, makeKey_393, makeKey_394, makeKey_395, makeKey_396, makeKey_397, makeKey_398, makeKey_399];
var funIndex = strToLong(cookie) % arrFun.length;
var fun = arrFun[funIndex];
var result = fun(cookie);
return result;

同时在getKey函数加上参数vjkl5,我这里将函数名修改了一下,避免和文件名一样

这时候通过运行js文件的getVl5x函数获取vl5x

# 通过cookie,进行getKey()得到vl5x
def get_cl5x(vjkl5):
    file = 'getKey.js'
    content = open(file,encoding='utf-8-sig',errors='ignore').read()
    vl5x = execjs.compile(content).call('getVl5x', vjkl5)
    return vl5x

输出:8ec14ef708f89f6e03885e11 (每个人运行的结果都是不同的)

----------------------------------------------------------------------------------------------------------------------------------------------------------------------

获取vjkl5,guid,number,vl5x这4个值,完成了一大半了。

剩下就是获取列表:

# 获取文书列表
def get_list(vjkl5,guid,number,vl5x,search,order):
    url = 'http://wenshu.court.gov.cn/List/ListContent' 
    data = {
            'Param': search,
            'Index': 1,
            'Page': 10,
            'Order': order,
            'Direction': 'asc',
            'vl5x': vl5x,
            'number': number,
            'guid': guid
            }
    
    header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
            'Cookie': 'vjkl5=' + vjkl5
            }
    res = requests.post(url=url,params=data,headers=header)
    return res

注意cookie里面添加vjkl5值

运行函数

if __name__ == '__main__':
    search = '案件类型:执行案件,全文检索:新吴区#'
    order = '法院层级'
    # 获取guid
    guid = get_guid(js='guid1')  
    number = get_number(guid)
    vjkl5 = get_vjkl5(number,guid,'无锡')
    # 解析出vjkl5
    vjkl5 = re.findall('vjkl5=(.*?);',vjkl5)[0]
    vl5x = get_cl5x(vjkl5)
    res = get_list(vjkl5,guid,number,vl5x,search,order)
    print(res.json())

 

部分结果如下:

_____________________________________________________________________________________________________

这个网站会经常更新反爬策略,说不定哪天这个就走不通了,如果更新,我还是会更新的。。。

_____________________________________________________________________________________________________

 

 

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值