网站添加反爬的一些【中级】技巧


前言

作为网站站长,有时可能不希望他人用爬虫程序直接抓取网站数据,这时就可以考虑加一些反爬措施。本文分享几个简单的反爬技巧。

一、利用网页的 document 对象

一般爬虫(如 python 的 requests)在请求网页的时候是没有 document 环境的,因为 document 对象是特定于浏览器环境的。因此我们可以在网页 JavaScript 代码里面加入一些依赖于 document 的操作。这样哪怕爬虫程序直接模拟执行 JavaScript 代码(如 python 的 execjs 库),也无法正常运行。

因此,在用户访问我们页面时,后端可以先返回一个带有 document 操作的 js 验证代码,通过这个代码生成一些验证参数,之后携带验证参数来访问实际的数据页。这时候后端可以对验证参数进行校验,如果合法则展示数据,不合法则拦截。

例如:

<html>
<script>
	var key = "123456abc"; // seed value; may be supplied by the backend or generated randomly
	/**
	 * Demo "encryption": shifts every UTF-16 code unit of `key` up by 2 and
	 * shows the original and the shifted string in an alert dialog.
	 * A real page could substitute a more complex transform here and attach
	 * the result to the next request (URL parameter or cookie) instead.
	 *
	 * @param {string} key - seed value to transform.
	 * @returns {undefined} nothing; the result is only shown via alert().
	 */
	function fun(key){
		const shifted = [];
		for (let idx = 0; idx < key.length; idx += 1) {
			shifted.push(String.fromCharCode(key.charCodeAt(idx) + 2));
		}
		const result = shifted.join("");
		alert(key + "——" + result);
	}
	document.addEventListener("DOMContentLoaded", function() {
        // Calls fun(key) when the document dispatches DOMContentLoaded.
        // NOTE(review): the previous comment said this runs before page elements appear;
        // DOMContentLoaded is dispatched after the HTML has been parsed — confirm the intended timing.
        fun(key);
    });
</script>
</html>

此时,该代码只会在网页初始加载时执行 fun() 方法并弹出弹框。把 js 部分直接复制到浏览器控制台运行不会产生任何效果,因为此时 DOMContentLoaded 事件已经触发过,新注册的监听函数不会再被调用。

实际效果如下:

这时,你可能会有疑问,加密代码都直接写在前端了,爬虫er直接看下加密规则不就可以知道怎么绕过了吗?

此时还缺少一个步骤:代码混淆。下面是在 https://www.bejson.com/encrypt/jsobfuscate 网站上对 js 进行混淆的结果:

<html>
<script>
	// Machine-generated (obfuscated) form of the readable snippet: it defines fun() and registers a
	// listener on document that calls fun(key).
	// String literals are stored base64-encoded in _0x4c8e and resolved through _0x347e (atob + decodeURIComponent).
	// The generated code must stay on ONE physical line: a line break directly after `return`
	// makes automatic semicolon insertion turn `return <expr>` into `return;`.
	// NOTE(review): several functions test their own toString() against a regex, which looks like
	// format detection — reformatting may change behavior; verify before beautifying.
	var key = "123456abc";
	var _0x4c8e=['RE9NQ29udGVudExvYWRlZA==','cExLQUY=','SWduS0M=','YUpqQ0U=','M3w2fDV8NHwwfDd8MXwy','UE92eE8=','cmxhVnQ=','NXw5fDN8Mnw2fDB8N3w4fDF8NA==','c3BsaXQ=','YUdLdlk=','bG9n','bnZCUHE=','ZXJyb3I=','TnROZ3U=','cmV0dXJuIChmdW5jdGlvbigpIA==','YXBwbHk=','YWRkRXZlbnRMaXN0ZW5lcg==','V0V0TVU=','U2R2UkQ=','ZnJvbUNoYXJDb2Rl','RldLQWs=','dHJhY2U=','UHFGZ0I=','ZXFHdnU=','ZHVtQXE=','Y2hhckNvZGVBdA==','Y29uc29sZQ==','QVFqU0c=','MHw4fDJ8M3w1fDR8MXw2fDl8Nw==','dGVzdA==','ZWhiSk8=','ZEpVQ1c=','aW5mbw==','YkJvbno=','Z0hGbFY=','d2Fybg==','U0JJUHE=','bGVuZ3Ro','WnduUU8=','dlRYYXA=','SlBSUWY=','Y29uc3RydWN0b3I=','dUtaRU0=','aGRvR00=','WWNMd3U=','ZXhjZXB0aW9u','bFdqYWg=','a3Z1RUU=','XihbXiBdKyggK1teIF0rKSspK1teIF19','V3ZuRUU=','ZGZyZEE=','ZGVidWc=','e30uY29uc3RydWN0b3IoInJldHVybiB0aGlzIikoICk=','UXpoQVk=','V0FWWEs=','REtheW4=','cFVIcW4='];(function(_0x42f4db,_0x4c8e58){var _0x347ec4=function(_0x43735d){while(--_0x43735d){_0x42f4db['push'](_0x42f4db['shift']());}};var _0x894c63=function(){var _0x131770={'data':{'key':'cookie','value':'timeout'},'setCookie':function(_0xba782a,_0x257145,_0x556cdb,_0x40e1d4){_0x40e1d4=_0x40e1d4||{};var _0x53bcdb=_0x257145+'='+_0x556cdb;var _0x5e4eb3=0x0;for(var _0x4df4c3=0x0,_0x47db7f=_0xba782a['length'];_0x4df4c3<_0x47db7f;_0x4df4c3++){var _0x2cf8e8=_0xba782a[_0x4df4c3];_0x53bcdb+=';\x20'+_0x2cf8e8;var _0x4b953c=_0xba782a[_0x2cf8e8];_0xba782a['push'](_0x4b953c);_0x47db7f=_0xba782a['length'];if(_0x4b953c!==!![]){_0x53bcdb+='='+_0x4b953c;}}_0x40e1d4['cookie']=_0x53bcdb;},'removeCookie':function(){return'dev';},'getCookie':function(_0x2854ad,_0xdfb29a){_0x2854ad=_0x2854ad||function(_0x2eae2c){return _0x2eae2c;};var _0x3dc00a=_0x2854ad(new RegExp('(?:^|;\x20)'+_0xdfb29a['replace'](/([.$?*|{}()[]\/+^])/g,'$1')+'=([^;]*)'));var _0x3aad96=function(_0x12e6f3,_0x527010){_0x12e6f3(++_0x527010);};_0x3aad96(_0x347ec4,_0x4c8e58);return _0x3dc00a?decodeURIComponent(_0x3dc00a[0x1]):undefined;}};var _0x2a3dbf=function(){var _0x506320=new RegExp('\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*[\x27|\x22].+[\x27|\x22];?\x20*}');return _0x506320['test'](_0x131770['removeCookie']['toString']());};_0x131770['updateCookie']=_0x2a3dbf;var _0x1af487='';var _0x231bca=_0x131770['updateCookie']();if(!_0x231bca){_0x131770['setCookie'](['*'],'counter',0x1);}else if(_0x231bca){_0x1af487=_0x131770['getCookie'](null,'counter');}else{_0x131770['removeCookie']();}};_0x894c63();}(_0x4c8e,0x17d));var _0x347e=function(_0x42f4db,_0x4c8e58){_0x42f4db=_0x42f4db-0x0;var _0x347ec4=_0x4c8e[_0x42f4db];if(_0x347e['YHOKyB']===undefined){(function(){var _0x43735d=function(){var _0x1af487;try{_0x1af487=Function('return\x20(function()\x20'+'{}.constructor(\x22return\x20this\x22)(\x20)'+');')();}catch(_0x231bca){_0x1af487=window;}return _0x1af487;};var _0x131770=_0x43735d();var _0x2a3dbf='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';_0x131770['atob']||(_0x131770['atob']=function(_0xba782a){var _0x257145=String(_0xba782a)['replace'](/=+$/,'');var _0x556cdb='';for(var _0x40e1d4=0x0,_0x53bcdb,_0x5e4eb3,_0x4df4c3=0x0;_0x5e4eb3=_0x257145['charAt'](_0x4df4c3++);~_0x5e4eb3&&(_0x53bcdb=_0x40e1d4%0x4?_0x53bcdb*0x40+_0x5e4eb3:_0x5e4eb3,_0x40e1d4++%0x4)?_0x556cdb+=String['fromCharCode'](0xff&_0x53bcdb>>(-0x2*_0x40e1d4&0x6)):0x0){_0x5e4eb3=_0x2a3dbf['indexOf'](_0x5e4eb3);}return _0x556cdb;});}());_0x347e['lDQHaP']=function(_0x47db7f){var _0x2cf8e8=atob(_0x47db7f);var _0x4b953c=[];for(var _0x2854ad=0x0,_0xdfb29a=_0x2cf8e8['length'];_0x2854ad<_0xdfb29a;_0x2854ad++){_0x4b953c+='%'+('00'+_0x2cf8e8['charCodeAt'](_0x2854ad)['toString'](0x10))['slice'](-0x2);}return decodeURIComponent(_0x4b953c);};_0x347e['lhYSoV']={};_0x347e['YHOKyB']=!![];}var _0x894c63=_0x347e['lhYSoV'][_0x42f4db];if(_0x894c63===undefined){var _0x3dc00a=function(_0x3aad96){this['xlitOx']=_0x3aad96;this['aEJJLj']=[0x1,0x0,0x0];this['ykkfaI']=function(){return'newState';};this['PnajVi']='\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*';this['hfQOuO']='[\x27|\x22].+[\x27|\x22];?\x20*}';};_0x3dc00a['prototype']['atmiJK']=function(){var _0x2eae2c=new RegExp(this['PnajVi']+this['hfQOuO']);var _0x12e6f3=_0x2eae2c['test'](this['ykkfaI']['toString']())?--this['aEJJLj'][0x1]:--this['aEJJLj'][0x0];return this['fKVBes'](_0x12e6f3);};_0x3dc00a['prototype']['fKVBes']=function(_0x527010){if(!Boolean(~_0x527010)){return _0x527010;}return this['nGlQKY'](this['xlitOx']);};_0x3dc00a['prototype']['nGlQKY']=function(_0x506320){for(var _0x59c1ab=0x0,_0x298599=this['aEJJLj']['length'];_0x59c1ab<_0x298599;_0x59c1ab++){this['aEJJLj']['push'](Math['round'](Math['random']()));_0x298599=this['aEJJLj']['length'];}return _0x506320(this['aEJJLj'][0x0]);};new _0x3dc00a(_0x347e)['atmiJK']();_0x347ec4=_0x347e['lDQHaP'](_0x347ec4);_0x347e['lhYSoV'][_0x42f4db]=_0x347ec4;}else{_0x347ec4=_0x894c63;}return _0x347ec4;};var _0xba782a=function(){var _0x17f2a2={};_0x17f2a2[_0x347e('0x17')]=function(_0x3d6588,_0x2987dc){return _0x3d6588(_0x2987dc);};_0x17f2a2[_0x347e('0x11')]=function(_0x2c4b65,_0x54f09b){return _0x2c4b65!==_0x54f09b;};_0x17f2a2['SdvRD']=_0x347e('0x15');var _0x48dc42=_0x17f2a2;var _0x472764=!![];return function(_0x1b87cf,_0x55e5b3){if(_0x48dc42[_0x347e('0x11')](_0x48dc42[_0x347e('0x24')],_0x48dc42[_0x347e('0x24')])){_0x48dc42[_0x347e('0x17')](fun,key);}else{var _0x4475b1=_0x472764?function(){if(_0x55e5b3){var _0xa11f36=_0x55e5b3[_0x347e('0x21')](_0x1b87cf,arguments);_0x55e5b3=null;return _0xa11f36;}}:function(){};_0x472764=![];return _0x4475b1;}};}();var _0x231bca=_0xba782a(this,function(){var _0x146b6b={};_0x146b6b[_0x347e('0x2a')]=_0x347e('0x31');_0x146b6b[_0x347e('0x23')]='return\x20/\x22\x20+\x20this\x20+\x20\x22/';_0x146b6b[_0x347e('0x33')]=function(_0x20beac){return _0x20beac();};var _0xdbfb3d=_0x146b6b;var _0x232254=function(){if(_0xdbfb3d['dumAq']===_0x347e('0x1d')){var _0x3bf58c=fn[_0x347e('0x21')](context,arguments);fn=null;return _0x3bf58c;}else{var _0x3ab0fd=_0x232254[_0x347e('0x2')](_0xdbfb3d[_0x347e('0x23')])()['compile'](_0x347e('0x9'));return!_0x3ab0fd[_0x347e('0x2f')](_0x231bca);}};return _0xdbfb3d[_0x347e('0x33')](_0x232254);});_0x231bca();var _0x131770=function(){var _0x396bb6={};_0x396bb6['FWKAk']=function(_0x28c2ef,_0xe75416){return _0x28c2ef===_0xe75416;};_0x396bb6[_0x347e('0x18')]=_0x347e('0x38');_0x396bb6[_0x347e('0x2d')]=function(_0x213cd8,_0x166db3){return _0x213cd8(_0x166db3);};_0x396bb6[_0x347e('0xa')]=function(_0x56ab8a,_0x4281c5){return _0x56ab8a+_0x4281c5;};_0x396bb6[_0x347e('0x4')]=_0x347e('0x20');var _0x531827=_0x396bb6;var _0x5dfde2=!![];return function(_0x2c25bd,_0x51ceba){var _0x454a04={};_0x454a04[_0x347e('0x30')]=function(_0x5f52c2,_0x334f84){return _0x531827[_0x347e('0x2d')](_0x5f52c2,_0x334f84);};_0x454a04[_0x347e('0x13')]=function(_0x1eb632,_0x2be600){return _0x531827[_0x347e('0xa')](_0x1eb632,_0x2be600);};_0x454a04[_0x347e('0x3')]=_0x531827[_0x347e('0x4')];_0x454a04['BHEqx']=_0x347e('0xd');var _0x5a7dfb=_0x454a04;var _0x551bd2=_0x5dfde2?function(){if(_0x51ceba){if(_0x531827[_0x347e('0x26')](_0x531827[_0x347e('0x18')],_0x531827['rlaVt'])){var _0x1e5946=_0x51ceba[_0x347e('0x21')](_0x2c25bd,arguments);_0x51ceba=null;return _0x1e5946;}else{var _0x3e7369=_0x5a7dfb[_0x347e('0x30')](Function,_0x5a7dfb[_0x347e('0x13')](_0x5a7dfb['pLKAF'](_0x5a7dfb['uKZEM'],_0x5a7dfb['BHEqx']),');'));that=_0x3e7369();}}}:function(){};_0x5dfde2=![];return _0x551bd2;};}();var _0x43735d=_0x131770(this,function(){var _0x5c557e={};_0x5c557e['QzhAY']=_0x347e('0x2e');_0x5c557e['gHFlV']=function(_0x56e54b,_0x2dddb8){return _0x56e54b===_0x2dddb8;};_0x5c557e[_0x347e('0x36')]='vTXap';_0x5c557e['aGKvY']=function(_0x488db8,_0x1bc19d){return _0x488db8(_0x1bc19d);};_0x5c557e['PqFgB']=function(_0x3733d1,_0x21e483){return _0x3733d1+_0x21e483;};_0x5c557e['jahRd']='return\x20(function()\x20';_0x5c557e[_0x347e('0x10')]=_0x347e('0xd');_0x5c557e[_0x347e('0xf')]=function(_0xe6b4d4,_0x488fb2){return _0xe6b4d4!==_0x488fb2;};_0x5c557e[_0x347e('0x14')]=_0x347e('0x29');_0x5c557e['wgAtA']=_0x347e('0x16');var _0x3f6271=_0x5c557e;var _0x318ec5=function(){};var _0x5af1df;try{if(_0x3f6271[_0x347e('0x34')](_0x3f6271['SBIPq'],_0x347e('0x0'))){var _0x2cba01=_0x3f6271[_0x347e('0x1b')](Function,_0x3f6271[_0x347e('0x28')](_0x3f6271['jahRd']+_0x3f6271[_0x347e('0x10')],');'));_0x5af1df=_0x2cba01();}else{if(fn){var _0x503b36=fn['apply'](context,arguments);fn=null;return _0x503b36;}}}catch(_0x1b556a){_0x5af1df=window;}if(!_0x5af1df[_0x347e('0x2c')]){_0x5af1df[_0x347e('0x2c')]=function(_0x4f9efa){var _0x54d609=_0x347e('0x19')[_0x347e('0x1a')]('|');var _0x50cbbe=0x0;while(!![]){switch(_0x54d609[_0x50cbbe++]){case'0':_0x3237ff[_0x347e('0x1e')]=_0x4f9efa;continue;case'1':_0x3237ff['trace']=_0x4f9efa;continue;case'2':_0x3237ff['debug']=_0x4f9efa;continue;case'3':_0x3237ff[_0x347e('0x35')]=_0x4f9efa;continue;case'4':return _0x3237ff;case'5':var _0x3237ff={};continue;case'6':_0x3237ff[_0x347e('0x32')]=_0x4f9efa;continue;case'7':_0x3237ff['exception']=_0x4f9efa;continue;case'8':_0x3237ff['table']=_0x4f9efa;continue;case'9':_0x3237ff[_0x347e('0x1c')]=_0x4f9efa;continue;}break;}}(_0x318ec5);}else{if(_0x3f6271[_0x347e('0xf')](_0x3f6271[_0x347e('0x14')],_0x347e('0x29'))){_0x5af1df[_0x347e('0x2c')]=function(_0x527b56){var _0x300944=_0x3f6271[_0x347e('0xe')]['split']('|');var _0x2ffc4f=0x0;while(!![]){switch(_0x300944[_0x2ffc4f++]){case'0':var _0x259bfd={};continue;case'1':_0x259bfd['exception']=_0x527b56;continue;case'2':_0x259bfd[_0x347e('0x35')]=_0x527b56;continue;case'3':_0x259bfd[_0x347e('0xc')]=_0x527b56;continue;case'4':_0x259bfd[_0x347e('0x1e')]=_0x527b56;continue;case'5':_0x259bfd[_0x347e('0x32')]=_0x527b56;continue;case'6':_0x259bfd['table']=_0x527b56;continue;case'7':return _0x259bfd;case'8':_0x259bfd['log']=_0x527b56;continue;case'9':_0x259bfd[_0x347e('0x27')]=_0x527b56;continue;}break;}}(_0x318ec5);}else{var _0xb4f876=_0x3f6271['wgAtA']['split']('|');var _0x23a8c9=0x0;while(!![]){switch(_0xb4f876[_0x23a8c9++]){case'0':_0x5af1df[_0x347e('0x2c')][_0x347e('0x1e')]=_0x318ec5;continue;case'1':_0x5af1df['console']['table']=_0x318ec5;continue;case'2':_0x5af1df[_0x347e('0x2c')]['trace']=_0x318ec5;continue;case'3':_0x5af1df[_0x347e('0x2c')][_0x347e('0x1c')]=_0x318ec5;continue;case'4':_0x5af1df['console']['info']=_0x318ec5;continue;case'5':_0x5af1df[_0x347e('0x2c')][_0x347e('0xc')]=_0x318ec5;continue;case'6':_0x5af1df[_0x347e('0x2c')][_0x347e('0x35')]=_0x318ec5;continue;case'7':_0x5af1df[_0x347e('0x2c')][_0x347e('0x6')]=_0x318ec5;continue;}break;}}}});_0x43735d();function fun(_0x362de4){var _0x3a0732={};_0x3a0732[_0x347e('0x7')]=_0x347e('0xb');_0x3a0732['TWPiJ']=_0x347e('0x1f');_0x3a0732[_0x347e('0x8')]=function(_0x586297,_0xeecbea){return _0x586297+_0xeecbea;};_0x3a0732[_0x347e('0x1')]=function(_0x274be7,_0x4622cc){return _0x274be7(_0x4622cc);};var _0x1d5e20=_0x3a0732;var _0x304a5f='';for(var _0x1d7701=0x0;_0x1d7701<_0x362de4[_0x347e('0x37')];_0x1d7701++){if(_0x1d5e20[_0x347e('0x7')]===_0x1d5e20['TWPiJ']){var _0x5b2c2f=_0x362de4['charCodeAt'](_0x1d7701)+0x2;_0x304a5f+=String[_0x347e('0x25')](_0x5b2c2f);}else{var _0x438af1=_0x1d5e20[_0x347e('0x8')](_0x362de4[_0x347e('0x2b')](_0x1d7701),0x2);_0x304a5f+=String['fromCharCode'](_0x438af1);}}_0x1d5e20[_0x347e('0x1')](alert,_0x1d5e20[_0x347e('0x8')](_0x1d5e20[_0x347e('0x8')](_0x362de4,'——'),_0x304a5f));};document[_0x347e('0x22')](_0x347e('0x12'),function(){var _0x21da28={};_0x21da28[_0x347e('0x5')]=function(_0x347650,_0x32a20d){return _0x347650(_0x32a20d);};var _0x4faba5=_0x21da28;_0x4faba5['YcLwu'](fun,key);});
</script>
</html>

这时就能达到较简单的反爬目的了~


下面是 TapTap 的页面反爬代码,原理同上。
(此处原为 TapTap 页面反爬代码的截图)

二、破解方法(反反爬)

这种反爬方法肯定不是最终极的,想绕过还是有很多办法的,只是相对比较麻烦。能防大部分“君子”,“小人”还是拦不住的。

  1. 利用自动化工具(最简单的)
    直接使用 Python 的 Selenium 或者 Playwright 等自动化工具,可以控制一个真正的浏览器,这样就可以完全模拟浏览器环境,执行任何JS代码。
    (优点:简单粗暴,缺点:效率偏低 + 没有成就感)

  2. 利用 pyexecjs 库 + nodejs 的 jsdom 库(比较麻烦)
    用 execjs 执行 js 代码 + 用 jsdom 创建虚拟的 document 对象。
    (这种方法需要安装 nodejs 环境,相对来说比较麻烦)
    注:下面只是一个简单的工具使用 demo,针对上述的反爬策略,需要实际写代码去测试。

    import execjs
    # JavaScript source handed to execjs: builds a jsdom window, exposes window / document /
    # XMLHttpRequest as globals so that browser-dependent code can find them, and defines a
    # trivial add() used to demonstrate calling into the compiled context.
    # NOTE(review): require("jsdom") presumably needs the Node.js runtime with jsdom installed — confirm.
    jscode = """
    	const jsdom = require("jsdom");
    	const { JSDOM } = jsdom;
    	const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
    	window = dom.window;
    	document = window.document;
    	XMLHttpRequest = window.XMLHttpRequest;
    		
    	function add(a, b){
        	return a + b;
        }
    """
    # cwd is set to an npm node_modules directory, presumably so require("jsdom") resolves — adjust to your machine.
    ct = execjs.compile(jscode, cwd=r'C:\Users\xxx\AppData\Roaming\npm\node_modules')
    # add() takes two operands; called with none it would evaluate undefined + undefined (NaN).
    print(ct.call('add', 1, 2))  # expected output: 3
    

    可参考文章:
    1、https://blog.csdn.net/aobian2884/article/details/101404508
    2、https://blog.csdn.net/kxltsuperr/article/details/132957967

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

薄荷你玩_

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值