python webviewer爬虫_爬虫再也不怕检测浏览器环境了

背景

之前爬虫 驱动个 selenium 基本上就可以了.

但是现在各种检测浏览器环境...特别是不熟悉 js 的同学就更烦了

本文直接把 selenium、pyppeteer 以及正常打开浏览器这三种方式下的环境差异列出来

这样你就可以更愉快的爬虫了(可以直接把环境全部模拟上,或者大概看看有啥,下次看人家混淆 js 的时候心里有个数)

原理

就是遍历 window 对象,把属性全部保存成 json 文件

/**
 * Build a plain, JSON-friendly snapshot of the enumerable properties of `obj`.
 *
 * Rules applied at every level:
 *   - keys are enumerated with `for...in`, so inherited enumerable
 *     properties are included;
 *   - the key `enabledPlugin` is skipped without being read (too nested);
 *   - function-valued properties are skipped;
 *   - object-valued properties are snapshotted recursively and kept only
 *     when the nested snapshot is non-empty (so `null` and `{}` vanish);
 *   - every other value is copied as-is.
 *
 * An object that is already being walked on the current descent path is
 * skipped, so cyclic graphs terminate instead of overflowing the stack.
 * Objects that are merely shared between sibling branches are still
 * expanded in each place they appear.
 *
 * @param {*} obj - Root value to snapshot.
 * @returns {Object} Plain object mirroring the retained properties.
 */
function recur(obj) {
  // Objects on the current descent path; meeting one again means a cycle.
  const ancestors = new Set();

  function walk(node) {
    const result = {};
    ancestors.add(node);

    for (const key in node) {
      // enabledPlugin is too nested; skip it before touching the property.
      if (key === 'enabledPlugin') {
        continue;
      }

      // Read the property exactly once: it may be backed by a getter.
      const value = node[key];

      if (typeof value === 'function') {
        continue;
      }

      if (typeof value === 'object') {
        // A back-reference to an ancestor would recurse forever.
        if (ancestors.has(value)) {
          continue;
        }
        const nested = walk(value);
        // Keep the branch only if it produced at least one property.
        if (Object.keys(nested).length) {
          result[key] = nested;
        }
      } else {
        // string, number, boolean (or any other non-object value)
        result[key] = value;
      }
    }

    // Leaving this node: siblings may legitimately reference it again.
    ancestors.delete(node);
    return result;
  }

  return walk(obj);
}

/**
 * Snapshot every own property of the global `window` object with `recur`
 * and print the result to the console as a single JSON string.
 *
 * The printed JSON maps each retained window property name to the
 * snapshot returned by `recur(window[name])`.
 *
 * @returns {void}
 */
function go() {
  // Property names left out of the dump.
  // NOTE(review): most of these presumably point back at window itself
  // (or at very large graphs such as document) — confirm before editing.
  const skipped = new Set([
    'globalThis',
    'Illegal',
    'parent',
    'top',
    'frames',
    'self',
    'window',
    'document',
  ]);

  const snapshot = {};

  for (const name of Object.getOwnPropertyNames(window)) {
    if (skipped.has(name)) {
      continue;
    }
    // Bracket access instead of eval-built source text: it also works for
    // property names that are not valid identifiers, and never executes
    // a property name as code.
    snapshot[name] = recur(window[name]);
  }

  console.log(JSON.stringify(snapshot));
}

// Run the dump immediately when this script is evaluated.
go()

然后分别正常打开,selenium 打开,pyppeteer 打开

再比较三种方式得到的属性差异

操作

获取 json

下载本项目

启动本地 server python -m http.server 80

打开 http://localhost/

正常打开最好是无痕模式,因为浏览器扩展可能会导致多出很多浏览器扩展的属性

打开网页以后 -> 打开开发者工具 -> 切换到 console

然后点击 copy,粘贴 json 到对应的文件内

{% asset_img aaaaa2.png %}

selenium pyppeteer 打开可以参考getEnvironment.py

比较

直接运行 python diff.py就可以了.

下面可以看到

差异有

root['navigator']['webdriver'], root['clientInformation']['webdriver'] 等等

这个算是目前检测用的比较多的,还有一些其他的可以自己运行看看

selenium 与正常环境的差异

############################################################

{ 'dictionary_item_added': [root['cdc_adoQpoasnfa76pfcZLmcfl_Symbol'], root['cdc_adoQpoasnfa76pfcZLmcfl_Array'], root['cdc_adoQpoasnfa76pfcZLmcfl_Promise'], root['navigator']['webdriver'], root['clientInformation']['webdriver']],

'dictionary_item_removed': [root['chrome']['runtime'], root['navigator']['doNotTrack'], root['navigator']['languages']['2'], root['clientInformation']['doNotTrack'], root['clientInformation']['languages']['2']],

'type_changes': { "root['visualViewport']['height']": { 'new_type': <class 'int'>,

'new_value': 948,

'old_type': <class 'float'>,

'old_value': 751.2000122070312}},

'values_changed': { "root['clientInformation']['connection']['downlink']": { 'new_value': 1.3,

'old_value': 1.45},

"root['screen']['availHeight']": { 'new_value': 1027,

'old_value': 1050},

"root['screen']['availTop']": { 'new_value': 23,

'old_value': 0},

"root['visualViewport']['width']": { 'new_value': 840,

'old_value': 1344}}}

pyppeteer 与正常环境的差异

############################################################

{ 'dictionary_item_added': [root['WebKitAnimationEvent'], root['RTCRtpContributingSource'], root['SVGDiscardElement'], root['MediaCapabilitiesInfo'], root['WebKitTransitionEvent'], root['chrome']['webstore'], root['navigator']['webdriver'], root['clientInformation']['webdriver']],

'dictionary_item_removed': [root['BackgroundFetchManager'], root['RTCPeerConnectionIceErrorEvent'], root['WritableStreamDefaultWriter'], root['XRDOMOverlayState'], root['TrustedTypePolicy'], root['GeolocationCoordinates'], root['External'], root['ClipboardItem'], root['XRTransientInputHitTestSource'], root['XRRay'], root['PerformanceElementTiming'], root['XRViewerPose'], root['RTCError'], root['TextDecoderStream'], root['BackgroundFetchRegistration'], root['TrustedHTML'], root['onpointerrawupdate'], root['XRSession'], root['XRSessionEvent'], root['XRReferenceSpaceEvent'], root['XRReferenceSpace'], root['BarcodeDetector'], root['SubmitEvent'], root['AnimationEffect'], root['PeriodicSyncManager'], root['onformdata'], root['onselectstart'], root['TrustedScriptURL'], root['XRRenderState'], root['SpeechSynthesisErrorEvent'], root['RTCIceTransport'], root['TextEncoderStream'], root['MediaSession'], root['XRHitTestSource'], root['GeolocationPositionError'], root['XRPose'], root['XRView'], root['ElementInternals'], root['XRInputSourceArray'], root['XRInputSourceEvent'], root['XRFrame'], root['DecompressionStream'], root['TrustedScript'], root['Geolocation'], root['TrustedTypePolicyFactory'], root['XRBoundedReferenceSpace'], root['FeaturePolicy'], root['PerformanceEventTiming'], root['LargestContentfulPaint'], root['VideoPlaybackQuality'], root['onselectionchange'], root['XRSpace'], root['XRInputSourcesChangeEvent'], root['GeolocationPosition'], root['XRViewport'], root['XRHitTestResult'], root['queueMicrotask'], root['XRWebGLLayer'], root['DeviceMotionEventRotationRate'], root['ReadableStreamDefaultReader'], root['BackgroundFetchRecord'], root['LayoutShift'], root['DeviceMotionEventAcceleration'], root['MediaMetadata'], root['XRTransientInputHitTestResult'], root['PaymentMethodChangeEvent'], root['trustedTypes'], root['FragmentDirective'], root['UserActivation'], root['RTCErrorEvent'], root['CompressionStream'], root['XRRigidTransform'], root['Animation'], 
root['RTCSctpTransport'], root['XRInputSource'], root['FormDataEvent'], root['XRSystem'], root['EnterPictureInPictureEvent'], root['KeyframeEffect'], root['RTCDtlsTransport'], root['chrome']['runtime'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_OVERLAY'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_HUE'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_SATURATION'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_COLOR_DODGE'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_LUMINOSITY'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_EXCLUSION'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_DIFFERENCE'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_SOFT_LIGHT'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_HARD_LIGHT'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_COLOR_BURN'], root['SVGFEBlendElement']['SVG_FEBLEND_MODE_COLOR'], root['navigator']['userActivation'], root['navigator']['doNotTrack'], root['navigator']['mediaSession'], root['navigator']['languages']['2'], root['clientInformation']['userActivation'], root['clientInformation']['doNotTrack'], root['clientInformation']['mediaSession'], root['clientInformation']['languages']['2'], root['PerformanceObserver']['supportedEntryTypes']],

'type_changes': { "root['visualViewport']['height']": { 'new_type': <class 'int'>,

'new_value': 600,

'old_type': <class 'float'>,

'old_value': 751.2000122070312}},

'values_changed': { "root['clientInformation']['appVersion']": { 'new_value': '5.0 '

'(Macintosh; '

'Intel '

'Mac '

'OS '

'X '

'10_15_4) '

'AppleWebKit/537.36 '

'(KHTML, '

'like '

'Gecko) '

'Chrome/71.0.3542.0 '

'Safari/537.36',

'old_value': '5.0 '

'(Macintosh; '

'Intel '

'Mac '

'OS '

'X '

'10_15_4) '

'AppleWebKit/537.36 '

'(KHTML, '

'like '

'Gecko) '

'Chrome/83.0.4103.61 '

'Safari/537.36'},

"root['clientInformation']['connection']['rtt']": { 'new_value': 100,

'old_value': 150},

"root['clientInformation']['plugins']['0']['name']": { 'new_value': 'Chromium '

'PDF '

'Plugin',

'old_value': 'Chrome '

'PDF '

'Plugin'},

"root['clientInformation']['plugins']['1']['name']": { 'new_value': 'Chromium '

'PDF '

'Viewer',

'old_value': 'Chrome '

'PDF '

'Viewer'},

"root['clientInformation']['userAgent']": { 'new_value': 'Mozilla/5.0 '

'(Macintosh; '

'Intel '

'Mac '

'OS '

'X '

'10_15_4) '

'AppleWebKit/537.36 '

'(KHTML, '

'like '

'Gecko) '

'Chrome/71.0.3542.0 '

'Safari/537.36',

'old_value': 'Mozilla/5.0 '

'(Macintosh; '

'Intel '

'Mac '

'OS '

'X '

'10_15_4) '

'AppleWebKit/537.36 '

'(KHTML, '

'like '

'Gecko) '

'Chrome/83.0.4103.61 '

'Safari/537.36'},

"root['console']['memory']['jsHeapSizeLimit']": { 'new_value': 2330000000,

'old_value': 3760000000},

"root['history']['length']": { 'new_value': 2,

'old_value': 3},

"root['location']['href']": { 'new_value': 'http://localhost/',

'old_value': 'http://localhost/t.html'},

"root['location']['pathname']": { 'new_value': '/',

'old_value': '/t.html'},

"root['navigator']['appVersion']": { 'new_value': '5.0 '

'(Macintosh; '

'Intel '

'Mac '

'OS X '

'10_15_4) '

'AppleWebKit/537.36 '

'(KHTML, '

'like '

'Gecko) '

'Chrome/71.0.3542.0 '

'Safari/537.36',

'old_value': '5.0 '

'(Macintosh; '

'Intel '

'Mac '

'OS X '

'10_15_4) '

'AppleWebKit/537.36 '

'(KHTML, '

'like '

'Gecko) '

'Chrome/83.0.4103.61 '

'Safari/537.36'},

"root['navigator']['connection']['rtt']": { 'new_value': 100,

'old_value': 150},

"root['navigator']['plugins']['0']['name']": { 'new_value': 'Chromium '

'PDF '

'Plugin',

'old_value': 'Chrome '

'PDF '

'Plugin'},

"root['navigator']['plugins']['1']['name']": { 'new_value': 'Chromium '

'PDF '

'Viewer',

'old_value': 'Chrome '

'PDF '

'Viewer'},

"root['screen']['availHeight']": { 'new_value': 1027,

'old_value': 1050},

"root['screen']['availTop']": { 'new_value': 23,

'old_value': 0},

"root['screen']['colorDepth']": { 'new_value': 24,

'old_value': 30},

"root['screen']['orientation']['type']": { 'new_value': 'portrait-primary',

'old_value': 'landscape-primary'},

"root['screen']['pixelDepth']": { 'new_value': 24,

'old_value': 30},

"root['visualViewport']['width']": { 'new_value': 800,

'old_value': 1344}}}

项目地址

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值