Python爬虫（使用Selenium库）解决阿里云盾滑块认证方法2025

七悦

已于 2025-02-01 13:18:32 修改

阅读量870

点赞数 2

文章标签： python 爬虫 selenium

于 2025-01-31 08:28:48 首次发布

本文链接：https://blog.csdn.net/weixin_45414200/article/details/145403097

版权

Python中使用Selenium库，模拟手动点击操作来爬取公开数据是可行的，然而部分网站会有随机验证机制，禁止自动爬取资源，如图所示：
在这里插入图片描述

下面以阿里云盾认证为例，来介绍解决办法，完整代码参考如下：

import random
import time
import csv
import selenium
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By


class Scrapy_Test:
    """Selenium/Chrome helper that opens a target page in new tabs and drags the Aliyun slider.

    Typical use: build an instance, then whenever ``check_slide_web()`` reports the
    slider page, call ``slide_verify()`` before continuing with the scraping logic.
    """

    current_tab = 0  # index into driver.window_handles of the tab currently in use
    try_max_times = 10  # extra new-tab rounds slide_verify() may use after the first round
    slide_attempts_per_tab = 3  # slider drags attempted inside each tab
    target_url = "你要爬取的网址"  # placeholder URL; override via __init__ or add_tab(url)

    def __init__(self, target_url=None, driver_path="./chromedriver-win64/chromedriver.exe"):
        """Start Chrome through chromedriver and maximise the window.

        :param target_url: URL opened by ``add_tab``; ``None`` keeps the class default.
        :param driver_path: path to the chromedriver executable.
        """
        if target_url is not None:
            self.target_url = target_url
        service = Service(driver_path)
        options = selenium.webdriver.ChromeOptions()
        # Disable the "AutomationControlled" Blink feature flag.
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        # Turn off useAutomationExtension.
        options.add_experimental_option("useAutomationExtension", False)
        # ChromeDriver "detach" option: leave the browser open when the driver exits.
        options.add_experimental_option("detach", True)
        self.driver = selenium.webdriver.Chrome(options=options, service=service)
        # Injected into every new document: make navigator.webdriver read as undefined.
        self.driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {'source': 'Object.defineProperty(navigator,"webdriver",{get:()=>undefined})'})

        self.driver.maximize_window()

    def check_slide_web(self):
        """Report whether the current page is the slider-verification page.

        The page is recognised by the presence of an element with id ``tips_title``.
        ``find_elements`` returns an empty list when nothing matches, so no exception
        handling is needed; genuine driver/session errors propagate to the caller.

        :return: ``True`` if the element exists, otherwise ``False``.
        """
        return bool(self.driver.find_elements(By.ID, 'tips_title'))

    def add_tab(self, url=None):
        """Open the target URL in a new browser tab, wait, then switch to that tab.

        :param url: URL to open; ``None`` uses ``self.target_url``.
        :return: None
        """
        target = self.target_url if url is None else url
        handles_before = set(self.driver.window_handles)
        # The URL is passed as a script argument so quotes inside it cannot break the JS.
        self.driver.execute_script('window.open(arguments[0])', target)
        # Per the original author: a random wait is required before switching,
        # otherwise the site classifies the session as automated.
        wait_secs = random.uniform(20, 50)
        print(f'{wait_secs}秒后开始跳转')
        time.sleep(wait_secs)
        handles = self.driver.window_handles
        opened = [h for h in handles if h not in handles_before]
        # Prefer the handle that actually appeared; if none did (e.g. the popup was
        # blocked) fall back to the last known handle instead of raising IndexError.
        new_handle = opened[-1] if opened else handles[-1]
        self.current_tab = handles.index(new_handle)
        self.driver.switch_to.window(new_handle)

    def _drag_slider(self):
        """Drag the slider handle across its track once; errors are printed, not raised."""
        try:
            inner = self.driver.find_element(By.ID, "aliyunCaptcha-sliding-slider")
            outer = self.driver.find_element(By.ID, "aliyunCaptcha-sliding-text-box")
            # Hold the handle and move it by the track width plus 1-5 px of jitter.
            actions = ActionChains(self.driver)
            actions.move_to_element(inner).click_and_hold().move_by_offset(
                outer.size['width'] + random.randint(1, 5),
                0).release().perform()
        except Exception as e:
            print('card_slide err:', e)

    def _entered_main_page(self):
        """Return True if the target site's landing page (an element of class ``head1``) loaded."""
        try:
            self.driver.find_element(By.CLASS_NAME, 'head1')
        except Exception as e:
            print('没有进到主页，开始刷新主页', e)
            return False
        return True

    def slide_verify(self):
        """Try to pass the Aliyun slider verification.

        Each round opens a fresh tab via ``add_tab`` and makes up to
        ``slide_attempts_per_tab`` drags, reloading the page and waiting a random
        20-50 s after every failed drag. Up to ``try_max_times`` additional rounds
        follow the first one. The budget is counted per call and ``try_max_times``
        is not modified, so a later call gets the full number of retries again.

        :return: ``True`` once the main page is reached, ``False`` if every round failed.
        """
        total_rounds = max(self.try_max_times, 0) + 1
        for round_no in range(total_rounds):
            if round_no:
                print('增加tab页重新尝试滑动')
            self.add_tab()
            for _ in range(self.slide_attempts_per_tab):
                self._drag_slider()
                time.sleep(random.uniform(2, 5))
                if self._entered_main_page():
                    print('云盾验证通过，成功进入主页')
                    return True
                self.driver.execute_script('window.location.reload()')
                wait_secs = random.uniform(20, 50)
                print(f'{wait_secs}秒后开始处理')
                time.sleep(wait_secs)
        return False

代码中注意：打开目标页面时使用 self.driver.execute_script('window.open("你要爬取的网址")')，
目前可以通过阿里云的云盾认证；等待随机秒数后再执行 self.driver.switch_to.window(self.driver.window_handles[self.current_tab])，
就不会被阿里云盾识别为机器操作。亲测可以爬取目标网站中想要的内容。对于不定时弹出的云盾滑块认证，先检测当前页面是否为滑块页面，执行下面的代码后，再执行业务代码即可。

 # Aliyun Shield slider page detected: run the slider routine before the business logic.
 # NOTE(review): slide_verify() returns True/False, but the result is ignored here, so the
 # business code that follows runs even when verification failed - confirm this is intended.
  if self.check_slide_web():
      self.slide_verify()

总结：
不要使用：self.driver.get('想要爬取的网址')
而需要使用：self.driver.execute_script('window.open("你要爬取的网址")')
即通过执行 JavaScript 脚本来打开页面，避免被认定为机器操作。