selenium破解bilibili滑动验证码

最新推荐文章于 2024-07-25 14:48:50 发布

ZwY*

最新推荐文章于 2024-07-25 14:48:50 发布

阅读量561

点赞数

分类专栏： python爬虫

本文链接：https://blog.csdn.net/qq_40821402/article/details/88616909

版权

python爬虫专栏收录该内容

12 篇文章 1 订阅

订阅专栏

登陆b站的时候大家都会见到滑动验证码，打开开发者工具分析一下这里的验证码
1：首先需要鼠标触碰到滑动按钮才会显示出完整的验证码图片
2：点击按钮出现缺口图片
3：查看图片元素（打开图片链接）会发现完整的图片被打乱了：整张图被分成上下两部分，每部分各26张小图。不过虽然小图的顺序是乱的，它们的坐标还是有规律可循
在这里插入图片描述

解决思路：
先来分析一下这个坐标的规律：我打开元素检查，反复对比各小图之间的关系，最后发现图片上半部分的第一块和下半部分的第二块x值相同，下半部分第一块与上半部分第二块x值相同——原来它们是交叉放置的。接下来就要把它们拼接起来。同样的道理，带缺口的图也符合这个规律。拼接完成后逐个比较两张图片对应位置的像素，如果某个像素任一颜色分量之差的绝对值大于50，就认为找到了缺口，该像素的x坐标即缺口的x轴坐标。由于滑块只做水平移动，所以只需要找出x轴坐标即可。
计算出缺口的距离之后就可以模拟人滑动按钮了。在模拟的过程中，对滑动速度的限制相当严格，我参考了别人的博客找到了解决办法，但是识别的效果仍不太理想，还请大神们指教。

需要用到的包：

import random
import time, re
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from PIL import Image
import requests
from io import BytesIO
class Bilibili(object):
    """Automate the Bilibili login page and its slider captcha in Chrome."""

    def __init__(self):
        """Start Chrome and load the login page.

        Sets three attributes: ``url`` (the login page address), ``driver``
        (the Chrome WebDriver) and ``wait`` (a ``WebDriverWait`` on that
        driver with a 20-second timeout).
        """
        self.url = 'https://passport.bilibili.com/login'
        browser = webdriver.Chrome()
        self.driver = browser
        browser.get(self.url)
        # Fixed pause after navigation, before any element is looked up.
        time.sleep(3)
        self.wait = WebDriverWait(browser, 20)

获取图片和列表位置：

    def get_image_url(self, xpath):
        """Collect the sprite URL and background offsets of the captcha tiles.

        Every element selected by ``xpath`` is expected to carry an inline
        style of the form
        ``background-image: url("..."); background-position: Xpx Ypx;``.
        Elements whose style does not match are skipped.

        Args:
            xpath: XPath expression selecting the tile elements.

        Returns:
            A ``(image_url, location)`` tuple. ``image_url`` is the URL read
            from the last matching element, or ``None`` when nothing matched.
            ``location`` is a list of ``(x, y)`` integer offsets, one per
            matching element, in the order the driver returned them.

        Raises:
            ValueError: If a captured offset is not an integer literal.
        """
        # Raw string: the original non-raw literal relied on the invalid
        # escape sequences "\(" and "\)", which newer Pythons warn about.
        style_pattern = re.compile(
            r'background-image: url\("(.*?)"\); '
            r'background-position: (.*?)px (.*?)px;'
        )
        image_url = None
        location = []
        # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which
        # was removed in Selenium 4.3; this form also works on Selenium 3.
        for element in self.driver.find_elements(By.XPATH, xpath):
            match = style_pattern.search(element.get_attribute("style") or "")
            if match is None:
                # No usable inline style on this element; nothing to record.
                continue
            image_url, x_pos, y_pos = match.groups()
            location.append((int(x_pos), int(y_pos)))
        return image_url, location

拼接图片：

    def mosaic_image(self, image_url, location):
        """Download the scrambled captcha sprite and reassemble it.

        The sprite is cut into 10-pixel-wide tiles at the horizontal offsets
        given in ``location``. A tile whose y offset is 0 is cut from the top
        58-pixel band of the sprite and pasted into the lower half of the
        result; any other tile is cut from below that band and pasted into
        the upper half. Within each half, tiles are laid out left to right in
        the order they appear in ``location``.

        Args:
            image_url: URL of the scrambled sprite image.
            location: Iterable of ``(x, y)`` background offsets, as returned
                by ``get_image_url``.

        Returns:
            A new 260-pixel-wide RGB ``PIL.Image`` with the sprite's height.

        Raises:
            requests.RequestException: If the download fails, times out or
                returns an HTTP error status.
        """
        row_height = 58  # height of the sprite's top band
        tile_width = 10  # width of a single tile

        # Bounded timeout so a stalled download cannot hang the whole run,
        # and an explicit status check so an error page is not fed to PIL.
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        sprite = Image.open(BytesIO(response.content))

        top_band_tiles = []
        bottom_band_tiles = []
        for x_pos, y_pos in location:
            left = abs(x_pos)
            right = left + tile_width
            if y_pos == 0:
                top_band_tiles.append(sprite.crop((left, 0, right, row_height)))
            else:
                bottom_band_tiles.append(
                    sprite.crop((left, row_height, right, sprite.height)))

        # Canvas width is fixed at 260 px (the article describes 26 tiles per
        # row, each 10 px wide).
        canvas = Image.new("RGB", (260, sprite.height))
        # The two bands swap places: top-band tiles go to the lower half.
        for tiles, top in ((top_band_tiles, row_height), (bottom_band_tiles, 0)):
            x_offset = 0
            for tile in tiles:
                canvas.paste(tile, (x_offset, top))
                x_offset += tile.width

        return canvas

判断颜色是否相近：

    def is_similar_color(self, x_pixel, y_pixel):
        """Tell whether two pixels are close in every channel.

        Args:
            x_pixel: Channel values of the first pixel.
            y_pixel: Channel values of the second pixel; it is indexed at
                every position that ``x_pixel`` provides.

        Returns:
            ``False`` as soon as one channel differs by more than 50,
            otherwise ``True``.
        """
        return not any(
            abs(y_pixel[index] - channel) > 50
            for index, channel in enumerate(x_pixel)
        )

计算滑动距离：

    def get_offset_distance(self, cut_image, full_image):
        """Locate the x coordinate where the gapped image starts to differ.

        Pixels are compared column by column (left to right) and, within a
        column, top to bottom, using ``is_similar_color``. At the first
        dissimilar pixel a 50x40 region of ``cut_image`` anchored there is
        written to ``1.jpg`` so the detected spot can be inspected by eye.

        Args:
            cut_image: Reassembled captcha image containing the gap.
            full_image: Reassembled captcha image without the gap.

        Returns:
            The x coordinate of the first dissimilar pixel, or ``None`` when
            the two images are similar everywhere.
        """
        for col in range(cut_image.width):
            for row in range(cut_image.height):
                point = (col, row)
                cut_px = cut_image.getpixel(point)
                full_px = full_image.getpixel(point)
                if self.is_similar_color(cut_px, full_px):
                    continue
                # Debug snapshot of the region assumed to be the gap.
                preview = cut_image.crop((col, row, col + 50, row + 40))
                preview.save("1.jpg")
                return col
        return None

开始移动：

 def start_move(self, distance):
        element = self.driver.find_element_by_xpath('//div[@class="gt_slider_knob gt_show"]')

        # 这里就是根据移动进行调试，计算出来的位置不是百分百正确的，加上一点偏移
        distance -= element.size.get('width') / 2
        distance += 15

        # 按下鼠标左键
        ActionChains(self.driver).click_and_hold(element).perform()
        time.sleep(0.5)
        while distance > 0:
            if distance > 7:
                # 如果距离大于10，就让他移动快一点
                span = random.randint(5, 8)
            else:
                # 快到缺口了，就移动慢一点
                span = random.randint(2, 3)
            ActionChains(self.driver).move_by_offset(span, 0).perform()
            distance -= span
            time.sleep(random.randint(10,50) / 100)

        ActionChains(self.driver).move_by_offset(distance, 1).perform()
        ActionChains(self.driver).release(on_element=element).perform()

可以实现注册:(根据需要)

 # Optional SMS-registration flow, deliberately disabled: the code below sits
 # inside a bare string literal, so it is never defined as a method or run.
 '''
    # 注册
    def register(self):
        element = self.driver.find_element_by_xpath('//input[@id="sms_username"]')
        element.clear()
        element.send_keys("15836197228")

        ele_captcha = self.driver.find_element_by_xpath('//span[@class="js-btn-captcha btn-captcha"]')
        ele_captcha.click()

    '''

可以登录：

# 登陆
    def logging(self, username='用户名', password='密码'):
        """Fill in the login form and submit it.

        Args:
            username: Text typed into the ``login-username`` field. The
                default is the placeholder from the original script and
                has to be replaced with a real account name.
            password: Text typed into the ``login-passwd`` field. The
                default is likewise only a placeholder.
        """
        # find_element(By.<kind>, ...) replaces the find_element_by_* helpers,
        # which were removed in Selenium 4.3.
        dom_input_id = self.driver.find_element(By.ID, "login-username")
        dom_input_keyword = self.driver.find_element(By.ID, "login-passwd")
        dom_input_id.send_keys(username)
        time.sleep(3)
        dom_input_keyword.send_keys(password)
        submit = self.driver.find_element(By.XPATH, '//*[@class="btn-box"]/a')
        submit.click()
        time.sleep(1)

完成整个拖动：

    def analog_drag(self):
        """Run one full captcha attempt and retry if it is not confirmed.

        Steps: hover the slider so the captcha image appears, refresh the
        captcha, read and reassemble both the gapped and the full image
        (saved as ``cut.jpg`` / ``full.jpg`` for inspection), compute the gap
        offset, and drag the knob. Afterwards the page is checked for the
        error tip (up to 5 s) and then for the success tip (up to 10 s).

        The method returns after printing a failure message when no gap is
        found or the error tip appears. When neither tip shows up it calls
        itself again to retry.
        """
        find_element = self.driver.find_element

        # Hover over the slider so that the captcha image is displayed.
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # was removed in Selenium 4.3.
        slider = find_element(By.XPATH, '//*[@class="gt_slider"]/div[2]')
        ActionChains(self.driver).move_to_element(slider).perform()
        time.sleep(3)

        # Ask for a fresh captcha image.
        refresh_button = find_element(By.XPATH, '//a[@class="gt_refresh_button"]')
        refresh_button.click()
        time.sleep(1)

        # Read the sprite URL and tile offsets of both images.
        cut_image_url, cut_location = self.get_image_url('//div[@class="gt_cut_bg_slice"]')
        full_image_url, full_location = self.get_image_url('//div[@class="gt_cut_fullbg_slice"]')

        # Reassemble the scrambled sprites.
        cut_image = self.mosaic_image(cut_image_url, cut_location)
        full_image = self.mosaic_image(full_image_url, full_location)

        # Keep copies on disk for manual inspection.
        cut_image.save("cut.jpg")
        full_image.save("full.jpg")

        distance = self.get_offset_distance(cut_image, full_image)
        if distance is None:
            # The two images never differ, so there is no gap to drag to;
            # previously this crashed inside start_move with a TypeError.
            print("验证失败")
            return

        self.start_move(distance)

        # If the error tip shows up, the attempt was rejected.
        try:
            WebDriverWait(self.driver, 5, 0.5).until(
                EC.element_to_be_clickable((By.XPATH, '//div[@class="gt_ajax_tip gt_error"]')))
            print("验证失败")
            return
        except TimeoutException:
            # No error tip within 5 s: go on and look for the success tip.
            pass

        # Check whether the captcha was accepted.
        try:
            WebDriverWait(self.driver, 10, 0.5).until(
                EC.element_to_be_clickable((By.XPATH, '//div[@class="gt_ajax_tip gt_success"]')))
        except TimeoutException:
            print("again times")
            time.sleep(1)
            # Neither tip appeared: run the whole drag flow again.
            self.analog_drag()
        else:
            print('登陆成功')

    # 获取图片和位置列表

最后写main：

    def main(self):
        """Log in and then solve the slider captcha.

        Waits up to 10 seconds, polling every 0.5 seconds, for the slider
        knob to become clickable, submits the login form via ``logging`` and
        finally runs the drag flow via ``analog_drag``.
        """
        knob_locator = (By.XPATH, '//div[@class="gt_slider_knob gt_show"]')
        knob_clickable = EC.element_to_be_clickable(knob_locator)
        WebDriverWait(self.driver, 10, 0.5).until(knob_clickable)

        self.logging()
        # Enter the simulated drag flow.
        self.analog_drag()

感谢博主：
https://blog.csdn.net/bf02jgtrs00xktcx/article/details/80002248#commentsedit
让笔者这个爬虫小白学习到不少。