【浅记】使用seleniumwire滑动极兔滑块

最新推荐文章于 2024-05-27 20:43:36 发布
Docda
最新推荐文章于 2024-05-27 20:43:36 发布
阅读量466
点赞数
文章标签： python opencv 计算机视觉
本文链接：https://blog.csdn.net/qq_43035475/article/details/125616995
版权
直接上代码
import asyncio
import gzip
import os
import time
import requests
import cv2
import numpy
from random import randint
from selenium.webdriver.common.by import By
from seleniumwire import webdriver
from selenium.webdriver.common.action_chains import ActionChains


def show_image(img_array, name='img', resize_flag=False):
    """Display an image in an OpenCV window and block until a key is pressed.

    :param img_array: image array; ``shape[0]`` is used as the height and
        ``shape[1]`` as the width
    :param name: title of the window (also used to destroy it afterwards)
    :param resize_flag: when true, images that exceed 960x540 are shrunk to
        fit inside that box, keeping the aspect ratio
    """
    max_height = 540
    max_width = 960
    # The smaller of the two ratios guarantees both dimensions fit in the box.
    scale = min(max_width / img_array.shape[1], max_height / img_array.shape[0])
    if resize_flag and scale < 1:
        img_array = cv2.resize(img_array, (0, 0), fx=scale, fy=scale)
    cv2.imshow(name, img_array)
    # OpenCV's Python bindings are camelCase: the lower-cased spellings
    # (waitkey / destroywindow) do not exist and raise AttributeError.
    cv2.waitKey(0)
    cv2.destroyWindow(name)


def make_threshold(img):
    """Binarize an image with a fixed, inverted global threshold.

    :param img: image array to binarize
        (NOTE(review): presumably a single-channel uint8 image - confirm with callers)
    :return: the thresholded image returned by ``cv2.threshold``
    """
    x = numpy.ones(img.shape, numpy.uint8) * 255
    # Shift every pixel by 255 before thresholding.
    # NOTE(review): for uint8 input this subtraction wraps modulo 256 - confirm
    # that the wrap-around is the intended pre-processing.
    y = img - x
    # The flag constant is upper-case in the Python bindings.
    _, thresh = cv2.threshold(y, 127, 255, cv2.THRESH_BINARY_INV)
    # Hand back the binarized image only; the threshold value itself is unused.
    return thresh


def move_slider(website, slider, track, **kwargs):
    """Drag the slider element step by step along ``track``.

    :param website: selenium page/driver object the actions are performed on
    :param slider: slider element inside that page
    :param track: sequence of horizontal offsets, applied one after another
    :param kwargs: optional ``name`` used in the log messages
    :return: ``track[0]`` (without dragging) when the first step exceeds 200,
        ``True`` after a completed drag, ``None`` when an exception was caught
    """
    label = kwargs.get('name', '滑块')

    try:
        # A first step above 200 is handed back to the caller untouched.
        if track[0] > 200:
            return track[0]

        # Press and hold the slider before moving.
        ActionChains(website).click_and_hold(slider).perform()
        time.sleep(0.15)

        for step in track:
            # Each step gets a small random vertical jitter.
            chain = ActionChains(website)
            chain.move_by_offset(xoffset=step, yoffset=randint(-2, 2)).perform()

        # Short pause, then let go of the slider.
        time.sleep(0.2)
        ActionChains(website).release(slider).perform()
        time.sleep(0.5)
        print(f'[网页] 拖拽 {label}')
        return True
    except Exception as exc:
        print(f'[网页] 拖拽 {label} 失败 {exc}')
        return None


class computedistance:
    """Compute where the small slider piece belongs inside the captcha background.

    The background is cropped to a horizontal band around the piece, the piece
    is converted to grayscale and binarized, and template matching locates the
    best-matching position inside the band.
    """

    def __init__(self, background_path: str, image_to_move: str, offset_top_px: int, show_img=False):
        """
        :param background_path: path of the captcha background image
        :param image_to_move: path of the small slider-piece image
        :param offset_top_px: top offset (in pixels) of the piece inside the background
        :param show_img: whether to display the annotated background after matching
        """
        self.background_img = cv2.imread(background_path)
        self.offset_px = offset_top_px
        self.show_img = show_img
        small_img_data = cv2.imread(image_to_move, cv2.IMREAD_UNCHANGED)
        # Scale factor that brings the piece's width to 50 px.
        scalex = 50 / small_img_data.shape[1]
        # The same factor is applied to both axes, so the aspect ratio is kept
        # (the result is 50 px wide; it is 50x50 only for a square piece).
        # No interpolation flag is passed, so OpenCV's default is used.
        self.tpl_img = cv2.resize(small_img_data, (0, 0), fx=scalex, fy=scalex)
        self.background_cutting = None

    def show(self, img):
        """Display ``img`` when image display was enabled in the constructor."""
        if self.show_img:
            show_image(img)

    def tpl_op(self):
        """Match the binarized piece against the cropped background.

        ``cutting_background`` must have been called first.

        :return: ``(x, y)`` of the top-left corner used for the drawn rectangle
        """
        # Grayscale version of the piece.
        tpl_gray = cv2.cvtColor(self.tpl_img, cv2.COLOR_BGR2GRAY)
        h, w = tpl_gray.shape
        # Grayscale version of the cropped background band.
        background_gray = cv2.cvtColor(self.background_cutting, cv2.COLOR_BGR2GRAY)
        # Binarized piece used as the template.
        threshold_img = make_threshold(tpl_gray)
        # Template matching: the maximum of the score map is the best position.
        result = cv2.matchTemplate(background_gray, threshold_img, cv2.TM_CCOEFF_NORMED)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
        # Top-left corner: x is shifted 5 px to the left, y is offset by offset_px.
        # NOTE(review): the band is cropped starting 10 px above offset_px, so the
        # y value here may be 10 px too low - confirm before relying on y.
        top_left = (max_loc[0] - 5, max_loc[1] + self.offset_px)
        # Bottom-right corner of the matched area.
        bottom_right = (top_left[0] + w, top_left[1] + h)
        # Mark the target position on the colour background:
        # rectangle(image, top-left, bottom-right, colour, thickness).
        cv2.rectangle(self.background_img, top_left, bottom_right, (0, 0, 255), 2)
        if self.show_img:
            show_image(self.background_img)
        return top_left

    def cutting_background(self):
        """Crop the background to the rows around the piece (10 px margin each side)."""
        height = self.tpl_img.shape[0]
        # Clamp at 0: a negative start index would wrap around to the bottom of
        # the image and yield an empty or wrong band when offset_px < 10.
        top = max(self.offset_px - 10, 0)
        bottom = self.offset_px + height + 10
        self.background_cutting = self.background_img[top:bottom, :]

    def run(self):
        """Crop the background, run the match and return the top-left corner."""
        # The crop is required by tpl_op, which reads self.background_cutting.
        self.cutting_background()
        return self.tpl_op()


def identify_gap(bg, tp):
    """Locate the captcha gap by template matching on edge images.

    :param bg: path of the background image
    :param tp: path of the gap (slider piece) image
    :return: x coordinate of the top-left corner of the best match
    """
    # Load both images (background first, then the piece).
    sources = [cv2.imread(path) for path in (bg, tp)]

    # Edge detection on each image.
    edges = [cv2.Canny(image, 100, 200) for image in sources]

    # Convert the single-channel edge maps to 3-channel images.
    bg_rgb, tp_rgb = [cv2.cvtColor(edge, cv2.COLOR_GRAY2RGB) for edge in edges]

    # Template matching; the location of the maximum score is the best match.
    scores = cv2.matchTemplate(bg_rgb, tp_rgb, cv2.TM_CCOEFF_NORMED)
    _, _, _, best_top_left = cv2.minMaxLoc(scores)

    gap_x = best_top_left[0]
    print(gap_x)
    # Only the horizontal position is needed by the caller.
    return gap_x


class SliderMove(object):
    """Open the tracking page in Chrome (selenium-wire) and solve its slider captcha."""

    def __init__(self):
        self.url = 'https://www.jtexpress.com.cn/service.html?indexIpt=JT5125611716695'
        # Optional working directory; empty means "not configured".
        self.process_folder = ''
        self.background_path = './bg.jpg'
        self.small_path = './tp.jpg'
        self.small_px = 0
        self.xpath = {}
        self.browser = None
        # Per-process temp files for the captured captcha images (set in process_main).
        self.bg_path = None
        self.tp_path = None

    def check_file_exist(self):
        """Create the working directory when one is configured and it is missing."""
        # An empty folder name cannot be created (os.mkdir('') raises), so skip it.
        if self.process_folder:
            os.makedirs(self.process_folder, exist_ok=True)

    def start_browser(self):
        """Start Chrome with the configured options and maximize the window."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36")
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument("--ignore-ssl-errors")
        chrome_options.add_argument("--ignore-certificate-errors")
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        # Raw string: the backslashes of the Windows path must not be read as escapes.
        # NOTE(review): executable_path is deprecated in Selenium 4 - confirm the
        # pinned Selenium version still accepts it.
        self.browser = webdriver.Chrome(options=chrome_options,
                                        executable_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
        # Make navigator.webdriver read as undefined on every new document.
        self.browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """Object.defineProperty(navigator, 'webdriver', { get: () => undefined });"""})

        self.browser.maximize_window()

    def close_browser(self):
        """Close the window and end the session; a no-op when no browser is open."""
        if self.browser is None:
            return
        browser, self.browser = self.browser, None
        try:
            browser.close()
        finally:
            # Always end the driver session, even if closing the window failed.
            browser.quit()

    def wait_element_loaded(self, xpath: str, timeout=10, close_browser=True):
        """Poll until an element is present or the timeout expires.

        :param xpath: value to look up; despite the name it is used as a CSS
            class name (``By.CLASS_NAME``)
        :param timeout: maximum number of seconds to wait
        :param close_browser: whether to close the browser when the wait times out
        :return: True when the element was found, False on timeout
        """
        deadline = time.time() + timeout
        while time.time() < deadline:
            # noinspection PyBroadException
            try:
                if self.browser.find_element(by=By.CLASS_NAME, value=xpath):
                    return True
            except Exception:
                # Best-effort polling: a failed lookup just means "not there yet".
                pass
            # Sleep on every miss; find_element raises when the element is
            # absent, so sleeping only after a successful lookup would busy-spin.
            time.sleep(0.5)
        if close_browser:
            self.close_browser()
        return False

    def _save_captcha_images(self):
        """Write the latest captured captcha background and piece images to disk."""
        bg = None
        tp = None
        # Later requests overwrite earlier ones, so after a refresh the most
        # recently captured images win.
        for request in self.browser.requests:
            response = request.response
            if not response or 'hycdn' not in request.path:
                continue
            body = response.body
            # The PNG signature sits in the first 8 bytes; the PNG response is
            # treated as the piece, anything else as the background.
            if b'PNG' in body[:8]:
                tp = body
            else:
                bg = body
        if not bg or not tp:
            raise RuntimeError('captcha images were not found in the captured requests')
        with open(self.bg_path, 'wb') as bg_file:
            bg_file.write(bg)
        with open(self.tp_path, 'wb') as tp_file:
            tp_file.write(tp)

    def process_main(self):
        """Load the page and try the slider captcha up to two times."""
        self.browser.get(self.url)
        self.bg_path = f'./bg_{os.getpid()}.jpg'
        self.tp_path = f'./tp_{os.getpid()}.jpg'
        for i in range(2):
            print('重试第%d次' % (i))
            time.sleep(0.5)
            # Re-capture on every attempt so a retry does not reuse images
            # saved before the refresh.
            self._save_captcha_images()
            if self.process_slider():
                break
            # The slide did not succeed: reload the page and try again.
            self.browser.refresh()

    @staticmethod
    def handle_distance(distance):
        """Split a straight distance into a list of random steps.

        Steps are drawn from [-2, 50] until their sum exceeds ``distance``; a
        final correcting step is appended so the steps sum exactly to ``distance``.

        :param distance: total horizontal distance to cover
        :return: list of integer steps whose sum equals ``distance``
        """
        steps = []
        travelled = 0
        while travelled <= distance:
            step = randint(-2, 50)
            steps.append(step)
            travelled += step

        if travelled != distance:
            steps.append(distance - travelled)
        return steps

    def _print_tracking_responses(self):
        """Print the path and decoded body of every captured tracking response."""
        for request in self.browser.requests:
            if request.response and 'trackingCustomerByWaybillNo' in request.path:
                print(request.path)
                body = request.response.body
                # Decompress only when the body carries the gzip magic bytes.
                if body[:2] == b'\x1f\x8b':
                    body = gzip.decompress(body)
                print(body.decode("utf-8"))

    def process_slider(self):
        """Compute the slide track, drag the slider and check the outcome.

        :return: True when the result element appeared after sliding, else False
        """
        distance = identify_gap(self.bg_path, self.tp_path)
        # NOTE(review): the halving and the -10 look like an empirical mapping
        # from image pixels to on-page pixels - confirm against the page layout.
        track = self.handle_distance(distance // 2 - 10)
        track.extend([-10, -5, -1])

        iframe = self.browser.find_element(by=By.ID, value='tcaptcha_iframe')
        self.browser.switch_to.frame(iframe)
        try:
            slider_element = self.browser.find_element(By.ID, "tcaptcha_drag_thumb")
            move_slider(self.browser, slider_element, track)
        finally:
            # Leave the captcha iframe so later lookups run against the main page.
            self.browser.switch_to.default_content()
        time.sleep(1)

        # The slide counts as successful when the result element shows up.
        if not self.wait_element_loaded('waybillStatusMain', timeout=2, close_browser=False):
            return False
        self._print_tracking_responses()
        return True

    def _remove_temp_files(self):
        """Delete the temporary captcha image files that exist."""
        for path in (self.tp_path, self.bg_path):
            if path and os.path.exists(path):
                os.remove(path)

    def run(self):
        """Run one full session; the browser and temp files are always cleaned up."""
        try:
            self.start_browser()
            self.process_main()
        finally:
            self.close_browser()
            self._remove_temp_files()





# Script entry point: build the task object and run one full session.
if __name__ == '__main__':
    api_task = SliderMove()
    api_task.run()