【浅记】使用seleniumwire滑动极兔滑块

直接上代码

import asyncio
import gzip
import os
import time
import requests
import cv2
import numpy
from random import randint
from selenium.webdriver.common.by import By
from seleniumwire import webdriver
from selenium.webdriver.common.action_chains import ActionChains


def show_image(img_array, name='img', resize_flag=False):
    """Display an image in an OpenCV window and block until a key is pressed.

    :param img_array: image as a numpy array (rows, columns[, channels])
    :param name: window title; the same name is used to destroy the window
    :param resize_flag: when true, images larger than 960x540 are shrunk to
        fit while keeping the aspect ratio; smaller images are never enlarged
    """
    max_height = 540
    max_width = 960
    # Pick the smaller of the two ratios so both dimensions fit the limit.
    scale = min(max_width / img_array.shape[1], max_height / img_array.shape[0])
    if resize_flag and scale < 1:
        img_array = cv2.resize(img_array, (0, 0), fx=scale, fy=scale)
    cv2.imshow(name, img_array)
    # OpenCV's Python bindings are camelCase: waitKey / destroyWindow.
    # waitKey(0) blocks until any key is pressed.
    cv2.waitKey(0)
    cv2.destroyWindow(name)


def make_threshold(img):
    """Binarize an image with a fixed global threshold.

    The input has an all-255 array subtracted from it and is then passed
    through an inverted binary threshold at 127, giving a black/white image.

    :param img: image array to binarize (the caller passes a grayscale image)
    :return: the thresholded image returned by ``cv2.threshold``
    """
    offset = numpy.ones(img.shape, numpy.uint8) * 255
    # NOTE(review): if img is uint8 this subtraction wraps modulo 256
    # (equivalent to img + 1) -- confirm that is the intended preprocessing.
    shifted = img - offset
    # The flag constant is upper-case in the cv2 module (THRESH_BINARY_INV).
    _, thresh = cv2.threshold(shifted, 127, 255, cv2.THRESH_BINARY_INV)
    return thresh


def move_slider(website, slider, track, **kwargs):
    """Drag the slider element along a track of horizontal offsets.

    :param website: selenium driver object for the page
    :param slider: selenium element of the slider handle
    :param track: sequence of per-step x offsets (pixels) to move through
    :param kwargs: optional ``name`` used in the log messages
    :return: ``True`` when the drag was performed, ``False`` when any step
        raised (including an empty ``track``); if the first step is larger
        than 200 nothing is dragged and that first step value is returned
    """
    name = kwargs.get('name', '滑块')

    try:
        # A first step this large is treated as a bad track: bail out early.
        if track[0] > 200:
            return track[0]
        # Press and hold the slider handle.
        ActionChains(website).click_and_hold(slider).perform()
        time.sleep(0.15)
        for step in track:
            # Jitter the pointer vertically by a couple of pixels per step.
            ActionChains(website).move_by_offset(xoffset=step, yoffset=randint(-2, 2)).perform()
        # Short pause before letting go of the handle.
        time.sleep(0.2)
        ActionChains(website).release(slider).perform()
        time.sleep(0.5)
        print(f'[网页] 拖拽 {name}')
        return True
    except Exception as e:
        print(f'[网页] 拖拽 {name} 失败 {e}')
        # Explicit failure value instead of an implicit None.
        return False


class computedistance:
    """Find where the slider piece belongs inside the captcha background.

    The small piece is scaled to a width of 50 pixels, binarized, and located
    in a horizontal band of the background with template matching; the
    top-left corner of the best match is returned.
    """

    def __init__(self, background_path: str, image_to_move: str, offset_top_px: int, show_img=False):
        """
        :param background_path: path of the captcha background image
        :param image_to_move: path of the small piece that has to be moved
        :param offset_top_px: distance in pixels from the top of the
            background to the top of the small piece
        :param show_img: whether to display the annotated result
        :raises OSError: if either image cannot be read
        """
        self.background_img = cv2.imread(background_path)
        if self.background_img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise OSError(f'cannot read background image: {background_path}')
        self.offset_px = offset_top_px
        self.show_img = show_img
        small_img_data = cv2.imread(image_to_move, cv2.IMREAD_UNCHANGED)
        if small_img_data is None:
            raise OSError(f'cannot read slider image: {image_to_move}')
        # Factor that brings the piece's width to 50 pixels.
        scalex = 50 / small_img_data.shape[1]
        # Scale both axes by the same factor (cv2.resize default interpolation),
        # so the aspect ratio is preserved.
        self.tpl_img = cv2.resize(small_img_data, (0, 0), fx=scalex, fy=scalex)
        self.background_cutting = None
        # Row of the full background at which background_cutting starts.
        self._crop_top = 0

    def show(self, img):
        """Display ``img`` when the instance was created with ``show_img``."""
        if self.show_img:
            show_image(img)

    def tpl_op(self):
        """Match the piece against the background and return its position.

        :return: ``(x, y)`` of the match's top-left corner in coordinates of
            the full background image; x carries a fixed -5 pixel correction
        """
        # Grayscale version of the small piece.
        tpl_gray = cv2.cvtColor(self.tpl_img, cv2.COLOR_BGR2GRAY)
        h, w = tpl_gray.shape
        # Search the cropped band when available, otherwise the whole image.
        if self.background_cutting is None:
            search_area, crop_top = self.background_img, 0
        else:
            search_area, crop_top = self.background_cutting, self._crop_top
        background_gray = cv2.cvtColor(search_area, cv2.COLOR_BGR2GRAY)
        # Binarized piece used as the template.
        threshold_img = make_threshold(tpl_gray)
        # Template matching; the maximum of the score map is the best match.
        result = cv2.matchTemplate(background_gray, threshold_img, cv2.TM_CCOEFF_NORMED)
        _, _, _, max_loc = cv2.minMaxLoc(result)
        # Translate the row from crop coordinates back to the full image by
        # adding the row at which the crop actually starts.
        top_left = (max_loc[0] - 5, max_loc[1] + crop_top)
        bottom_right = (top_left[0] + w, top_left[1] + h)
        # Mark the target position on the colour background:
        # rectangle(image, top-left, bottom-right, colour, thickness).
        cv2.rectangle(self.background_img, top_left, bottom_right, (0, 0, 255), 2)
        self.show(self.background_img)
        return top_left

    def cutting_background(self):
        """Crop the background to the band of rows that contains the piece.

        The band spans the piece's height plus a 10 pixel margin above and
        below, e.g. ``background_img[40:110, :]``.
        """
        height = self.tpl_img.shape[0]
        # Clamp at 0: a negative start index would wrap around to the bottom
        # of the image and produce an empty or wrong slice.
        top = max(self.offset_px - 10, 0)
        bottom = self.offset_px + height + 10
        self._crop_top = top
        self.background_cutting = self.background_img[top:bottom, :]

    def run(self):
        """Crop the background, then locate the piece; see ``tpl_op``."""
        # The crop only narrows the search area; tpl_op falls back to the
        # whole background when it has not been performed.
        self.cutting_background()
        return self.tpl_op()


def identify_gap(bg, tp):
    """Locate the gap in the captcha background and return its x coordinate.

    Both images are reduced to Canny edge maps and the piece's edges are
    searched for in the background's edges with template matching.

    :param bg: path of the background image
    :param tp: path of the gap (slider piece) image
    :return: x coordinate of the best match's top-left corner, in pixels of
        the background image
    :raises OSError: if either image cannot be read
    """
    bg_img = cv2.imread(bg)
    tp_img = cv2.imread(tp)
    # cv2.imread returns None on failure instead of raising.
    if bg_img is None:
        raise OSError(f'cannot read background image: {bg}')
    if tp_img is None:
        raise OSError(f'cannot read gap image: {tp}')

    # Edge detection on both images.
    bg_edge = cv2.Canny(bg_img, 100, 200)
    tp_edge = cv2.Canny(tp_img, 100, 200)

    # Expand the single-channel edge maps to three channels.
    bg_pic = cv2.cvtColor(bg_edge, cv2.COLOR_GRAY2RGB)
    tp_pic = cv2.cvtColor(tp_edge, cv2.COLOR_GRAY2RGB)

    # Template matching; the location of the maximum score is the best match.
    res = cv2.matchTemplate(bg_pic, tp_pic, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(res)

    gap_x = max_loc[0]
    print(gap_x)
    return gap_x


class SliderMove(object):
    """Open the J&T Express tracking page and solve its slider captcha.

    The browser is driven through selenium-wire so the captcha images and the
    tracking API response can be read from the captured network requests.
    """

    def __init__(self):
        self.url = 'https://www.jtexpress.com.cn/service.html?indexIpt=JT5125611716695'
        # Working directory used by check_file_exist(); empty means "none".
        self.process_folder = ''
        self.background_path = './bg.jpg'
        self.small_path = './tp.jpg'
        self.small_px = 0
        self.xpath = {}
        self.browser = None
        # Per-process temp files holding the captured captcha images.
        self.bg_path = None
        self.tp_path = None

    def check_file_exist(self):
        """Create the process folder if one is configured and missing."""
        # An empty path means no folder is configured; os.mkdir('') would raise.
        if self.process_folder:
            os.makedirs(self.process_folder, exist_ok=True)

    def start_browser(self):
        """Start Chrome with options that reduce automation fingerprints."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36")
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument("--ignore-ssl-errors")
        chrome_options.add_argument("--ignore-certificate-errors")
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        # Raw string: the Windows path contains backslashes that must not be
        # read as escape sequences.
        # NOTE(review): executable_path is deprecated in Selenium 4 in favour
        # of a Service object -- confirm the Selenium version in use.
        self.browser = webdriver.Chrome(options=chrome_options,
                                        executable_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
        # Hide navigator.webdriver from every document before its scripts run.
        self.browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """Object.defineProperty(navigator, 'webdriver', { get: () => undefined });"""})

        self.browser.maximize_window()

    def close_browser(self):
        """Quit the browser; calling this again afterwards is a no-op."""
        if self.browser is None:
            return
        try:
            # quit() closes every window and ends the driver session.
            self.browser.quit()
        finally:
            self.browser = None

    def wait_element_loaded(self, xpath: str, timeout=10, close_browser=True):
        """Poll until an element is present or the timeout expires.

        :param xpath: despite the name, this value is looked up as a CSS
            class name (``By.CLASS_NAME``)
        :param timeout: maximum time to wait, in seconds
        :param close_browser: whether to close the browser on timeout
        :return: ``True`` if the element was found, ``False`` on timeout
        """
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                if self.browser.find_element(by=By.CLASS_NAME, value=xpath):
                    return True
            except Exception:
                # Deliberate best-effort polling: the element is not there
                # yet, so fall through to the pause and try again.
                pass
            # Pause between attempts instead of hammering the driver.
            time.sleep(0.5)
        if close_browser:
            self.close_browser()
        return False

    def _save_captcha_images(self, timeout=5):
        """Write the latest captured captcha images to bg_path / tp_path.

        Scans the captured requests whose path contains ``hycdn``; a body
        containing the bytes ``PNG`` is taken as the slider piece, any other
        body as the background. Later requests override earlier ones.

        :param timeout: seconds to keep polling for both images
        :return: ``True`` if both images were captured and written
        """
        deadline = time.time() + timeout
        while True:
            bg = tp = None
            for request in self.browser.requests:
                response = request.response
                if not response or 'hycdn' not in request.path:
                    continue
                body = response.body
                if b'PNG' in body:
                    tp = body
                else:
                    bg = body
            if bg and tp:
                break
            if time.time() >= deadline:
                return False
            time.sleep(0.5)
        with open(self.bg_path, 'wb') as bg_file:
            bg_file.write(bg)
        with open(self.tp_path, 'wb') as tp_file:
            tp_file.write(tp)
        return True

    def process_main(self):
        """Load the page and try to pass the slider captcha (two attempts)."""
        self.browser.get(self.url)
        self.bg_path = f'./bg_{os.getpid()}.jpg'
        self.tp_path = f'./tp_{os.getpid()}.jpg'
        for i in range(2):
            print('重试第%d次' % (i))
            time.sleep(0.5)
            # The images are captured on every attempt because a refresh
            # serves a new captcha.
            if not self._save_captcha_images():
                print('[网页] 未获取到验证码图片')
            elif self.process_slider():
                break
            # Attempt failed: forget the captured requests so the next
            # attempt cannot pick up the previous captcha's images, then
            # reload the page.
            del self.browser.requests
            self.browser.refresh()

    @staticmethod
    def handle_distance(distance):
        """Split a straight-line distance into a list of random steps.

        Random steps in [-2, 50] are drawn until their total exceeds
        ``distance``; a final correcting step makes the total exact.

        :param distance: total distance to cover, in pixels
        :return: list of integer steps whose sum equals ``distance``
        """
        import random
        steps = []
        travelled = 0
        # Running total instead of re-summing the list on every iteration.
        while travelled <= distance:
            step = random.randint(-2, 50)
            steps.append(step)
            travelled += step

        if travelled != distance:
            steps.append(distance - travelled)
        return steps

    @staticmethod
    def _decode_body(body):
        """Return a response body as text, un-gzipping it when needed."""
        try:
            body = gzip.decompress(body)
        except (OSError, EOFError):
            # Not gzip data (or truncated): decode the raw bytes as they are.
            pass
        return body.decode("utf-8")

    def process_slider(self):
        """Drag the slider to the detected gap and print the tracking result.

        :return: ``True`` if the result element appeared after the drag
        """
        # x position of the gap in the downloaded background image.
        distance = identify_gap(self.bg_path, self.tp_path)
        # NOTE(review): halving and the -10 offset presumably map image pixels
        # to on-page slider travel -- confirm against the rendered captcha.
        track = self.handle_distance(distance // 2 - 10)
        track.extend([-10, -5, -1])

        iframe = self.browser.find_element(by=By.ID, value='tcaptcha_iframe')
        self.browser.switch_to.frame(iframe)
        try:
            slider_element = self.browser.find_element(By.ID, "tcaptcha_drag_thumb")
            move_slider(self.browser, slider_element, track)
        finally:
            # Leave the captcha iframe again: the result element is expected
            # in the top-level document, and later lookups start there too.
            self.browser.switch_to.default_content()
        time.sleep(1)

        if not self.wait_element_loaded('waybillStatusMain', timeout=2, close_browser=False):
            return False
        for request in self.browser.requests:
            if request.response and 'trackingCustomerByWaybillNo' in request.path:
                print(request.path)
                print(self._decode_body(request.response.body))
        return True

    def run(self):
        """Run the whole flow, always closing the browser and temp files."""
        self.start_browser()
        try:
            self.process_main()
        finally:
            self.close_browser()
            # The files may be missing if the captcha images never arrived.
            for path in (self.tp_path, self.bg_path):
                if path and os.path.exists(path):
                    os.remove(path)





if __name__ == '__main__':
    # Script entry point: build the task and run the whole captcha flow.
    SliderMove().run()
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值