滑块验证码selenium识别(hqew)

最新推荐文章于 2024-04-22 16:36:03 发布
guixuqi
最新推荐文章于 2024-04-22 16:36:03 发布
阅读量1.1k
点赞数 3
文章标签： selenium python opencv
本文链接：https://blog.csdn.net/qq_42519299/article/details/127104366
版权
在这里插入图片描述
解决思路: 图片链接提取下载 - 图片处理(降噪、灰度二值化) - 缺口横坐标计算 - 构造滑动轨迹 - 模拟拖动 - 验证
import base64
import json
from datetime import datetime
import cv2
import requests
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from urllib.request import urlretrieve
from bs4 import BeautifulSoup
import re, os, sys
from PIL import Image
from time import sleep
import pymysql
import random
# Directory four levels above this file. It is placed at the front of
# sys.path, so sys.path[0] resolves to it afterwards.
BASE_DIR = os.path.abspath(__file__)
for _ in range(4):
    BASE_DIR = os.path.dirname(BASE_DIR)
sys.path.insert(0, BASE_DIR)


def _tran_canny(image):
    """Smooth the image with a 3x3 Gaussian blur and return its Canny edge map.

    The blur suppresses noise before edge detection; the Canny thresholds
    are fixed at 50 (low) and 150 (high).
    """
    blurred = cv2.GaussianBlur(image, (3, 3), 0)
    edges = cv2.Canny(blurred, 50, 150)
    return edges


def detect_displacement(img_slider_path, image_background_path):
    """Return the x-coordinate of the best template match between two images.

    Both files are loaded as grayscale, converted to edge maps with
    ``_tran_canny`` and compared with ``cv2.matchTemplate`` using the
    normalized correlation-coefficient method (``cv2.TM_CCOEFF_NORMED``).

    NOTE(review): the image read from ``img_slider_path`` is the one that is
    searched, and the image read from ``image_background_path`` is used as
    the template. ``main`` in this file passes the background picture first
    and the jigsaw piece second, so the call works but the parameter names
    look swapped -- confirm before renaming anything.

    :param img_slider_path: path of the image that is searched
    :param image_background_path: path of the image used as the template
    :return: x-coordinate of the top-left corner of the best match
    :raises ValueError: if either file cannot be read as an image
    """
    # Flag 0 loads the file as a single-channel grayscale image.
    image = cv2.imread(img_slider_path, 0)
    template = cv2.imread(image_background_path, 0)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an obscure error inside GaussianBlur.
    for path, loaded in ((img_slider_path, image), (image_background_path, template)):
        if loaded is None:
            raise ValueError('cannot read image: {}'.format(path))
    res = cv2.matchTemplate(_tran_canny(image), _tran_canny(template), method=cv2.TM_CCOEFF_NORMED)
    # Only the location of the maximum score is needed.
    _, _, _, max_loc = cv2.minMaxLoc(res)
    top_left = max_loc[0]  # horizontal coordinate
    print(top_left)
    return top_left


def init():
    """Initialise the module-level configuration and start the Chrome browser.

    Sets the globals used by the rest of the script: the site URLs, the HTTP
    headers for the hot-search request, the ``browser`` driver, the login
    credentials (picked at random from ``users``) and three ``WebDriverWait``
    helpers with timeouts of 60, 3 and 2 seconds.
    """
    global index_url, hot_url, detail_url, headers, browser, username, password, wait0, wait1, wait2
    index_url = 'https://www.hqew.com/'
    hot_url = 'https://product.hqew.com/home/hotsearch?callback=hotwordsCallback'
    detail_url = 'https://s.hqew.com/{}.html?from=hotsearch'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
               'host': 'product.hqew.com'}
    options = webdriver.ChromeOptions()
    # Optional switches, left disabled:
    # options.add_argument('--headless')
    # options.add_argument('--disable-gpu')
    # options.add_argument('--no-sandbox')
    # options.add_argument('disable-infobars')
    # options.add_experimental_option('useAutomationExtension', False)
    # options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # options.add_argument("--disable-blink-features=AutomationControlled")
    # options.add_experimental_option("excludeSwitches", ["enable-logging"])
    # options.add_argument('-ignore-certificate-errors')
    # options.add_argument('-ignore -ssl-errors')
    # ``options=`` is the supported keyword; the legacy ``chrome_options=``
    # alias is deprecated and no longer accepted by recent Selenium 4 releases.
    browser = webdriver.Chrome(options=options)
    # Override navigator.webdriver on every new document before page scripts run.
    browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
        Object.defineProperty(navigator, 'webdriver', {
          get: () => false
        })
      """
    })
    browser.maximize_window()
    users = [{'u': '1341671****', 'p': '******'}]
    user = random.choice(users)
    username = user.get('u')
    password = user.get('p')
    wait0 = WebDriverWait(browser, 60)
    wait1 = WebDriverWait(browser, 3)
    wait2 = WebDriverWait(browser, 2)


# Build a slide trajectory
def get_trace(distance):
    """Build the per-step pixel offsets for dragging the slider.

    Motion is simulated in steps of 0.1 time units with constant
    acceleration; each step's displacement is rounded to an integer.

    :param distance: (Int) distance between the slider and the gap
    :return: (List) rounded displacement of each simulated step
    """
    step_time = 0.1
    # The acceleration phase spans the whole distance, so inside the loop the
    # deceleration value is never actually selected.
    accel_span = distance
    offsets = []
    covered = 0
    speed = 0
    while covered < distance:
        accel = 30 if covered < accel_span else -30
        delta = speed * step_time + 1 / 2 * accel * step_time * step_time
        speed = speed + accel * step_time
        covered += delta
        offsets.append(round(delta))
    return offsets


def get_tracks(distance):
    """Build a randomised slide trajectory that overshoots and then backs up.

    The target is ``distance + 10``. Until 5/8 of the target has been covered
    a random positive acceleration (1..3) is applied, afterwards a random
    negative one (-2..-4). Each step lasts 0.9 time units and its displacement
    is rounded to an integer. Five backward steps of 1 or 2 pixels are appended
    at the end.

    :param distance: distance the slider should travel
    :return: list of integer per-step offsets
    """
    step_time = 0.9
    target = distance + 10  # overshoot by 10px, compensated by the backward steps
    accel_limit = target * 5 / 8
    steps = []
    velocity = 0
    travelled = 0
    while travelled < target:
        if travelled < accel_limit:
            accel = random.randint(1, 3)
        else:
            accel = -random.randint(2, 4)
        step = round(velocity * step_time + 0.5 * accel * (step_time ** 2))
        steps.append(step)
        travelled += step
        velocity = velocity + accel * step_time
    # Move back towards the approximate target position.
    steps.extend(-random.randint(1, 2) for _ in range(5))
    return steps


# Simulate the drag
def move_to_gap(trace):
    """Drag the captcha slider along the given horizontal offsets.

    Waits (via ``wait1``) for the element with class ``yidun_slider__icon``,
    presses and holds it, applies every offset in ``trace`` as a separate
    mouse move, pauses half a second and releases the button.

    :param trace: iterable of horizontal pixel offsets, one per move
    """
    handle_locator = (By.CLASS_NAME, 'yidun_slider__icon')
    handle = wait1.until(EC.presence_of_element_located(handle_locator))
    # Press and hold the slider handle.
    ActionChains(browser).click_and_hold(handle).perform()
    # Each offset is performed as its own action chain.
    for step in trace:
        ActionChains(browser).move_by_offset(xoffset=step, yoffset=0).perform()
    # Short pause before letting go, imitating a human lining the piece up.
    sleep(0.5)
    ActionChains(browser).release().perform()


def login():
    """Fill in the login form with the module-level credentials and submit it.

    All three form elements are located (each waited for via ``wait2``)
    before anything is typed, so a missing element leaves the form untouched.
    """
    name_box, password_box, submit_button = (
        wait2.until(EC.presence_of_element_located((By.ID, element_id)))
        for element_id in ('J_loginName', 'J_loginPsw', 'J_btnLogin')
    )
    name_box.send_keys(username)
    password_box.send_keys(password)
    submit_button.click()


def save_data(item):
    """Placeholder for persisting a scraped item; currently does nothing."""
    return None


def parse_data():
    """Scrape the title and brand from the page currently open in ``browser``.

    Both values are read from the second ``tr`` of the ``list-table`` table.
    ``create_time`` and ``spider_time`` come from a single clock reading so
    they are always identical.

    :return: dict with keys ``title``, ``brand_name``, ``url``, ``sources``,
        ``creator``, ``create_time`` and ``spider_time``
    :raises selenium.common.exceptions.NoSuchElementException: if either
        element is not found on the page
    """
    title_reg = '//table[@class="list-table"]/tbody/tr[2]/td[contains(@class, "td-model")]//a[1]'
    brand_reg = '//table[@class="list-table"]/tbody/tr[2]/td[contains(@class, "brand")]/div[@class="list-pro"]'
    title = browser.find_element(By.XPATH, title_reg).text
    brand = browser.find_element(By.XPATH, brand_reg).text
    print(title, brand)
    # One timestamp for both fields; two separate now() calls could straddle
    # a second boundary and disagree.
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return {
        'title': title.strip() if title else '',
        'brand_name': brand.strip() if brand else '',
        'url': browser.current_url,
        'sources': 'hqew',
        'creator': 'gxq',
        'create_time': timestamp,
        'spider_time': timestamp,
    }


def save_bs_img(bs, path):
    """Decode a base64-encoded image and write the raw bytes to ``path``.

    :param bs: base64 data (``str`` or ``bytes``) accepted by ``base64.b64decode``
    :param path: destination file path; an existing file is overwritten
    """
    imgdata = base64.b64decode(bs)
    # The context manager closes the handle even if the write fails.
    with open(path, 'wb') as file:
        file.write(imgdata)


def save_img(url, path, timeout=10):
    """Download ``url`` and write the response body to ``path``.

    :param url: address of the image to download
    :param path: destination file path; an existing file is overwritten
    :param timeout: seconds to wait for the server before giving up; without
        one ``requests.get`` can block indefinitely
    :raises requests.RequestException: on network failure, timeout or an
        HTTP error status
    """
    resp = requests.get(url, timeout=timeout)
    # Fail here rather than saving an error page as if it were an image.
    resp.raise_for_status()
    # The context manager closes the handle even if the write fails.
    with open(path, 'wb') as file:
        file.write(resp.content)


def _solve_slider_captcha():
    """Download the captcha images, compute the gap offset and drag the slider."""
    wait2.until(EC.presence_of_element_located((By.ID, 'captcha_div')))
    img1_reg = '//*[@id="captcha_div"]//img[@class="yidun_bg-img"]'
    img2_reg = '//*[@id="captcha_div"]//img[@class="yidun_jigsaw"]'
    img1_url = browser.find_element(By.XPATH, img1_reg).get_attribute("src")
    img2_url = browser.find_element(By.XPATH, img2_reg).get_attribute("src")
    # Save both captcha images to disk so OpenCV can read them.
    tt2_path = sys.path[0] + "/Material/special/hqew/tt2.jpg"
    tt1_path = sys.path[0] + "/Material/special/hqew/tt1.jpg"
    save_img(img1_url, tt2_path)
    save_img(img2_url, tt1_path)
    # Horizontal distance the slider has to travel.
    distance = detect_displacement(tt2_path, tt1_path)
    trace = get_tracks(distance + 6)
    move_to_gap(trace)
    sleep(2)


def main():
    """Crawl the detail page of every hot-search keyword.

    For each keyword the detail page is opened; up to 30 attempts are made to
    get past the slider captcha and/or the login form before the page is
    parsed and the item handed to ``save_data``. The browser is always shut
    down at the end, even when an error interrupts the crawl.

    :raises RuntimeError: if the hot-search response contains no keyword list
    """
    init()
    try:
        resp = requests.get(hot_url, headers=headers, timeout=10)
        match = re.search(r'"(.*)"', resp.text)
        if match is None:
            raise RuntimeError('no keyword list found in the hot-search response')
        urls = match.group(1).split(',')
        for url in urls:
            url = detail_url.format(url.strip('"'))
            browser.get(url)
            for n in range(30):
                try:
                    wait2.until(EC.presence_of_element_located((By.CLASS_NAME, 'search-wrapper')))
                except Exception:
                    # Result page not visible: a captcha and/or login is pending.
                    pass
                else:
                    print('无验证和登录')
                    # Nothing to solve, so stop retrying.
                    break
                try:
                    _solve_slider_captcha()
                except Exception as e:
                    print('parse_img: ', e)
                try:
                    login()
                except Exception:
                    # Best effort: the login form is not always shown.
                    pass
                try:
                    wait1.until(EC.presence_of_element_located((By.CLASS_NAME, 'search-wrapper')))
                except Exception:
                    print('验证登录失败!')
                else:
                    print('验证登录成功!')
                    break
            try:
                item = parse_data()
                save_data(item)
            except Exception as e:
                print(e)
            sleep(5)
    finally:
        # quit() ends the whole driver session; close() would only close the
        # current window and leave the driver process running.
        browser.quit()


if __name__ == '__main__':
    main()