滑块验证码selenium识别(hqew)

在这里插入图片描述
解决思路: 图片链接提取下载 → 图片处理(降噪、灰度二值化) → 缺口横坐标计算 → 构造滑动轨迹 → 模拟拖动 → 验证

import base64
import json
from datetime import datetime
import cv2
import requests
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from urllib.request import urlretrieve
from bs4 import BeautifulSoup
import re, os, sys
from PIL import Image
from time import sleep
import pymysql
import random
# Directory three levels above the directory that contains this file
# (dirname is applied four times to the file's absolute path).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
# Put that directory first on the import search path; main() also reads
# sys.path[0] to build the captcha image paths.
sys.path.insert(0, BASE_DIR)


def _tran_canny(image):
    """Return the Canny edge map of ``image`` after a 3x3 Gaussian blur.

    The blur is the denoising step; edge detection then runs with hysteresis
    thresholds 50 and 150.

    :param image: image array as returned by ``cv2.imread``.
    :return: result of ``cv2.Canny`` on the blurred image.
    """
    blurred = cv2.GaussianBlur(image, (3, 3), 0)
    edges = cv2.Canny(blurred, 50, 150)
    return edges


def detect_displacement(img_slider_path, image_background_path):
    """Return the x coordinate at which the template best matches the image.

    Both files are loaded as grayscale, turned into Canny edge maps and
    compared with normalized correlation-coefficient template matching
    (``cv2.TM_CCOEFF_NORMED``).

    NOTE(review): the parameter names look swapped relative to their use.
    The first path is the image that is searched and the second one is the
    template; ``main`` passes the background picture first and the puzzle
    piece second. The names are kept so existing callers keep working.

    :param img_slider_path: path of the image that is searched.
    :param image_background_path: path of the template that is looked for.
    :return: x coordinate of the top-left corner of the best match.
    :raises ValueError: if either file cannot be read as an image.
    """
    image = cv2.imread(img_slider_path, 0)  # flag 0: load as grayscale
    template = cv2.imread(image_background_path, 0)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an obscure OpenCV error
    # further down.
    for path, loaded in ((img_slider_path, image), (image_background_path, template)):
        if loaded is None:
            raise ValueError('cannot read image: {}'.format(path))
    res = cv2.matchTemplate(_tran_canny(image), _tran_canny(template), method=cv2.TM_CCOEFF_NORMED)
    # Only the location of the maximum score is needed.
    _, _, _, max_loc = cv2.minMaxLoc(res)
    top_left = max_loc[0]  # horizontal coordinate
    print(top_left)
    return top_left


def init():
    """Create the Chrome driver and set the module-level crawl configuration.

    Assigns the globals used by the rest of the script: the site URLs, the
    request headers, the ``browser`` driver, the login credentials and three
    ``WebDriverWait`` helpers with 60 s, 3 s and 2 s timeouts.
    """
    global index_url, hot_url, detail_url, headers, browser, username, password, wait0, wait1, wait2
    index_url = 'https://www.hqew.com/'
    hot_url = 'https://product.hqew.com/home/hotsearch?callback=hotwordsCallback'
    detail_url = 'https://s.hqew.com/{}.html?from=hotsearch'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
               'host': 'product.hqew.com'}
    options = webdriver.ChromeOptions()
    # Optional switches, left disabled:
    # options.add_argument('--headless')
    # options.add_argument('--disable-gpu')
    # options.add_argument('--no-sandbox')
    # options.add_argument('disable-infobars')
    # options.add_experimental_option('useAutomationExtension', False)
    # options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # options.add_argument("--disable-blink-features=AutomationControlled")
    # options.add_experimental_option("excludeSwitches", ["enable-logging"])
    # options.add_argument('-ignore-certificate-errors')
    # options.add_argument('-ignore -ssl-errors')
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword, which
    # newer Selenium 4 releases no longer accept.
    browser = webdriver.Chrome(options=options)
    # Make ``navigator.webdriver`` report false on every newly loaded document.
    browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
        Object.defineProperty(navigator, 'webdriver', {
          get: () => false
        })
      """
    })
    browser.maximize_window()
    # One account is picked at random from the list for this run.
    users = [{'u': '1341671****', 'p': '******'}]
    user = random.choice(users)
    username = user.get('u')
    password = user.get('p')
    wait0 = WebDriverWait(browser, 60)
    wait1 = WebDriverWait(browser, 3)
    wait2 = WebDriverWait(browser, 2)


# Build a drag trajectory
def get_trace(distance):
    """Build the per-step offsets of a uniformly accelerated drag.

    Each step covers 0.1 time units. The loop stops once the unrounded
    displacement reaches ``distance``, while the returned offsets are rounded
    individually, so their sum need not equal ``distance``.

    :param distance: (Int) distance between the slider and the gap
    :return: (List) rounded offset of every step
    """
    dt = 0.1
    # The acceleration phase spans the whole distance, so within the loop the
    # deceleration value below is never selected.
    accel_limit = distance
    steps = []
    travelled = 0
    speed = 0
    while travelled < distance:
        accel = 30 if travelled < accel_limit else -30
        step = speed * dt + 1 / 2 * accel * dt * dt
        speed = speed + accel * dt
        travelled += step
        steps.append(round(step))
    return steps


def get_tracks(distance):
    """Build a randomized drag trajectory that overshoots and then backs off.

    The target is ``distance + 10``. Each step covers 0.9 time units with a
    random acceleration: 1..3 while the position is below 5/8 of the target,
    otherwise -(2..4). Steps are rounded as they are accumulated. Five final
    offsets of -1 or -2 move the slider back.

    :param distance: distance to the gap, before the 10 px overshoot.
    :return: list of integer offsets, the last five being the back-off.
    """
    dt = 0.9
    target = distance + 10  # overshoot by 10 px, compensated by the back-off
    accel_until = target * 5 / 8
    offsets = []
    speed = 0
    position = 0
    while position < target:
        if position < accel_until:
            accel = random.randint(1, 3)
        else:
            accel = -random.randint(2, 4)
        step = round(speed * dt + 0.5 * accel * (dt ** 2))
        offsets.append(step)
        position += step
        speed = speed + accel * dt
    # Back off towards the approximate gap position.
    offsets.extend(-random.randint(1, 2) for _ in range(5))
    return offsets


# Simulate the drag
def move_to_gap(trace):
    """Drag the captcha slider along ``trace`` and release it.

    :param trace: iterable of horizontal pixel offsets, applied one at a time.
    """
    locator = (By.CLASS_NAME, 'yidun_slider__icon')
    handle = wait1.until(EC.presence_of_element_located(locator))
    # Press and hold the mouse button on the slider handle.
    ActionChains(browser).click_and_hold(handle).perform()
    # Replay the trace as separate relative moves, one perform() per offset.
    for offset in trace:
        ActionChains(browser).move_by_offset(xoffset=offset, yoffset=0).perform()
    # Short pause before letting go, imitating a person lining the piece up.
    sleep(0.5)
    ActionChains(browser).release().perform()


def login():
    """Fill in the login form with the module-level credentials and submit it.

    All three form elements are located first (each with the 2 s wait); only
    then are the user name and password typed and the button clicked.
    """
    def _locate(element_id):
        return wait2.until(EC.presence_of_element_located((By.ID, element_id)))

    name_box = _locate('J_loginName')
    password_box = _locate('J_loginPsw')
    submit_button = _locate('J_btnLogin')
    name_box.send_keys(username)
    password_box.send_keys(password)
    submit_button.click()


def save_data(item):
    """Placeholder for persisting a scraped item; currently does nothing.

    :param item: item dict as built by ``parse_data``; it is ignored.
    :return: None
    """
    return None


def parse_data():
    """Read title and brand from the page currently open in ``browser``.

    Both values are taken from the second ``tr`` of the ``list-table`` table.

    :return: dict with the keys ``title``, ``brand_name``, ``url``,
        ``sources``, ``creator``, ``create_time`` and ``spider_time``.
    :raises Exception: whatever ``browser.find_element`` raises when one of
        the two elements is missing.
    """
    title_reg = '//table[@class="list-table"]/tbody/tr[2]/td[contains(@class, "td-model")]//a[1]'
    brand_reg = '//table[@class="list-table"]/tbody/tr[2]/td[contains(@class, "brand")]/div[@class="list-pro"]'
    title_element = browser.find_element(By.XPATH, title_reg)
    brand_element = browser.find_element(By.XPATH, brand_reg)
    title = title_element.text
    brand = brand_element.text
    url = browser.current_url
    print(title, brand)
    # Read the clock once so both timestamps of an item always agree.
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return {
        'title': title.strip() if title else '',
        'brand_name': brand.strip() if brand else '',
        'url': url,
        'sources': 'hqew',
        'creator': 'gxq',
        'create_time': now,
        'spider_time': now,
    }


def save_bs_img(bs, path):
    """Decode base64 data and write the resulting bytes to ``path``.

    :param bs: base64-encoded data (``str`` or ``bytes``).
    :param path: destination file; it is created or overwritten.
    :raises binascii.Error: if ``bs`` is not valid base64 (nothing is written).
    :raises OSError: if the file cannot be opened or written.
    """
    imgdata = base64.b64decode(bs)
    # The context manager closes the file even when the write fails.
    with open(path, 'wb') as file:
        file.write(imgdata)


def save_img(url, path, timeout=10):
    """Download ``url`` and write the response body to ``path``.

    :param url: address of the image to fetch.
    :param path: destination file; it is created or overwritten.
    :param timeout: seconds passed to ``requests.get`` as its timeout, so a
        stalled server cannot block the crawl indefinitely.
    :raises requests.RequestException: on connection errors, timeouts or a
        4xx/5xx response.
    :raises OSError: if the file cannot be opened or written.
    """
    resp = requests.get(url, timeout=timeout)
    # Reject error responses instead of saving an error page as an image.
    resp.raise_for_status()
    # The context manager closes the file even when the write fails.
    with open(path, 'wb') as file:
        file.write(resp.content)


def _solve_slider_captcha():
    """Download the captcha pictures, locate the gap and drag the slider."""
    wait2.until(EC.presence_of_element_located((By.ID, 'captcha_div')))
    img1_reg = '//*[@id="captcha_div"]//img[@class="yidun_bg-img"]'
    img2_reg = '//*[@id="captcha_div"]//img[@class="yidun_jigsaw"]'
    img1_url = browser.find_element(By.XPATH, img1_reg).get_attribute("src")
    img2_url = browser.find_element(By.XPATH, img2_reg).get_attribute("src")
    # Save both pictures: tt2 is the background, tt1 the puzzle piece.
    tt2_path = sys.path[0] + "/Material/special/hqew/tt2.jpg"
    tt1_path = sys.path[0] + "/Material/special/hqew/tt1.jpg"
    # The target folder may not exist yet on a first run.
    os.makedirs(os.path.dirname(tt2_path), exist_ok=True)
    save_img(img1_url, tt2_path)
    save_img(img2_url, tt1_path)
    # Horizontal position of the gap inside the background picture.
    distance = detect_displacement(tt2_path, tt1_path)
    # Drag along a randomized trajectory, 6 px further than the measured gap.
    move_to_gap(get_tracks(distance + 6))
    sleep(2)


def main():
    """Crawl the detail page of every hot-search keyword.

    Fetches the keyword list from ``hot_url``, opens each detail page in the
    browser, tries up to 30 times to get past the slider captcha and login
    form, then parses and saves one item per page. The browser is always shut
    down at the end.

    :raises ValueError: if the hot-search response contains no quoted list.
    """
    init()
    try:
        resp = requests.get(hot_url, headers=headers, timeout=10)
        matched = re.search(r'"(.*)"', resp.text)
        if matched is None:
            raise ValueError('no keyword list found in the hot-search response')
        for keyword in matched.group(1).split(','):
            browser.get(detail_url.format(keyword.strip('"')))
            for _ in range(30):
                try:
                    wait2.until(EC.presence_of_element_located((By.CLASS_NAME, 'search-wrapper')))
                except Exception:
                    pass  # results not visible yet: a captcha or login is expected
                else:
                    print('无验证和登录')
                    # Nothing to solve, so stop retrying instead of re-checking
                    # the same page for all remaining attempts.
                    break
                try:
                    _solve_slider_captcha()
                except Exception as e:
                    print('parse_img: ', e)
                try:
                    login()
                except Exception:
                    # Best effort: the login form is not always present.
                    pass
                try:
                    wait1.until(EC.presence_of_element_located((By.CLASS_NAME, 'search-wrapper')))
                except Exception:
                    print('验证登录失败!')
                else:
                    print('验证登录成功!')
                    break
            try:
                item = parse_data()
                save_data(item)
            except Exception as e:
                print(e)
            sleep(5)
    finally:
        # quit() ends the whole driver session, also when the crawl fails.
        browser.quit()


# Run the crawl only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

  • 3
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值