使用ddddocr实现文字验证码和滑块验证码

文字图片验证码识别

DDDDOCR YYDS!!!
ddddocr 对数字、英文验证码的识别准确率大约在 90% 左右。我已经用了挺久,目前还没有遇到识别准确率很低的数字英文验证码。

import ddddocr
# Recognize a text captcha image.
def identifying_code(path):
    """Run ddddocr text classification on the image file at ``path``.

    :param path: path of the captcha image; the file is read as raw bytes
    :return: whatever ``DdddOcr.classification`` returns for those bytes
             (the same value is also printed)
    """
    # show_ad=False: the original author's note ties this flag to ddddocr's
    # advertisement output.
    recognizer = ddddocr.DdddOcr(show_ad=False)
    with open(path, 'rb') as image_file:
        captcha_bytes = image_file.read()
    text = recognizer.classification(captcha_bytes)
    print(text)
    return text

if __name__ == "__main__":
    # Sample run against a captcha image saved on the author's desktop.
    result = identifying_code(
        r"C:\Users\Desktop\identifyingCode.jfif",
    )

path 是验证码图片的路径,函数会返回识别出的字符串。不过为了提高准确率,建议在这里加一些校验:比如你的网站验证码固定由五个字符组成(如 12ABC),就判断一下返回字符串的长度是否为 5,如果不是,就直接刷新验证码重新识别。

以下为滑块验证码代码

'''----------滑块验证码----------'''
import os
import base64
import time
import requests
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from io import BytesIO
import traceback
import ddddocr
# Path where the slider (puzzle piece) image is saved and read back from.
slider_path = r"C:\Users\Desktop\shuo\slider.png"
# Path where the captcha background image is saved and read back from.
background_path = r"C:\Users\Desktop\shuo\background.png"


def login(webpath, username='S40433', password='misumi@02'):
    """Log in to the test site and attempt its geetest slider captcha.

    Opens Chrome, fills in the login form, submits it, and then makes eight
    attempts at the slider captcha: the slider and background images are read
    from the page as base64 data URIs, saved to ``slider_path`` and
    ``background_path``, matched with ddddocr, and the slider is dragged by
    the computed distance.

    :param webpath: URL of the login page
    :param username: value typed into the first form input
    :param password: value typed into the second form input
    :return: None

    NOTE: the defaults are the credentials the original script hard-coded;
    prefer passing credentials in from the caller rather than keeping them
    in source.
    """
    driver = webdriver.Chrome()
    # try/finally so the browser is shut down even if an element lookup or
    # the image matching raises part-way through.
    try:
        driver.get(webpath)
        time.sleep(2)
        # Maximize the window.
        driver.maximize_window()
        time.sleep(2)
        driver.find_element(By.XPATH,
                            '/html/body/div[3]/div/div[4]/div[1]/form/div[2]/div/div/input').send_keys(username)
        time.sleep(1)
        driver.find_element(By.XPATH,
                            '/html/body/div[3]/div/div[4]/div[1]/form/div[3]/div/div/input').send_keys(password)
        time.sleep(2)
        driver.find_element(By.XPATH, '/html/body/div[3]/div/div[4]/div[1]/form/button').click()
        time.sleep(5)

        # The matcher does not depend on the loop, so build it once.
        det = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)

        for _ in range(8):
            # Both images are embedded in the page as base64 data URIs; strip
            # the prefix so only the base64 payload remains.
            slider_url = driver.find_element(By.XPATH, '/html/body/div[3]/div/div[4]/div[2]/div/div[2]/div/div[1]/div/img').get_attribute("src")
            slider_url = slider_url.replace('data:image/png;base64,', '')
            print(slider_url)
            background_url = driver.find_element(By.XPATH, '/html/body/div[3]/div/div[4]/div[2]/div/div[2]/div/div[2]/div/div/div/img').get_attribute("src")
            background_url = background_url.replace('data:image/png;base64,', '')
            print(background_url)
            time.sleep(2)
            base64_to_image(slider_url, slider_path)
            base64_to_image(background_url, background_path)
            # For a site that serves these images by plain URL instead, they
            # could be fetched with requests.get(...) and the response content
            # written to the same two paths.

            # Locate the slider inside the background image.
            with open(slider_path, 'rb') as f:
                target_bytes = f.read()
            with open(background_path, 'rb') as f:
                background_bytes = f.read()
            res = det.slide_match(target_bytes=target_bytes, background_bytes=background_bytes, simple_target=True)
            print(res)
            # Drag distance: first element of the reported target, minus 10 as
            # the empirical correction the author measured for this site.
            distance = res["target"][0] - 10
            tracks = get_tracks(distance)
            print(tracks)
            time.sleep(2)
            # NOTE(review): this XPath is identical to the background image's
            # XPath above - confirm it really is the draggable element.
            slider_button = driver.find_element(By.XPATH, "/html/body/div[3]/div/div[4]/div[2]/div/div[2]/div/div[2]/div/div/div/img")
            # Drag the slider along the computed track.
            move_to_gap(driver, slider_button, tracks)
            time.sleep(2)
    finally:
        # quit() ends the whole WebDriver session, not just the current window.
        driver.quit()



def test():
    """Print ddddocr's slide-match result for the two saved captcha images.

    Reads the files at ``slider_path`` and ``background_path`` and prints
    what ``slide_match`` reports for them; nothing is returned.
    """
    matcher = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)
    with open(slider_path, 'rb') as slider_file:
        slider_bytes = slider_file.read()
    with open(background_path, 'rb') as background_file:
        scene_bytes = background_file.read()
    print(
        matcher.slide_match(
            target_bytes=slider_bytes,
            background_bytes=scene_bytes,
            simple_target=True,
        )
    )


def transPNG(srcImageName, dstImageName):
    """Save a copy of an image with its near-white pixels made transparent.

    The source is converted to RGBA; every pixel whose red, green and blue
    values are all above 220 is replaced by ``(255, 255, 255, 0)``, and all
    other pixels are kept unchanged. The result is written as PNG.

    :param srcImageName: path of the source image
    :param dstImageName: path the processed image is written to
    :return: None
    """
    rgba = Image.open(srcImageName).convert("RGBA")
    cleared = (255, 255, 255, 0)
    pixels = [
        cleared if (px[0] > 220 and px[1] > 220 and px[2] > 220) else px
        for px in rgba.getdata()
    ]
    rgba.putdata(pixels)
    rgba.save(dstImageName, "PNG")


def get_tracks(distance):
    """Split a drag distance into seven per-step offsets.

    The first five steps each cover one fifth of 4/5 of the distance; the
    last two steps each cover half of the remaining fifth.

    :param distance: total offset to move
    :return: list of seven step offsets, in the order they should be applied
    """
    # Step size for the first 4/5 of the distance, spread over five moves.
    fast_step = (distance * 4 / 5) / 5
    # Step size for the final 1/5, spread over two moves.
    slow_step = distance / 5 / 2
    return [fast_step] * 5 + [slow_step] * 2


def move_to_gap(browser, slider, tracks):
    """Drag the slider horizontally through the given step offsets.

    Presses and holds on ``slider``, applies each entry of ``tracks`` as a
    horizontal move (vertical offset 0), waits half a second, then releases.

    :param browser: WebDriver instance the action chain is bound to
    :param slider: element to press and drag
    :param tracks: iterable of horizontal offsets, applied in order
    :return: None
    """
    chain = ActionChains(browser)
    chain.click_and_hold(slider)
    chain.perform()
    # NOTE(review): one ActionChains object is reused for every perform() and
    # is never reset - confirm whether earlier queued actions get replayed.
    for step in tracks:
        chain.move_by_offset(xoffset=step, yoffset=0)
        chain.perform()
    time.sleep(0.5)
    chain.release()
    chain.perform()

def base64_to_image(base64_string, save_path):
    """Decode a base64 image payload and save it to ``save_path``.

    This is best-effort: if decoding, opening or saving fails, the traceback
    is printed and the function returns normally, leaving any existing file
    at ``save_path`` untouched.

    :param base64_string: base64-encoded image data, without a data-URI prefix
    :param save_path: destination path passed to ``Image.save``
    :return: None
    """
    try:
        image_data = base64.b64decode(base64_string)
        image = Image.open(BytesIO(image_data))
        image.save(save_path)
    except Exception:
        # Not a bare ``except:`` - KeyboardInterrupt and SystemExit must still
        # propagate so the script can be stopped.
        print(traceback.format_exc())
if __name__ == '__main__':
    # URL of the login page; left blank in the published script.
    web_path = ""
    login(web_path)

这是我之前测试用的一个网站,这里就不展示网站了;代码里的元素定位(XPath)也是针对这个测试网站录制的。测试网站的滑块图片和背景图都是以 base64 编码存放的,所以又写了一个 base64 转图片的方法;有的网站这里直接给的是图片 URL,那么直接下载图片文件即可。

其中,在计算滑块轨迹时,因为每个网站可能会有偏差,所以要对滑动距离进行微调,在这里加一个小的偏移量即可。我这边计算出结果之后,发现位移偏差在 10 左右,所以减去了 10;实际使用时可以根据网站的滑块和背景图的大小进行加减。

计算滑块轨迹
distance = res["target"][0]-10

get_tracks 方法把滑块的移动分成几段进行,以免被某些网站拦截“一次移动到位”的操作。

  • 16
    点赞
  • 25
    收藏
    觉得还不错? 一键收藏
  • 7
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 7
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值