前要
参考博客
自动化测试框架:DrissionPage
https://blog.csdn.net/m0_51805222/article/details/134142594
python 使用ddddocr库实现滑块验证码滑动验证
https://blog.csdn.net/davice_li/article/details/135713351
1. 网站分析
由于该网站的滑块和缺口其实来自同一张图片, 只是通过 js 和 css 进行了分割, 所以 generate_distance 中的滑块图片我直接使用固定的本地文件, 背景图片则从页面截图获取
滑块图片
背景图片
2. js分析
其实缺口的 x 坐标可以直接从 js 中获取, 但本人技术有限, 暂时不知道该如何获取
3. 代码展示
import random
import time
import cv2
import numpy as np
from DrissionPage import ChromiumPage
# Module-level browser page object shared by captcha(), get_data() and run().
page = ChromiumPage()
def generate_distance(bg_path='captcha.png', slice_path='slice_image.png'):
    """
    Locate the slider gap in the captcha background via edge template matching.

    Both images are read from disk, converted to Canny edge maps and compared
    with ``cv2.matchTemplate``; the x coordinate of the best match is returned.

    :param bg_path: path of the background image file
    :param slice_path: path of the slider-piece image file
    :return: x coordinate of the best-matching location in the background
    :rtype: int
    :raises ValueError: if either file cannot be decoded as an image
    """
    def _load_edges(path, smooth=False):
        # Read the raw bytes and decode them in memory as a 3-channel image.
        with open(path, 'rb') as f:
            raw = np.frombuffer(f.read(), dtype=np.uint8)
        image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if image is None:
            # imdecode signals failure by returning None; fail early with a
            # readable message instead of an opaque error further down.
            raise ValueError('cannot decode image file: %s' % path)
        if smooth:
            # Mean-shift filtering is applied to the background only, before
            # edge detection, exactly as in the original pipeline.
            image = cv2.pyrMeanShiftFiltering(image, 5, 50)
        return cv2.Canny(image, 255, 255)

    slice_edges = _load_edges(slice_path)
    bg_edges = _load_edges(bg_path, smooth=True)

    result = cv2.matchTemplate(bg_edges, slice_edges, cv2.TM_CCOEFF_NORMED)
    # With TM_CCOEFF_NORMED the best match is the maximum of the score map.
    _, _, _, max_loc = cv2.minMaxLoc(result)
    return max_loc[0]
# Simulated drag trajectory
def generate_track(distance):
    """
    Build an ease-out drag trajectory that covers ``distance`` pixels.

    The returned list starts with a random ``[x, y, 0]`` entry (presumably the
    pointer's starting offset -- TODO confirm against the consumer), followed
    by ``[dx, dy, dt]`` steps, and ends with a ``[0, 0, dt]`` pause entry.

    :param distance: total horizontal distance to cover
    :return: ``(tracks, times)`` where ``times`` is the sum of every entry's
        third field
    :rtype: tuple
    """
    def _ease_out_expo(step):
        return 1 if step == 1 else 1 - pow(2, -10 * step)

    tracks = [[random.randint(20, 60), random.randint(10, 40), 0]]
    count = 30 + int(distance / 2)
    target = round(distance)
    _x, _y = 0, 0
    for item in range(count):
        x = round(_ease_out_expo(item / count) * distance)
        t = random.randint(10, 20)
        if x == _x:
            # No horizontal progress on this tick; emit nothing.
            continue
        tracks.append([x - _x, _y, t])
        _x = x
    # The loop never evaluates step == 1, so the eased position can stop short
    # of the target (e.g. 999 for distance=1000), and the loop does not run at
    # all when count <= 0. Close whatever gap is left in one final step.
    if _x != target:
        tracks.append([target - _x, _y, random.randint(10, 20)])
    tracks.append([0, 0, random.randint(200, 300)])
    times = sum(track[2] for track in tracks)
    return tracks, times
def captcha():
    """
    Open the query page, fill in the search form and solve the slider captcha.

    The form is driven by clicking a fixed sequence of elements with a pause
    after each click. Afterwards the captcha canvas is screenshotted to
    ``captcha.png``, the gap offset is computed with ``generate_distance`` and
    the slider is dragged; this repeats until the page reports success.
    """
    page.get('http://stats.customs.gov.cn/')
    time.sleep(1)

    # (locator, seconds to wait after the click), in the order they are clicked.
    steps = (
        ('xpath://span[text()="出口"]/following::*[1]', 1),
        ('xpath://select[@id="year"]', 1),
        ('xpath://select[@id="year"]/option[@value="2022"]', 1),
        ('xpath://select[@id="endMonth"]', 1),
        ('xpath://select[@id="endMonth"]/option[text()="12"]', 1),
        ('xpath://select[@id="outerField1"]', 1),
        ('xpath://select[@id="outerField1"]/option[@name="CODE_TS"]', 1),
        ('xpath://a[@id="button1"]', 5),
        ('xpath://span[text()="6位"]/following::*[1]', 5),
        ('xpath://div[@class="text-c"]/input', 1),
        ('xpath://select[@id="outerField2"]', 1),
        ('xpath://select[@id="outerField2"]/option[@name="ORIGIN_COUNTRY"]', 1),
        ('xpath://select[@id="outerField3"]', 1),
        ('xpath://select[@id="outerField3"]/option[@name="TRADE_MODE"]', 1),
        ('xpath://select[@id="outerField4"]', 1),
        ('xpath://select[@id="outerField4"]/option[@name="TRADE_CO_PORT"]', 1),
        ('xpath://a[@id="doSearch"]', 1),
    )
    for locator, pause in steps:
        page.ele(locator, timeout=2).click()
        time.sleep(pause)

    # Best-effort click on the layer dialog's first button; failures are
    # ignored on purpose. Only Exception is caught so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        page.ele('xpath://div[@class="layui-layer-btn"]/a[@class="layui-layer-btn0"]', timeout=2).click()
    except Exception:
        pass
    time.sleep(1)

    while True:
        # Save the current captcha canvas under the file name that
        # generate_distance() reads by default.
        page.ele('xpath://div[@id="captcha"]/canvas', timeout=2).get_screenshot('captcha.png')
        time.sleep(2)
        x_num = generate_distance()
        # Fixed +15 correction from the original author -- rationale not
        # visible here, TODO confirm.
        x_num = x_num + 15
        print(x_num)
        element = page.ele('xpath://div[@class="sliderMask"]/div[@class="slider"]')
        # drag takes three parameters: offset_x (x-axis distance),
        # offset_y (y-axis distance) and duration (drag speed, default .5).
        element.drag(offset_x=x_num)
        msg = page.ele('xpath://div[@id="msg"]', timeout=2).text
        print(msg)
        if msg == '验证通过!':
            time.sleep(2)
            page.ele('xpath://div[@class="page-container"]//a[@id="doSearch"]', timeout=2).click()
            break
        # Verification failed: wait, then retry with a fresh screenshot.
        time.sleep(3)
    time.sleep(5)
def get_data():
    """
    Print every row of the result table and append it to ``data.txt``.

    For each cell the ``title`` property of its child ``div`` is used when it
    is truthy, otherwise the ``div``'s text. Each row is written as one line
    with the cell values joined by single spaces.
    """
    rows = page.eles('xpath://tbody[@id="div1"]//tr', timeout=2)
    if not rows:
        # Nothing to write; avoid creating or touching the output file.
        return
    # Open the output once for the whole page instead of once per row.
    with open('data.txt', 'a', encoding='utf-8') as f:
        for tr in rows:
            ls = []
            for td in tr.eles('xpath:.//td'):
                # Resolve the child div once; every lookup is a browser call.
                div = td.ele('xpath:./div')
                ls.append(div.property('title') or div.text)
            print(ls)
            f.write(" ".join(ls) + '\n')
def run():
    """
    Run the whole scrape: solve the captcha, switch the page size to 200,
    then save each result page and move on to the next one.

    Paging stops when the "next page" link is no longer found, or after
    10000 iterations at the latest. Errors raised while handling a page are
    printed and the loop continues.
    """
    captcha()
    time.sleep(20)
    page.ele('xpath://select[@id="pageSize"]', timeout=2).click()
    time.sleep(1)
    page.ele('xpath://select[@id="pageSize"]/option[@value="200"]', timeout=2).click()
    time.sleep(20)
    for num in range(10000):
        try:
            print(num)
            get_data()
            next_link = page.ele('xpath://a[text()="下页"]', timeout=2)
            if not next_link:
                # No next-page link: stop here. Continuing would re-scrape
                # the same page and append duplicate rows on every pass.
                break
            next_link.click()
            time.sleep(20)
        except Exception as e:
            print(e)
if __name__ == '__main__':
    # Start scraping only when executed as a script, not when imported.
    run()