空间验证码训练集获取

最近有个需求,在爬取数据时碰到了验证码。准备使用 YOLOv8 训练识别模型,但没有现成的训练集,所以先写脚本批量下载验证码图片。

代码基本来自这位大佬的仓库,我只稍作改动:

https://github.com/cycyup/crack_geetest

import requests
import time
import json


def _timestamp_ms():
    """Return the current Unix time in whole milliseconds."""
    return int(round(time.time() * 1000))


def _parse_jsonp(text):
    """Decode the JSON payload wrapped in a ``callback(...)`` JSONP response.

    Raises:
        ValueError: if *text* has no ``(...)`` wrapper or the wrapped
            payload is not valid JSON.
    """
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end <= start:
        raise ValueError(f"unexpected JSONP response: {text[:100]!r}")
    return json.loads(text[start + 1:end])


def geetest_crack(timeout=10):
    """Fetch one Geetest click-captcha image and save it to disk.

    The function registers a challenge on www.geetest.com, replays the
    ``get.php`` / ``ajax.php`` requests against api.geetest.com, reads the
    ``sign`` and ``pic`` fields from the click-challenge response, prints
    them, and downloads the picture from static.geetest.com. The image is
    written to the current working directory as
    ``<millisecond timestamp>geetest_image.jpg``.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: on connection errors or timeouts.
        ValueError: if a response body cannot be decoded as JSON / JSONP.
        KeyError: if an expected field is missing from a decoded response.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
        'accept': 'application/json, text/plain, */*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh,zh-TW;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6'
    }

    # The context manager closes the session's pooled connections on exit.
    with requests.session() as session:
        register_url = (
            'https://www.geetest.com/api/user/show/register-space'
            f'?t={_timestamp_ms()}'
        )
        res = session.get(url=register_url, headers=headers, timeout=timeout)
        registration = json.loads(res.text)
        gt, challenge = registration['gt'], registration['challenge']

        # Both halves must be f-strings; otherwise the callback placeholder
        # is sent to the server as literal text.
        get_url = (
            f'https://api.geetest.com/get.php?gt={gt}&challenge={challenge}'
            f'&lang=zh-cn&pt=0&client_type=web&w=&callback=geetest_{_timestamp_ms()}'
        )
        # The responses of the next two requests are not used.
        # NOTE(review): presumably the server expects them before it issues
        # the click challenge - confirm before removing.
        session.get(url=get_url, headers=headers, timeout=timeout)

        ajax_url = (
            'https://api.geetest.com/ajax.php?'
            f'gt={gt}&'
            f'challenge={challenge}&'
            'lang=zh-cn&pt=0&client_type=web_mobile&'
            'w=&'
            f'callback=geetest_{_timestamp_ms()}'
        )
        session.get(url=ajax_url, headers=headers, timeout=timeout)

        get_space_url = (
            'https://api.geetest.com/get.php?is_next=true&type=click&'
            f'gt={gt}&'
            f'challenge={challenge}&'
            'lang=zh-cn&https=true&protocol=https%3A%2F%2F&offline=false&'
            'product=popup&api_server=api.geetest.com&isPC=true&autoReset=true&'
            f'width=100%25&callback=geetest_{_timestamp_ms()}'
        )
        body = session.get(url=get_space_url, headers=headers, timeout=timeout).text

    # Locate the JSONP wrapper by its parentheses instead of a fixed offset,
    # so the parse does not depend on the callback name's length.
    data = _parse_jsonp(body)['data']

    sign_value = data['sign']
    pic_value = data['pic']

    print("sign的值为:", sign_value)
    picurl = "https://static.geetest.com" + pic_value
    print(picurl)
    response = requests.get(picurl, timeout=timeout)
    if response.status_code == 200:
        # Build the name once so the log message matches the file written.
        filename = f"{_timestamp_ms()}geetest_image.jpg"
        with open(filename, "wb") as file:
            file.write(response.content)
        print("图片已保存为 " + filename)
    else:
        print("图片下载失败,HTTP状态码:", response.status_code)


if __name__ == '__main__':
    # Collect 99 captcha images, numbering the attempts from 1.
    total_attempts = 99
    pause_seconds = 10
    for attempt in range(1, total_attempts + 1):
        print(attempt)
        geetest_crack()
        # Pause after every download, including the final one.
        time.sleep(pause_seconds)

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值