十七、打码平台的使用

最新推荐文章于 2024-06-23 23:25:41 发布

bug_fu

最新推荐文章于 2024-06-23 23:25:41 发布

阅读量1w

点赞数 2

分类专栏：爬虫、爬虫学习之路　文章标签：打码平台、云打码、若快打码

本文链接：https://blog.csdn.net/weixin_42633359/article/details/85014616

版权

爬虫同时被 2 个专栏收录

33 篇文章 0 订阅

订阅专栏

爬虫学习之路

25 篇文章 1 订阅

订阅专栏

1、常见的打码平台

云打码：http://www.yundama.com/ （能够解决通用的验证码识别）

极验验证码智能识别辅助：http://jiyandoc.c2567.com/ （能够解决复杂验证码的识别）

2、云打码的使用

2.1 云打码官方接口

下面代码是云打码平台提供，做了个简单修改，实现了两个方法：

indetify：传入验证码图片响应的二进制数据（例如 response.content）即可识别
indetify_by_filepath：传入验证码图片的文件路径即可识别

其中需要自己配置的地方是：

username = 'whoarewe' # 用户名

password = '***' # 密码

appid = 4283 # appid

appkey = '02074c64f0d0bb9efb2df455537b01c3' # appkey

codetype = 1004 # 验证码类型

云打码官方提供的api如下：

# THE WINTER IS COMING! the old driver will be driving who was a man of the world!
# -*- coding: utf-8 -*- python 3.6.7, create time is 18-12-13 上午9:33 GMT+8

# coding:utf-8

import requests
import json
import time

class YDMHttp:
    """Small HTTP client for the Yundama captcha-recognition API.

    Every call POSTs a form to ``apiurl`` containing the account credentials
    and the application id/key given to the constructor, and parses the reply
    as JSON.
    """

    apiurl = 'http://api.yundama.com/api.php'
    username = ''
    password = ''
    appid = ''
    appkey = ''

    def __init__(self, username, password, appid, appkey):
        """Store the credentials; ``appid`` is kept as a string."""
        self.username = username
        self.password = password
        self.appid = str(appid)
        self.appkey = appkey

    def _payload(self, method, **extra):
        """Return the form fields common to every call, plus ``extra``."""
        fields = {'method': method, 'username': self.username, 'password': self.password,
                  'appid': self.appid, 'appkey': self.appkey}
        fields.update(extra)
        return fields

    @staticmethod
    def _field_or_error(response, field):
        """Return ``response[field]``, or the error code the reply carries.

        Returns -9001 when the parsed reply is empty, and the reply's own
        ``ret`` value when that value is negative.
        """
        if not response:
            return -9001
        ret = response.get('ret')
        if ret and ret < 0:
            return ret
        return response[field]

    def request(self, fields, files=None):
        """POST ``fields`` (and optional ``files``) and return the parsed JSON.

        ``files`` maps form field names to either a file path or raw bytes;
        see ``post_url``.
        """
        return json.loads(self.post_url(self.apiurl, fields, files))

    def balance(self):
        """Return the ``balance`` field of the reply, or a negative error code."""
        return self._field_or_error(self.request(self._payload('balance')), 'balance')

    def login(self):
        """Return the ``uid`` field of the reply, or a negative error code."""
        return self._field_or_error(self.request(self._payload('login')), 'uid')

    def upload(self, filename, codetype, timeout):
        """Upload a captcha image and return its ``cid``, or a negative error code.

        ``filename`` is a path to the image, or the raw image bytes.
        """
        data = self._payload('upload', codetype=str(codetype), timeout=str(timeout))
        return self._field_or_error(self.request(data, {'file': filename}), 'cid')

    def result(self, cid):
        """Return the ``text`` reported for ``cid``, or ``''`` if there is none."""
        response = self.request(self._payload('result', cid=str(cid)))
        if not response:
            return ''
        # A reply without a 'text' key is treated like an empty text instead of
        # raising KeyError, so decode() keeps polling.
        return response.get('text') or ''

    def decode(self, filename, codetype, timeout):
        """Upload an image and poll for its text once per second.

        Returns ``(cid, text)`` on success, ``(error_code, '')`` when the
        upload did not yield a positive cid, and ``(-3003, '')`` when no text
        arrived after ``timeout`` polls.
        """
        cid = self.upload(filename, codetype, timeout)
        if not cid > 0:
            return cid, ''
        for _ in range(timeout):
            text = self.result(cid)
            if text != '':
                return cid, text
            time.sleep(1)
        return -3003, ''

    def post_url(self, url, fields, files=None):
        """POST ``fields`` and ``files`` to ``url`` and return the response text.

        Each value in ``files`` is either raw bytes (sent as the file content)
        or a path that is opened in binary mode for the duration of the
        request. The caller's mapping is left untouched and every file opened
        here is closed again, even when the request raises.
        """
        opened = []
        payload = {}
        try:
            for key, source in (files or {}).items():
                if isinstance(source, (bytes, bytearray)):
                    # Already-downloaded image data: send it as-is.
                    payload[key] = source
                else:
                    handle = open(source, 'rb')
                    opened.append(handle)
                    payload[key] = handle
            res = requests.post(url, files=payload, data=fields)
            return res.text
        finally:
            for handle in opened:
                handle.close()



# Account credentials. The recognition helpers in this module read these two
# names and treat the literal 'username' as "not configured yet", so replace
# both values with a real account before use.
username = 'username'
password = 'password'

# Application id sent with every API call.
appid = 4283

# Application key sent with every API call.
appkey = '02074c64f0d0bb9efb2df455537b01c3'

# Location of the sample captcha image.
filename = 'b.jpg'

# Captcha type id.
codetype = 1004

# Timeout, in seconds.
timeout = 60

def indetify(response_content):
    """Recognise a captcha through Yundama and return the recognised text.

    ``response_content`` is handed unchanged to ``YDMHttp.decode`` as the
    image to recognise. When the module-level ``username`` is still the
    placeholder ``'username'``, a reminder is printed and ``None`` is returned.
    """
    if username == 'username':
        print('请设置好相关参数再测试')
        return None

    client = YDMHttp(username, password, appid, appkey)

    # Log in to Yundama.
    account_id = client.login()
    print('uid: %s' % account_id)

    # Query the remaining balance.
    remaining = client.balance()
    print('balance: %s' % remaining)

    # Recognise: image, captcha type id, timeout in seconds.
    captcha_id, text = client.decode(response_content, codetype, timeout)
    print('cid: %s, result: %s' % (captcha_id, text))
    return text

def indetify_by_filepath(file_path):
    """Recognise the captcha image stored at ``file_path`` through Yundama.

    Returns the recognised text. When the module-level ``username`` is still
    the placeholder ``'username'``, a reminder is printed and ``None`` is
    returned.
    """
    if username == 'username':
        print('请设置好相关参数再测试')
        return None

    client = YDMHttp(username, password, appid, appkey)

    # Log in to Yundama.
    account_id = client.login()
    print('uid: %s' % account_id)

    # Query the remaining balance.
    remaining = client.balance()
    print('balance: %s' % remaining)

    # Recognise: image path, captcha type id, timeout in seconds.
    captcha_id, text = client.decode(file_path, codetype, timeout)
    print('cid: %s, result: %s' % (captcha_id, text))
    return text

if __name__ == '__main__':
    # Recognise the sample image named by the module-level ``filename`` setting
    # rather than repeating its path as a second hard-coded literal.
    indetify_by_filepath(filename)

2.2 代码中调用云打码的接口

下面以豆瓣登录过程中的验证码为例，了解云打码如何使用

# coding=utf-8
from selenium import webdriver
import time
import  requests
from yundama import indetify

# The find_element_by_* helpers were removed in Selenium 4.3;
# find_element(By.<strategy>, value) works on both Selenium 3 and 4.
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get("https://www.douban.com/")

    # Enter the user name.
    driver.find_element(By.ID, "form_email").send_keys("78****@qq.com")

    # Enter the password.
    driver.find_element(By.ID, "form_password").send_keys("****")

    # Read the captcha image address, download it and have it recognised.
    img_url = driver.find_element(By.ID, "captcha_image").get_attribute("src")
    response = requests.get(img_url)
    ret = indetify(response.content)

    # Enter the recognised captcha text.
    driver.find_element(By.ID, "captcha_field").send_keys(ret)

    time.sleep(5)
    # Click the login button.
    driver.find_element(By.CLASS_NAME, "bn-submit").click()

    time.sleep(10)
    print(driver.get_cookies())
finally:
    # Always close the browser, even when a step above raises.
    driver.quit()

2.3 若快打码识别

import hashlib
import requests
from datetime import datetime

# Ruokuai account credentials; getCode sends them as the 'username' and
# 'password' request parameters.
RUOUSER = ''
RUOPASS = ''

# Ruokuai 12306 captcha recognition: pass the path of a local image file.
def getCode(img):
    """Upload the image at path ``img`` to Ruokuai and return the ``Result`` field.

    The file is read in binary mode and closed before the upload starts.
    Raises ``KeyError`` when the JSON reply has no ``'Result'`` key.
    """
    url = "http://api.ruokuai.com/create.json"
    # Use a context manager so the file handle is released right after reading.
    with open(img, "rb") as image_file:
        fileBytes = image_file.read()
    paramDict = {
        'username': RUOUSER,
        'password': RUOPASS,
        'typeid': 6113,  # type id dedicated to 12306 image captchas
        'timeout': 90,
        'softid': 117157,  # used for promotion
        'softkey': '70acaa1e477a4374a7736264a24b974b'  # used for promotion
    }
    # Order in which the fields are written into the multipart body.
    paramKeys = ['username',
                 'password',
                 'typeid',
                 'timeout',
                 'softid',
                 'softkey'
                 ]
    result = http_upload_image(url, paramKeys, paramDict, fileBytes)
    return result['Result']


# Ruokuai 12306 captcha recognition: upload the image.
def http_upload_image(url, paramKeys, paramDict, filebytes):
    """POST form fields plus image bytes as a hand-built multipart body.

    The fields named in ``paramKeys`` are written in that order with values
    taken from ``paramDict``, followed by ``filebytes`` as the ``image`` part.
    The boundary is derived from an MD5 of the current timestamp. Returns the
    response decoded as JSON.
    """
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    boundary = '------------' + hashlib.md5(stamp.encode("utf8")).hexdigest().lower()
    delimiter = '\r\n--%s\r\n' % (boundary)

    # Collect the pieces in order and join them once at the end.
    chunks = []
    for key in paramKeys:
        chunks.append(delimiter.encode('ascii'))
        field = "Content-Disposition: form-data; name=\"%s\"\r\n\r\n%s" % (key, paramDict[key])
        chunks.append(field.encode('utf8'))
    chunks.append(delimiter.encode('ascii'))

    file_header = 'Content-Disposition: form-data; name=\"image\"; filename=\"%s\"\r\nContent-Type: image/gif\r\n\r\n' % ('sample')
    chunks.append(file_header.encode('utf8'))
    chunks.append(filebytes)

    closing = '\r\n--%s--\r\n' % (boundary)
    chunks.append(closing.encode('ascii'))
    body = b''.join(chunks)

    request_headers = {
        'Content-Type': 'multipart/form-data; boundary=%s' % boundary,
        'Connection': 'Keep-Alive',
        'Expect': '100-continue',
    }
    reply = requests.post(url, params='', data=body, headers=request_headers)
    return reply.json()


if __name__ == '__main__':
    # Manual check against a sample captcha image.
    recognised = getCode('../captcha_imgs/1544505888345_3.png')
    print(recognised)

3、常见的验证码的种类

3.1 url地址不变，验证码不变

这是验证码里面非常简单的一种类型，对应的只需要获取验证码的地址，然后请求，通过打码平台识别即可。

3.2 url地址不变，验证码变化

这种验证码的类型是更加常见的一种类型，对于这种验证码，大家需要思考：

在登录的过程中，假设我输入的验证码是对的，对方服务器是如何判断当前我输入的验证码是显示在我屏幕上的验证码，而不是其他的验证码呢？
在获取网页的时候，请求验证码，以及提交验证码的时候，对方服务器肯定通过了某种手段验证我之前获取的验证码和最后提交的验证码是同一个验证码，那这个手段是什么手段呢？

很明显，就是通过 cookie 来实现的。所以相应地，在请求页面、请求验证码、提交验证码的时候需要保证 cookie 的一致性，对此可以使用 requests.session 来解决。

bug_fu

关注

2
点赞
踩
12

收藏

觉得还不错? 一键收藏
2
评论
十七、打码平台的使用

1、常见的打码平台云打码：http://www.yundama.com/ （能够解决通用的验证码识别）极验验证码智能识别辅助：http://jiyandoc.c2567.com/ （能够解决复杂验证码的识别）2、云打码的使用2.1 云打码官方接口下面代码是云打码平台提供，做了个简单修改，实现了两个方法：indetify:传入图片的响应二进制数即可indetif...
复制链接

扫一扫

专栏目录