百度AI 识别验证码 脚本查询数据

流程:

1.抓取网页结构
2.人工判断需要解析的信息与配置对应正则
3.正则抓出验证码ID
4.使用获取到的ID请求验证码图片并保存到本地(对方接口返回的图片地址不带正确的后缀,不能直接把该URL传给百度接口,所以需要先下载再上传)
5.上传百度AI文字或图片识别接口,根据返回内容判断图片识别是否正确
6.发送对应内容表单并处理返回值
7.返回正确则保存对应内容;返回错误则重复上述流程直到成功(百度AI识别的完全正确率只有30%左右,要注意接口调用量)

代码如下

let co = require('co'); //异步控制器
let { sendHttpRequest } = require('../tools/httpTool');//封装的http处理
var fs = require('fs');//文件操作
const xlsx = require('node-xlsx');//xlsx解析
const { sqlModel } = require('./riskInquiryApi-sqlModel')//sql相关操作

var path = require("path");
/**
 * Recursively empties a folder and optionally removes the folder itself.
 *
 * Sub-folders found inside `url` are always removed together with their
 * contents. If `url` does not exist, a message is logged and nothing else
 * happens.
 *
 * @param {string} url - Path of the folder to clean.
 * @param {boolean} delFolder - When truthy, the folder at `url` is removed
 *   once it has been emptied; otherwise the (now empty) folder is kept.
 */
let delFolderContents = function (url, delFolder) {
    if (!fs.existsSync(url)) {
        console.log("给定的路径不存在,请给出正确的路径");
        return;
    }

    for (const entry of fs.readdirSync(url)) {
        const curPath = path.join(url, entry);
        // lstat, not stat: a symlink that points at a directory must be
        // unlinked like a file. Following it would delete the files of the
        // link target and then fail in rmdirSync, because the link itself
        // is not a directory.
        if (fs.lstatSync(curPath).isDirectory()) {
            delFolderContents(curPath, true);
        } else {
            fs.unlinkSync(curPath);
        }
    }

    if (delFolder) {
        fs.rmdirSync(url);
    }
};


// OCR client class exported by the baidu-aip-sdk package.
var AipOcrClient = require("baidu-aip-sdk").ocr;
// Flag read by checkSafe to decide whether to report "run in progress".
let checking = false;
// Set APPID/AK/SK
// NOTE(review): these look like placeholders; real credentials should
// presumably be loaded from configuration rather than committed here — confirm.
var APP_ID = "APP_ID APP_ID APP_ID APP_ID APP_ID ";
var API_KEY = "API_KEY API_KEY API_KEY ";
var SECRET_KEY = "SECRET_KEY SECRET_KEY SECRET_KEY SECRET_KEY ";

// Create the client object; keeping a single instance for all service calls is recommended.
var client = new AipOcrClient(APP_ID, API_KEY, SECRET_KEY);

// Today's date as a "Y-M-D" string (month and day are NOT zero-padded).
// It is passed to sqlModel.updatePartnerState and compared against each
// partner's checkTime in checkSafeRun.
let nowDate = new Date();
nowDate = nowDate.getFullYear() + "-" + (nowDate.getMonth() * 1 + 1) + "-" + nowDate.getDate();
// Index into partnerList used by the checkSafeRun loop; checkSafe also reports it as progress.
let checkIndex = 0;
// Delay in milliseconds that checkSafeRun waits after each check attempt;
// checkOne raises it when no captcha ID can be extracted and resets it to 1000 otherwise.
let sleepTime = 1000;
/**
 * Collects the content of every `value="..."` attribute found in a string.
 *
 * @param {string} str - Text (typically HTML) to scan; it is coerced to a string.
 * @returns {string[]} The attribute contents in order of appearance. An
 *   empty attribute (`value=""`) yields an empty string.
 */
function getExecStrs(str) {
    // [^"]* rather than .+? : the lazy "any character" form needs at least one
    // character, so on value="" it consumed the closing quote and captured
    // the following attribute text up to the next quote instead.
    return Array.from(String(str).matchAll(/value="([^"]*)"/g), (match) => match[1]);
}


// Number of rows returned by the most recent partnersearch call.
let total = 0;
// Not referenced in the visible part of this file.
let nowIndex = 0;
// Rows returned by the most recent partnersearch call; checkSafeRun iterates over this list.
let partnerList = [];

/**
 * Request handler: loads the partner list, caches it in module state and
 * returns it to the client.
 *
 * The request body is copied and passed to `sqlModel.partnersearch` as the
 * filter object. On success `total` and `partnerList` are replaced and the
 * response body is `{ code: 0, data: <rows>, success: true }`. A rejection
 * from the query propagates to the caller.
 *
 * @param {object} ctx - Request context; `ctx.request.body` is read and `ctx.body` is written.
 * @param {Function} next - Unused.
 * @returns {Promise<void>}
 */
let partnersearch = async (ctx, next) => {
    const filters = { ...ctx.request.body };
    const rows = await sqlModel.partnersearch(filters);

    console.log("已获取到名单:" + rows.length);
    total = rows.length;
    partnerList = rows;

    ctx.body = {
        code: 0,
        data: rows,
        success: true
    };
}

/**
 * Request handler that starts a background screening run unless one is
 * already in progress.
 *
 * While a run is active the response reports the current progress
 * (`code: -1`). Otherwise the response is `code: 0`, the ./webData folder is
 * emptied and checkSafeRun is started without being awaited, so the HTTP
 * response is not held open for the duration of the run.
 *
 * @param {object} ctx - Request context; only `ctx.body` is written.
 * @param {Function} next - Unused.
 */
let checkSafe = (ctx, next) => {
    if (checking) {
        ctx.body = {
            code: -1,
            msg: "排查运行中,当前进度:" + checkIndex + "/" + partnerList.length,
            success: true
        };
        return;
    }

    // Mark the run as active BEFORE starting it. The flag was previously
    // never set, so the branch above was unreachable and every request
    // started another overlapping run.
    checking = true;
    ctx.body = {
        code: 0,
        msg: "开始确认名单",
        success: true
    };

    try {
        // Clear images saved by a previous run so stale files are not reused.
        delFolderContents("./webData", false);
    } catch (err) {
        checking = false;
        throw err;
    }

    // Fire-and-forget by design, but never leave the promise floating:
    // log a failure and always release the flag when the run ends.
    checkSafeRun()
        .catch((err) => {
            console.error(err);
        })
        .finally(() => {
            checking = false;
        });
}
/**
 * Screens every partner in `partnerList` that has no verdict yet, or whose
 * verdict is more than 30 days old.
 *
 * For each such partner one attempt (`checkOne`) is made: fetch the page,
 * extract the captcha ID, download and save the captcha image, OCR it,
 * verify the captcha, run the query and store the verdict. A failed attempt
 * is repeated for the same partner after `sleepTime` milliseconds; there is
 * no retry limit.
 *
 * Each attempt is awaited to completion before the next one starts. An
 * attempt used to return as soon as the image write was scheduled, so slow
 * attempts overlapped: they overwrote the same image file and could advance
 * `checkIndex` twice, skipping a partner.
 *
 * @returns {Promise<false|undefined>} `false` when `partnerList` is empty,
 *   otherwise `undefined` once the whole list has been processed.
 */
let checkSafeRun = async () => {
    const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36";
    const MAX_VERDICT_AGE_MS = 1000 * 60 * 60 * 24 * 30;

    /**
     * Appends an image-save failure to errorlog.txt. Never throws: a failure
     * to write the log is only reported on the console.
     */
    async function logSaveFailure(err) {
        try {
            // The previous read-then-rewrite code dereferenced the file data
            // exactly when the read had failed, and wrote the literal text
            // "/r/n" instead of a line break.
            await fs.promises.appendFile('errorlog.txt', '\r\n' + "保存失败:" + err);
        } catch (logErr) {
            console.error(logErr);
        }
    }

    /**
     * Runs one complete attempt for a single partner.
     *
     * @param {{codeIndex: number, id: *, pName: string, pCardNum: string}} options
     * @returns {Promise<boolean>} `true` when a verdict was obtained and
     *   handed to sqlModel.updatePartnerState, `false` when the attempt has
     *   to be repeated.
     */
    async function checkOne(options) {
        try {
            // 1. Fetch the page that embeds the captcha image URL.
            const pageRequest = {
                hostname: "aaaa.aaa.aaa",
                path: "/aaa/",
                port: 80,
                method: "get",
                headers: {
                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                    "Accept-Language": "zh-CN,zh;q=0.9",
                    "Host": "aaaaa.aaa.aa",
                    "Referer": "bbbb.bbb.bb",
                    "Upgrade-Insecure-Requests": "1",
                    "User-Agent": USER_AGENT
                }
            };
            const page = await co(function* () {
                return yield sendHttpRequest(pageRequest, "");
            });
            const html = page.data;

            // 2. Cut the captcha ID out of "captchaXgl.do?captchaId=<id>&random=".
            const idStart = html.indexOf("captchaXgl.do?captchaId=");
            const idEnd = html.indexOf("&random=");
            const captCode = html.slice(idStart, idEnd).replace("captchaXgl.do?captchaId=", "");
            if (!captCode) {
                // No captcha ID in the page: back off. The first miss jumps
                // to one minute, every further miss doubles the delay.
                sleepTime = sleepTime !== 1000 ? sleepTime * 2 : 1000 * 60;
                return false;
            }
            sleepTime = 1000;

            // 3. Download the captcha image and save it locally.
            const imageRequest = {
                hostname: "aaa.aa.aa",
                path: "aaaa.aa.aa/captchaXgl.do?captchaId=" + captCode + "&random=" + Math.random().toFixed(16),
                port: 80,
                method: "get",
                headers: {
                    "Accept": " image/webp,image/apng,image/*,*/*;q=0.8",
                    "Accept-Language": "zh-CN,zh;q=0.9",
                    "Host": "aaaa.aaa.aa",
                    "Referer": "bbbb.bbb.bb",
                    "User-Agent": USER_AGENT
                }
            };
            const imgdata = await co(function* () {
                return yield sendHttpRequest(imageRequest, "", { setEncoding: "binary", notpackage: true });
            });

            const imagePath = "webData/" + options.codeIndex + ".jpg";
            try {
                await fs.promises.writeFile(imagePath, imgdata, 'binary');
            } catch (err) {
                await logSaveFailure(err);
                return false;
            }

            // 4. OCR the saved image (sent to the API as base64).
            const image = (await fs.promises.readFile(imagePath)).toString("base64");
            const result = await client.generalBasic(image, { language_type: "ENG" });
            // A response without words_result is treated as an unreadable captcha.
            const words = (result?.words_result?.[0]?.words || "").replace(/ /g, "");
            if (!/^[0-9a-zA-Z]{4}$/.test(words)) {
                console.log("验证码无效");
                console.log("==================");
                return false;
            }

            // 5. Ask the site whether the recognized captcha text is correct.
            const verifyRequest = {
                hostname: "aa.aa.aa",
                path: "aaaa.aaa.aa/checkyzm?captchaId=" + captCode + "&pCode=" + words,
                port: 80,
                method: "get",
                headers: {
                    "Accept": "application/json, text/javascript, */*; q=0.01",
                    "Accept-Language": "zh-CN,zh;q=0.9",
                    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
                    "Host": "aaaa.aaaa.aa",
                    "Referer": "bbbbbb.bbb.bb",
                    "User-Agent": USER_AGENT,
                    "X-Requested-With": "XMLHttpRequest",
                }
            };
            const verifyBack = await co(function* () {
                return yield sendHttpRequest(verifyRequest, "");
            });
            const captchaAccepted = Number(verifyBack.data) === 1;
            console.log(verifyBack, captchaAccepted ? "验证码通过" : "验证码未通过");
            if (!captchaAccepted) {
                return false;
            }

            // 6. Run the actual query with the accepted captcha.
            const postdata = {
               /**** datas  *****/
            };
            // NOTE(review): encodeURI leaves "&", "=" and "+" unescaped;
            // encodeURIComponent is the usual choice for query values — confirm
            // what the remote endpoint expects before changing it.
            const query = Object.entries(postdata)
                .map(([key, value]) => key + "=" + encodeURI(value))
                .join("&");
            const checkRequest = {
                hostname: "AAAAA.AAAAA.AA",
                path: "AAAAA.AAAAA.AA/AAA.do?" + query,
                port: 80,
                method: "get",
                headers: {
                    "Accept": "application/json, text/javascript, */*; q=0.01",
                    "Accept-Language": "zh-CN,zh;q=0.9",
                    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
                    "Host": "AAAAA.AAAAA.AA",
                    "Referer": "BBBBB.BBBBBB.BB",
                    "User-Agent": USER_AGENT,
                    "X-Requested-With": "XMLHttpRequest",
                }
            };
            const checkBack = await co(function* () {
                return yield sendHttpRequest(checkRequest, query, { showcontent: true });
            });

            // 7. Any hit in the first data entry marks the partner "-1", no hit "1".
            // Deliberately not fully null-safe: a response without `data`
            // throws and is retried rather than being stored as "1".
            const issafe = checkBack?.data[0]?.result.length > 0 ? "-1" : "1";
            try {
                await sqlModel.updatePartnerState([issafe, nowDate, options.id]);
            } catch (err) {
                // As before, a failed save does not cause the partner to be re-queried.
                console.error(err);
            }
            return true;
        } catch (e) {
            console.log(e);
            return false;
        }
    }

    if (partnerList.length == 0) {
        console.log("请先获取合伙人列表");
        return false
    }

    checkIndex = 0;
    while (checkIndex < partnerList.length) {
        const partner = partnerList[checkIndex];
        const state = partner.issafe * 1;
        const hasVerdict = state == 1 || state == -1;
        const isStale = (new Date(partner.checkTime).getTime() + MAX_VERDICT_AGE_MS) < new Date(nowDate).getTime();

        if (hasVerdict && !isStale) {
            checkIndex++;
            continue;
        }

        console.log("当前进度:" + checkIndex + "/" + partnerList.length, new Date().getTime());
        const finished = await checkOne({
            codeIndex: checkIndex,
            id: partner.id,
            pName: partner.partner_name || "",
            pCardNum: partner.partner_id_or_org_id || "",
        });
        if (finished) {
            // Only this loop advances the index, exactly once per checked partner.
            checkIndex++;
            console.log("checkIndex++", checkIndex);
        }
        // Pause between attempts so the site does not trigger a secondary verification.
        await sleepPromise(sleepTime);
    }
}

/**
 * Pause helper used between requests so that high-frequency access does not
 * trigger a secondary verification.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<undefined>} Settles after the timer fires.
 */
function sleepPromise(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}

/**
 * Request handler that imports partners from an uploaded xlsx file.
 *
 * The first sheet of the file at `ctx.request.files.file.path` is read; its
 * first row is treated as a header and skipped, and every further row is
 * passed to `sqlModel.addPartner`, one at a time and in order.
 *
 * The handler does not throw. On success the response body is
 * `{ code: 0, data: <request body>, status: true, success: true }`. On any
 * failure (missing upload, unreadable workbook, failed insert) it is
 * `{ code: -1, err: <message>, status: false, success: false }`. Rows
 * inserted before a failure are not rolled back.
 *
 * @param {object} ctx - Request context; `ctx.request` is read and `ctx.body` is written.
 * @param {Function} next - Unused.
 * @returns {Promise<void>}
 */
let cacheExcl = async (ctx, next) => {
    const request = ctx.request;
    try {
        // Parsing happens inside the try block so that a corrupt or missing
        // file produces the error response below instead of escaping the handler.
        const sheets = xlsx.parse(request.files.file.path);
        const rows = sheets[0].data;

        // Start at 1: row 0 is the header.
        for (let i = 1; i < rows.length; i++) {
            await sqlModel.addPartner(rows[i]);
        }

        ctx.body = {
            code: 0,
            data: request.body,
            status: true,
            success: true
        };
    } catch (e) {
        ctx.body = {
            code: -1,
            // JSON.stringify turns an Error into "{}", which hides the cause.
            err: e instanceof Error ? e.message : JSON.stringify(e),
            status: false,
            success: false
        };
    }
}



module.exports = { partnersearch, checkSafe, cacheExcl }
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值