opencv+tesseract破解教务管理验证码
折腾了一天,弄得差不多了,训练器还要继续调试(折腾)。
效果如图:
改了改
还是有好多识别不准的,需要继续训练,但是我实在训不下去了,看了四百多个验证码的我已经要疯了…
训练的话参考这个
训练
顺便记录一下在 PowerShell 里用 ImageMagick 把 gif 批量转成 jpg 的命令,ImageMagick 挺好用的:
# For every file (not directory) in the current directory, invoke `convert`
# with the file name as input and the same name plus ".jpg" as output.
Get-ChildItem . |
    Where-Object { $_ -is [System.IO.FileInfo] } |
    ForEach-Object { convert $_.Name ($_.Name + ".jpg") }
- 程序:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using Emgu.CV;
using Emgu.CV.CvEnum;
using Emgu.CV.Structure;
using System.Drawing;
namespace jwglcode
{
/// <summary>
/// 教务管理验证码识别
/// </summary>
class jwglCode : IDisposable
{
/// <summary>
/// Original captcha image.
/// </summary>
public Mat Code;
/// <summary>
/// Image after processing.
/// </summary>
public Mat CodeReduction;
/// <summary>
/// Array holding the individual characters.
/// </summary>
public Mat[] CodeSingleArr;
/// <summary>
/// Array holding the individual characters after rotation.
/// </summary>
public Mat[] CodeSingleRotateArr;
/// <summary>
/// Length of the captcha.
/// </summary>
public int CodeLength;
/// <summary>
/// Captcha data (the recognition result).
/// </summary>
public string CodeStr;
/// <summary>
/// Captcha recognition: stores the source image and expected length, then
/// invokes <c>run()</c>.
/// </summary>
/// <param name="src">Original image; the reference is stored in <see cref="Code"/> without copying.</param>
/// <param name="codeLength">Captcha length; defaults to 4.</param>
/// <exception cref="ArgumentNullException"><paramref name="src"/> is <c>null</c>.</exception>
public jwglCode(Mat src, int codeLength = 4)
{
    // Reject a null image up front with a descriptive exception instead of
    // storing it and calling run() on it.
    if (src == null)
    {
        throw new ArgumentNullException("src");
    }

    Code = src;
    CodeLength = codeLength;

    // NOTE(review): run() is defined outside this view; presumably it reads
    // Code and CodeLength, so keep the assignments ahead of this call.
    run();
}
/// <summary>
/// Checks whether a value lies within a tolerance band around a target value.
/// </summary>
/// <param name="data">Value to test.</param>
/// <param name="todata">Target value at the centre of the band.</param>
/// <param name="r">Allowed deviation on either side of the target.</param>
/// <returns>
/// <c>true</c> when <paramref name="data"/> is between <c>todata - r</c> and
/// <c>todata + r</c>, both inclusive; otherwise <c>false</c>.
/// </returns>
private bool rangeTest(byte data, byte todata, byte r)
{
    // Byte operands are promoted to int by the arithmetic operators, so the
    // bounds may fall outside 0..255 without wrapping around.
    int lowerBound = todata - r;
    int upperBound = todata + r;
    return lowerBound <= data && data <= upperBound;
}
/// <summary>
/// 过滤干扰点
/// (如果一个点和他上下左右都不一样,则将此点颜色改为周围颜色)
/// </summary>
/// <param name="src">图像数据</param>
/// <returns></returns>
private Mat filter(Mat src)
{
Image<Gray, Byte> src1 = src.ToImage<Gray, Byte>();