效果
![fd9aae9fde94df858228f23b99b56ff4.png](https://img-blog.csdnimg.cn/img_convert/fd9aae9fde94df858228f23b99b56ff4.png)
项目
![324bfa7e1da7ebe79b8ea5e601548551.png](https://img-blog.csdnimg.cn/img_convert/324bfa7e1da7ebe79b8ea5e601548551.png)
代码
using Aspose.Cells;
using NLog;
using OpenCvSharp;
using OpenVINO.OCRService;
using Sdcb.OpenVINO;
using Sdcb.OpenVINO.PaddleOCR;
using Sdcb.OpenVINO.PaddleOCR.Models;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace OpenVINO.OCR
{
public partial class frmMain : Form
{
/// <summary>
/// Creates the main form: builds the designer-generated controls, then asks NLog's
/// RichTextBox target to re-initialize its textboxes against this form.
/// </summary>
public frmMain()
{
// Must run first: the NLog call below is handed this form after its controls exist.
InitializeComponent();
// NOTE(review): presumably this binds the RichTextBox log target declared in the NLog
// configuration to a control on this form - confirm against the NLog config file.
NLog.Windows.Forms.RichTextBoxTarget.ReInitializeAllTextboxes(this);
}
// Application start-up folder; assigned in frmMain_Load.
private string startupPath;
// Filter string handed to the OpenFileDialog when choosing the spreadsheet.
private string excelFileFilter = "表格|*.xlsx;*.xls;";
// Class logger.
private Logger log = NLog.LogManager.GetCurrentClassLogger();
// Cancellation source of the current run: created by the start handler, cancelled by the stop handler.
private CancellationTokenSource cts;
// Records parsed from the spreadsheet that still have to be downloaded.
private ConcurrentQueue<ImgInfo> ltImgInfo = new ConcurrentQueue<ImgInfo>();
// Records whose images were downloaded and are waiting for OCR.
private ConcurrentQueue<ImgInfo> matQueue = new ConcurrentQueue<ImgInfo>();
// Snapshot of the "save image" checkbox taken when a run starts.
private bool saveImg = false;
// Snapshot of the "save OCR text" checkbox taken when a run starts.
private bool saveOcr = false;
// Number of records whose OCR pass has finished.
private int ocrNum = 0;
// Number of records in the run (taken from ltImgInfo.Count when the run starts).
private int totalCount = 0;
// Number of records whose download pass has finished.
private int downloadCount = 0;
// Number of records flagged as violating a rule.
private int vioIDCount = 0;
/// <summary>
/// Form Load handler: builds the shared PaddleOCR engine from the models under the
/// start-up folder, tunes the global HTTP connection settings, and loads the two rule
/// lists (forbidden words and phone-number prefixes) from the "rules" folder.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void frmMain_Load(object sender, EventArgs e)
{
    // --- OCR engine -------------------------------------------------------
    startupPath = System.Windows.Forms.Application.StartupPath;
    FullOcrModel model = FullOcrModel.FromDirectory(
        startupPath + "\\inference\\ch_PP-OCRv3_det_infer",
        startupPath + "\\inference\\ch_ppocr_mobile_v2.0_cls_infer",
        startupPath + "\\inference\\ch_PP-OCRv3_rec_infer",
        startupPath + "\\inference\\ppocr_keys.txt",
        ModelVersion.V3);

    PaddleOcrOptions paddleOcrOptions = new PaddleOcrOptions
    {
        DetectionDeviceOptions = new DeviceOptions("CPU"),
        DetectionStaticSize = new OpenCvSharp.Size(800, 800),
        RecognitionStaticWidth = 512
    };

    Program.paddleOcr = new PaddleOcrAll(model, paddleOcrOptions);
    Program.paddleOcr.Detector.UnclipRatio = 1.5f;
    // Allow recognising text that is drawn at an angle.
    Program.paddleOcr.AllowRotateDetection = true;
    // Per the original note this flag covers text rotated by more than 90 degrees; it is switched off here.
    Program.paddleOcr.Enable180Classification = false;

    // --- HTTP defaults used by the download workers -------------------------
    ServicePointManager.Expect100Continue = false;
    ServicePointManager.DefaultConnectionLimit = 512;

    // --- Rule lists ---------------------------------------------------------
    Common.ltRuleContains.Clear();
    Common.ltRuleTel.Clear();

    // Forbidden words: one per line, used as-is.
    string ruleContainsPath = "rules\\rule_contains.txt";
    if (File.Exists(ruleContainsPath))
    {
        Common.ltRuleContains = File.ReadAllLines(ruleContainsPath).ToList();
    }
    string containsDump = string.Concat(Common.ltRuleContains.Select(word => word + Environment.NewLine));
    log.Info("rule_contains.txt---->包含" + Common.ltRuleContains.Count() + "个违禁词,内容如下:\r\n" + containsDump);

    // Phone-number prefixes: one per line, stored lower-cased.
    string ruleTelPath = "rules\\rule_tel.txt";
    if (File.Exists(ruleTelPath))
    {
        string[] prefixLines = File.ReadAllLines(ruleTelPath);
        for (int n = 0; n < prefixLines.Length; n++)
        {
            Common.ltRuleTel.Add(prefixLines[n].ToLower());
        }
    }
    string telDump = string.Concat(Common.ltRuleTel.Select(prefix => prefix + Environment.NewLine));
    log.Info("rule_tel.txt---->包含" + Common.ltRuleTel.Count() + "个号码前缀,内容如下:\r\n" + telDump);
}
/// <summary>
/// "Select spreadsheet" handler: lets the user pick an Excel file, reads the first
/// worksheet (skipping its first row) and turns every data row into an
/// <c>ImgInfo</c> (column 0 = id, column 1 = title, remaining columns = image sources).
/// The parsed records replace <c>ltImgInfo</c> only when the whole sheet was parsed
/// successfully, so a failure never leaves a half-filled work queue behind.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    try
    {
        string excelPath;
        // The dialog owns native resources; release them as soon as the path is known.
        using (OpenFileDialog ofd = new OpenFileDialog())
        {
            ofd.Filter = excelFileFilter;
            if (ofd.ShowDialog() != DialogResult.OK) return;
            excelPath = ofd.FileName;
        }

        log.Info("解析中……");
        Application.DoEvents(); // let the log textbox repaint before the blocking parse
        Stopwatch sw = Stopwatch.StartNew();

        Workbook workbook = new Workbook(excelPath);
        Cells cells = workbook.Worksheets[0].Cells;
        int columnCount = cells.MaxColumn + 1;
        // Start at row index 1 so the first (header) row is not exported.
        System.Data.DataTable dataTable1 = cells.ExportDataTable(1, 0, cells.MaxDataRow, columnCount);

        // Parse into a local queue and publish it only at the end.
        ConcurrentQueue<ImgInfo> parsed = new ConcurrentQueue<ImgInfo>();
        int imgCount = 0;
        foreach (DataRow row in dataTable1.Rows)
        {
            ImgInfo record = new ImgInfo();
            record.id = row[0].ToString();
            record.title = row[1].ToString();

            List<MatInfo> list = new List<MatInfo>();
            for (int i = 2; i < columnCount; i++)
            {
                string cellText = row[i].ToString();
                if (string.IsNullOrEmpty(cellText))
                {
                    continue;
                }

                if (i >= 7)
                {
                    // Columns from index 7 onwards go through Common.GetScrUrl, which returns
                    // a list of URLs (presumably extracted from embedded markup - confirm).
                    foreach (string url in Common.GetScrUrl(cellText))
                    {
                        MatInfo matInfo = new MatInfo();
                        matInfo.url = url;
                        list.Add(matInfo);
                    }
                }
                else
                {
                    // Columns 2..6 are used directly as the image URL.
                    MatInfo matInfo = new MatInfo();
                    matInfo.url = cellText;
                    list.Add(matInfo);
                }
            }

            record.images = list;
            imgCount += list.Count;
            parsed.Enqueue(record);
        }

        // Everything parsed: make the new records visible to the start handler.
        ltImgInfo = parsed;
        log.Info("解析完毕,一共[" + ltImgInfo.Count + "]条记录,[" + imgCount + "]张图片,耗时:" + sw.ElapsedMilliseconds + "毫秒");
    }
    catch (Exception ex)
    {
        log.Error("解析表格异常:" + ex.Message);
        MessageBox.Show("解析表格异常:" + ex.Message);
    }
}
/// <summary>
/// Writes the progress summary into <c>txtTotal</c>. The update is marshalled through
/// <c>txtTotal.Invoke</c>, so the call blocks until the control's thread has applied it.
/// </summary>
/// <param name="total">Total number of records, already formatted.</param>
/// <param name="ocrNum">Number of records finished by OCR, already formatted.</param>
/// <param name="downloadCount">Number of records finished by download, already formatted.</param>
/// <param name="time">Elapsed time in milliseconds.</param>
/// <param name="vioIDCount">Number of records flagged as violations.</param>
void ShowCostTime(string total, string ocrNum, string downloadCount, long time, int vioIDCount)
{
    MethodInvoker refresh = delegate
    {
        TimeSpan elapsed = TimeSpan.FromMilliseconds(time);
        string elapsedText = elapsed.ToString();
        string summary = string.Format(
            "下载完成:{0}/{1},识别完成:{2}/{3},违规ID数量:{5},用时:{4}",
            downloadCount, total, ocrNum, total, elapsedText, vioIDCount);
        txtTotal.Text = summary;
    };
    txtTotal.Invoke(refresh);
}
/// <summary>
/// Start handler: after confirmation, clears the previous results, snapshots the
/// checkbox options, and launches two download workers plus one OCR worker that
/// process every record currently held in <c>ltImgInfo</c>.
/// </summary>
/// <remarks>
/// Changes compared with the earlier implementation:
/// the violation counter is reset for each run; workers capture the run's own
/// cancellation token and queues instead of re-reading the form fields (a restarted run
/// can no longer revive old workers); idle workers sleep instead of spinning; bitmaps,
/// responses and OpenCV mats are always released; each image is saved once.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    if (ltImgInfo.Count == 0)
    {
        MessageBox.Show("请先选择表格!");
        return;
    }

    DialogResult result = MessageBox.Show("确认开始下载识别?此操作会清空上一次的数据,请注意备份!", "确认", MessageBoxButtons.YesNo, MessageBoxIcon.Question);
    if (result != DialogResult.Yes)
    {
        log.Info("取消开始下载识别!");
        return;
    }
    log.Info("确认开始下载识别!");

    // CreateDirectory is a no-op for folders that already exist.
    Directory.CreateDirectory("img");
    Directory.CreateDirectory("ocr_result");
    Directory.CreateDirectory("result");
    Directory.CreateDirectory("result//img");

    // Remove everything from the previous run, then start a fresh summary file.
    // (result_detail.txt is re-created by the first AppendAllText.)
    foreach (string filePath in Directory.GetFiles("result", "*", SearchOption.AllDirectories))
    {
        File.Delete(filePath);
    }
    File.WriteAllText("result//result.txt", "id\ttitel\tcontent\r\n");

    // Create the cancellation source BEFORE the stop button becomes clickable, so a stop
    // click processed during DoEvents below cancels this run rather than a stale one.
    cts = new CancellationTokenSource();
    CancellationToken token = cts.Token;

    btnStop.Enabled = true;
    btnStart.Enabled = false;
    chkSaveImg.Enabled = false;
    chkSaveOcr.Enabled = false;
    saveImg = chkSaveImg.Checked;
    saveOcr = chkSaveOcr.Checked;
    Application.DoEvents();

    Stopwatch total = Stopwatch.StartNew();

    // Per-run state. The queues are captured in locals so workers of this run never
    // touch queues that a later run (or a newly loaded spreadsheet) installs.
    matQueue = new ConcurrentQueue<ImgInfo>();
    ConcurrentQueue<ImgInfo> pending = ltImgInfo;
    ConcurrentQueue<ImgInfo> decoded = matQueue;
    ocrNum = 0;
    totalCount = pending.Count;
    downloadCount = 0;
    vioIDCount = 0; // was never reset, so a second run kept counting from the first run's total

    const int downloadThreadNum = 2;
    for (int i = 0; i < downloadThreadNum; i++)
    {
        Task.Factory.StartNew(() => RunDownloadWorker(pending, decoded, token), TaskCreationOptions.LongRunning);
    }

    Task.Factory.StartNew(() => RunRecognitionWorker(decoded, token, total), TaskCreationOptions.LongRunning);
}

/// <summary>Pause used by a worker that found its input queue empty, in milliseconds.</summary>
private const int IdlePollMilliseconds = 20;

/// <summary>
/// Download worker loop: takes records from <paramref name="pending"/>, downloads all of
/// their images and hands the record to <paramref name="decoded"/>. Ends when the run is
/// cancelled or when every record of the run has been downloaded.
/// </summary>
private void RunDownloadWorker(ConcurrentQueue<ImgInfo> pending, ConcurrentQueue<ImgInfo> decoded, CancellationToken token)
{
    while (!token.IsCancellationRequested)
    {
        if (Volatile.Read(ref downloadCount) == totalCount)
        {
            log.Info("--------------------------------->下载完成!<----------------------------------");
            return;
        }

        ImgInfo imgInfo;
        if (!pending.TryDequeue(out imgInfo))
        {
            // The other worker is still busy with the last record(s): wait without burning a core.
            System.Threading.Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        // Back-pressure: slow down while the OCR worker is far behind.
        if (decoded.Count > 50)
        {
            System.Threading.Thread.Sleep(1000);
        }
        if (decoded.Count > 100)
        {
            System.Threading.Thread.Sleep(2000);
        }

        int imagesCount = imgInfo.images.Count;
        for (int j = 0; j < imagesCount && !token.IsCancellationRequested; j++)
        {
            try
            {
                DownloadImage(imgInfo, j);
            }
            catch (Exception ex)
            {
                log.Error("---->id:" + imgInfo.id + ",url[" + imgInfo.images[j].url + "],下载异常:" + ex.Message);
            }
        }

        if (token.IsCancellationRequested)
        {
            // The run was stopped: do not publish into (or count towards) whatever run is active now.
            ReleaseMats(imgInfo);
            return;
        }

        decoded.Enqueue(imgInfo);
        Interlocked.Increment(ref downloadCount);
    }
}

/// <summary>
/// Downloads image <paramref name="index"/> of <paramref name="imgInfo"/>, optionally saves
/// it under "img", and stores the decoded BGR matrix and the image name on the record.
/// Any failure is reported to the caller as an exception.
/// </summary>
private void DownloadImage(ImgInfo imgInfo, int index)
{
    MatInfo image = imgInfo.images[index];
    Stopwatch sw = Stopwatch.StartNew();

    HttpWebRequest request = WebRequest.Create(image.url) as HttpWebRequest;
    if (request == null)
    {
        // file://, ftp:// ... produce other request types; fail with a clear message instead of a NullReferenceException.
        throw new NotSupportedException("仅支持http/https图片地址");
    }

    try
    {
        request.KeepAlive = false;
        request.ServicePoint.Expect100Continue = false;
        request.Timeout = 2000;          // 2 seconds
        request.ReadWriteTimeout = 2000; // 2 seconds
        request.ServicePoint.UseNagleAlgorithm = false;
        request.ServicePoint.ConnectionLimit = 65500;
        request.AllowWriteStreamBuffering = false;
        request.Proxy = null;
        request.CookieContainer = new CookieContainer();
        request.CookieContainer.Add(new Cookie("AspxAutoDetectCookieSupport", "1") { Domain = new Uri(image.url).Host });

        // The stream stays open until the bitmap has been saved and converted, and all
        // three objects are released even when decoding or saving throws.
        using (HttpWebResponse wresp = (HttpWebResponse)request.GetResponse())
        using (Stream s = wresp.GetResponseStream())
        using (System.Drawing.Image decodedImage = System.Drawing.Image.FromStream(s))
        {
            sw.Stop();
            Bitmap bmp = (Bitmap)decodedImage;

            image.name = imgInfo.id + "_" + index;
            if (saveImg)
            {
                bmp.Save("img//" + image.name + ".jpg");
            }

            var mat = OpenCvSharp.Extensions.BitmapConverter.ToMat(bmp);
            try
            {
                if (mat.Channels() == 4)
                {
                    // Drop the alpha channel before handing the matrix to OCR.
                    Cv2.CvtColor(mat, mat, ColorConversionCodes.BGRA2BGR);
                }
            }
            catch
            {
                mat.Dispose();
                throw;
            }
            image.mat = mat;
        }
    }
    finally
    {
        // Releases the connection on the failure paths as well; harmless after a normal close.
        request.Abort();
    }

    log.Info(" " + image.name + "-->下载用时:" + sw.ElapsedMilliseconds + "毫秒");
}

/// <summary>
/// OCR worker loop: takes downloaded records from <paramref name="decoded"/>, runs OCR and
/// the rule checks image by image (stopping at the first violation of a record), and
/// refreshes the progress line. Ends when the run is cancelled or every record is done.
/// </summary>
private void RunRecognitionWorker(ConcurrentQueue<ImgInfo> decoded, CancellationToken token, Stopwatch total)
{
    while (!token.IsCancellationRequested)
    {
        if (Volatile.Read(ref ocrNum) == totalCount)
        {
            log.Info("--------------------------------->识别完成!<----------------------------------");
            return;
        }

        ImgInfo imgInfo;
        if (!decoded.TryDequeue(out imgInfo))
        {
            // Nothing downloaded yet: wait without burning a core the downloads/OCR could use.
            System.Threading.Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        Stopwatch perID = Stopwatch.StartNew();
        int imagesCount = imgInfo.images.Count;
        try
        {
            for (int j = 0; j < imagesCount && !token.IsCancellationRequested; j++)
            {
                if (RecognizeImage(imgInfo, imgInfo.images[j]))
                {
                    break; // one violation is enough to flag the record
                }
            }
        }
        finally
        {
            // Images skipped by the break (or by cancellation) still hold native memory.
            ReleaseMats(imgInfo);
        }

        if (token.IsCancellationRequested)
        {
            return; // stopped mid-record: leave the counters of a possibly restarted run alone
        }

        perID.Stop();
        log.Info("---->id:" + imgInfo.id + ",图片张数[" + imagesCount + "],识别小计用时:" + perID.ElapsedMilliseconds + "毫秒");
        Interlocked.Increment(ref ocrNum);
        ShowCostTime(totalCount.ToString(), ocrNum.ToString(), downloadCount.ToString(), total.ElapsedMilliseconds, vioIDCount);
    }
}

/// <summary>
/// Runs OCR on one image and applies the forbidden-word and phone-number checks.
/// The image's matrix is always released before returning.
/// </summary>
/// <returns><c>true</c> when a rule matched and the violation was recorded; otherwise <c>false</c>
/// (including when the image was never downloaded or recognition failed).</returns>
private bool RecognizeImage(ImgInfo imgInfo, MatInfo image)
{
    try
    {
        if (image.mat == null || image.mat.Empty())
        {
            return false;
        }

        Stopwatch sw = Stopwatch.StartNew();
        PaddleOcrResult ocrResult = Program.paddleOcr.Run(image.mat);
        sw.Stop();
        log.Info(" " + image.name + "---->识别用时:" + sw.ElapsedMilliseconds + "毫秒");

        // Keep confident regions only, ordered by the region centre (Y, then X).
        string ocrInfo = string.Join("\n", from x in ocrResult.Regions
                                           where x.Score > 0.8
                                           orderby x.Rect.Center.Y, x.Rect.Center.X
                                           select x.Text);
        if (saveOcr)
        {
            File.WriteAllText("ocr_result//" + image.name + ".txt", ocrInfo);
        }

        // The rule checks work on the text without surrounding whitespace and without spaces.
        ocrInfo = ocrInfo.Trim();
        ocrInfo = ocrInfo.Replace(" ", "");

        string words = "";
        if (Common.RuleContainsCheck(ocrInfo, out words, ocrResult))
        {
            string resultInfo = string.Format("ID:{0},Title:[{1}],------>包含违禁词:{2}", imgInfo.id, imgInfo.title, words);
            RecordViolation(imgInfo, image, ocrInfo, resultInfo, "包含违禁词:" + words);
            return true;
        }
        if (Common.RuleTelCheck(ocrInfo, out words, ocrResult))
        {
            string resultInfo = string.Format("ID:{0},Title:[{1}],------>疑似包含电话号码:{2}", imgInfo.id, imgInfo.title, words);
            RecordViolation(imgInfo, image, ocrInfo, resultInfo, "疑似包含电话号码:" + words);
            return true;
        }
        return false;
    }
    catch (Exception ex)
    {
        // Logged as an error, consistent with the download failures.
        log.Error(" " + image.name + "---->识别异常:" + ex.Message);
        return false;
    }
    finally
    {
        if (image.mat != null)
        {
            image.mat.Dispose();
            image.mat = null;
        }
    }
}

/// <summary>
/// Persists one violation: a line in result.txt, a detail entry in result_detail.txt,
/// a copy of the offending image under result/img, and the violation counter.
/// </summary>
private void RecordViolation(ImgInfo imgInfo, MatInfo image, string ocrInfo, string resultInfo, string reason)
{
    log.Info(resultInfo);
    File.AppendAllText("result//result.txt", imgInfo.id + "\t" + imgInfo.title + "\t" + reason + "\r\n");
    File.AppendAllText("result//result_detail.txt", "-------->\r\n" + resultInfo + ",识别内容" + ocrInfo + "\r\n<--------\r\n");
    Cv2.ImWrite("result//img//" + image.name + ".jpg", image.mat);
    Interlocked.Increment(ref vioIDCount);
}

/// <summary>Disposes every matrix still attached to the record's images.</summary>
private static void ReleaseMats(ImgInfo imgInfo)
{
    foreach (MatInfo image in imgInfo.images)
    {
        if (image.mat != null)
        {
            image.mat.Dispose();
            image.mat = null;
        }
    }
}
/// <summary>
/// Stop handler: requests cancellation of the running download/OCR workers and
/// restores the start button and the option checkboxes.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    // cts only exists once a run has been started; guard against a stop click that
    // arrives before that, which used to throw a NullReferenceException.
    if (cts != null)
    {
        cts.Cancel();
    }
    btnStop.Enabled = false;
    btnStart.Enabled = true;
    chkSaveImg.Enabled = true;
    chkSaveOcr.Enabled = true;
}
}
}