C# AI鉴图宝:利用 OCR 技术判别图片是否违规

效果

fd9aae9fde94df858228f23b99b56ff4.png

项目

324bfa7e1da7ebe79b8ea5e601548551.png

代码

using Aspose.Cells;
using NLog;
using OpenCvSharp;
using OpenVINO.OCRService;
using Sdcb.OpenVINO;
using Sdcb.OpenVINO.PaddleOCR;
using Sdcb.OpenVINO.PaddleOCR.Models;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace OpenVINO.OCR
{
    public partial class frmMain : Form
    {
        /// <summary>
        /// Creates the main form: builds the designer-generated controls and then
        /// asks NLog's RichTextBoxTarget to re-initialize its text boxes for this form.
        /// </summary>
        public frmMain()
        {
            InitializeComponent();
            // NOTE(review): presumably this binds the NLog rich-text-box target to a control
            // created by InitializeComponent, so it has to run after it — confirm against NLog config.
            NLog.Windows.Forms.RichTextBoxTarget.ReInitializeAllTextboxes(this);
        }

        // Application start-up directory; assigned in frmMain_Load.
        String startupPath;
        // Filter string handed to the spreadsheet file picker.
        private string excelFileFilter = "表格|*.xlsx;*.xls;";
        private Logger log = NLog.LogManager.GetCurrentClassLogger();
        // Cancellation source for the background workers; created when a run starts
        // and cancelled by the stop button.
        CancellationTokenSource cts;

        // Records parsed from the spreadsheet, waiting to be downloaded.
        ConcurrentQueue<ImgInfo> ltImgInfo = new ConcurrentQueue<ImgInfo>();
        // Records whose images have been downloaded, waiting for OCR.
        ConcurrentQueue<ImgInfo> matQueue = new ConcurrentQueue<ImgInfo>();

        // Copies of the two check boxes, taken when a run starts.
        bool saveImg = false;
        bool saveOcr = false;

        int ocrNum = 0;// records that finished OCR (incremented once per ImgInfo, not per image)
        int totalCount = 0;// records in the current run (set from ltImgInfo.Count when the run starts)
        int downloadCount = 0;// records whose download pass finished
        int vioIDCount = 0;// records flagged as violating a rule

        private void frmMain_Load(object sender, EventArgs e)
        {
            //初始化
            startupPath = System.Windows.Forms.Application.StartupPath;

            string detectionModelDir = startupPath + "\\inference\\ch_PP-OCRv3_det_infer";
            string classificationModelDir = startupPath + "\\inference\\ch_ppocr_mobile_v2.0_cls_infer";
            string recognitionModelDir = startupPath + "\\inference\\ch_PP-OCRv3_rec_infer";
            string labelFilePath = startupPath + "\\inference\\ppocr_keys.txt";

            FullOcrModel model = FullOcrModel.FromDirectory(detectionModelDir, classificationModelDir, recognitionModelDir, labelFilePath, ModelVersion.V3);

            PaddleOcrOptions paddleOcrOptions = new PaddleOcrOptions();
            paddleOcrOptions.DetectionDeviceOptions = new DeviceOptions("CPU");
            paddleOcrOptions.DetectionStaticSize = new OpenCvSharp.Size(800, 800);
            paddleOcrOptions.RecognitionStaticWidth = 512;

            Program.paddleOcr = new PaddleOcrAll(model, paddleOcrOptions);
            Program.paddleOcr.Detector.UnclipRatio = 1.5f;
            Program.paddleOcr.AllowRotateDetection = true;    /* 允许识别有角度的文字 */
            Program.paddleOcr.Enable180Classification = false; /* 允许识别旋转角度大于90度的文字 */

            ServicePointManager.Expect100Continue = false;
            ServicePointManager.DefaultConnectionLimit = 512;

            //加载违禁词
            Common.ltRuleContains.Clear();
            Common.ltRuleTel.Clear();

            string ruleContainsPath = "rules\\rule_contains.txt";
            if (File.Exists(ruleContainsPath))
            {
                Common.ltRuleContains = File.ReadAllLines(ruleContainsPath).ToList();

            }
            StringBuilder sb = new StringBuilder();
            foreach (var item in Common.ltRuleContains)
            {
                sb.AppendLine(item);
            }
            log.Info("rule_contains.txt---->包含" + Common.ltRuleContains.Count() + "个违禁词,内容如下:\r\n" + sb.ToString());


            string ruleTelPath = "rules\\rule_tel.txt";
            if (File.Exists(ruleTelPath))
            {
                foreach (var item in File.ReadAllLines(ruleTelPath))
                {
                    Common.ltRuleTel.Add(item.ToLower());
                }
            }

            sb.Clear();
            foreach (var item in Common.ltRuleTel)
            {
                sb.AppendLine(item);
            }
            log.Info("rule_tel.txt---->包含" + Common.ltRuleTel.Count() + "个号码前缀,内容如下:\r\n" + sb.ToString());

        }

        /// <summary>
        /// 选择表格
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void button2_Click(object sender, EventArgs e)
        {
            try
            {
                OpenFileDialog ofd = new OpenFileDialog();
                ofd.Filter = excelFileFilter;
                if (ofd.ShowDialog() != DialogResult.OK) return;

                log.Info("解析中……");
                Application.DoEvents();

                Stopwatch sw = new Stopwatch();
                sw.Start();  //开始计时

                string excelPath = ofd.FileName;

                Workbook workbook = new Workbook(excelPath);
                Cells cells = workbook.Worksheets[0].Cells;
                System.Data.DataTable dataTable1 = cells.ExportDataTable(1, 0, cells.MaxDataRow, cells.MaxColumn + 1);//noneTitle

                ltImgInfo = new ConcurrentQueue<ImgInfo>();

                //遍历
                ImgInfo temp;
                int imgCount = 0;
                foreach (DataRow row in dataTable1.Rows)
                {
                    temp = new ImgInfo();
                    temp.id = row[0].ToString();
                    temp.title = row[1].ToString();

                    List<MatInfo> list = new List<MatInfo>();
                    for (int i = 2; i < cells.MaxColumn + 1; i++)
                    {

                        string tempStr = row[i].ToString();
                        if (!string.IsNullOrEmpty(tempStr))
                        {
                            if (i >= 7)
                            {
                                List<string> ltScrUrlTemp = Common.GetScrUrl(tempStr);
                                if (ltScrUrlTemp.Count > 0)
                                {
                                    foreach (var item in ltScrUrlTemp)
                                    {

                                        MatInfo matInfo = new MatInfo();
                                        matInfo.url = item;
                                        list.Add(matInfo);
                                    }
                                }
                            }
                            else
                            {
                                MatInfo matInfo = new MatInfo();
                                matInfo.url = tempStr;
                                list.Add(matInfo);
                            }
                        }
                    }
                    temp.images = list;
                    imgCount = imgCount + list.Count();
                    ltImgInfo.Enqueue(temp);

                    //for test
                    //if (ltImgInfo.Count()>10)
                    //{
                    //    break;
                    //}
                }
                log.Info("解析完毕,一共[" + ltImgInfo.Count + "]条记录,[" + imgCount + "]张图片,耗时:" + sw.ElapsedMilliseconds + "毫秒");
            }
            catch (Exception ex)
            {
                log.Error("解析表格异常:" + ex.Message);
                MessageBox.Show("解析表格异常:" + ex.Message);
            }
        }

        void ShowCostTime(string total, string ocrNum, string downloadCount, long time, int vioIDCount)
        {
            txtTotal.Invoke(new Action(() =>
            {
                TimeSpan ts = TimeSpan.FromMilliseconds(time);
                txtTotal.Text = string.Format("下载完成:{0}/{1},识别完成:{2}/{3},违规ID数量:{5},用时:{4}"
                    , downloadCount
                    , total
                    , ocrNum
                    , total
                    , ts.ToString()
                    , vioIDCount
                    );
            }));
        }

        /// <summary>
        /// 下载识别
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void button1_Click(object sender, EventArgs e)
        {
            if (ltImgInfo.Count == 0)
            {
                MessageBox.Show("请先选择表格!");
                return;
            }

            DialogResult result = MessageBox.Show("确认开始下载识别?此操作会清空上一次的数据,请注意备份!", "确认", MessageBoxButtons.YesNo, MessageBoxIcon.Question);
            if (result == DialogResult.Yes)
            {
                log.Info("确认开始下载识别!");
            }
            else
            {
                log.Info("取消开始下载识别!");
                return;
            }

            if (!Directory.Exists("img"))
            {
                Directory.CreateDirectory("img");
            }

            if (!Directory.Exists("ocr_result"))
            {
                Directory.CreateDirectory("ocr_result");
            }

            if (!Directory.Exists("result"))
            {
                Directory.CreateDirectory("result");
            }

            if (!Directory.Exists("result//img"))
            {
                Directory.CreateDirectory("result//img");
            }

            //清空结果
            File.WriteAllText("result//result.txt", "");
            File.WriteAllText("result//result_detail.txt", "");
            // 清空文件夹中的文件
            foreach (string filePath in Directory.GetFiles("result", "*", SearchOption.AllDirectories))
            {
                File.Delete(filePath);
            }

            // 写入列标题
            File.WriteAllText("result//result.txt", "id\ttitel\tcontent\r\n");

            btnStop.Enabled = true;
            btnStart.Enabled = false;
            chkSaveImg.Enabled = false;
            chkSaveOcr.Enabled = false;

            if (chkSaveImg.Checked)
            {
                saveImg = true;
            }
            else
            {
                saveImg = false;
            }

            if (chkSaveOcr.Checked)
            {
                saveOcr = true;
            }
            else
            {
                saveOcr = false;
            }

            Application.DoEvents();

            cts = new CancellationTokenSource();

            Stopwatch total = new Stopwatch();
            total.Start();  //开始计时

            // 清空队列
            matQueue = new ConcurrentQueue<ImgInfo>();
            //while (!matQueue.IsEmpty)
            //{
            //    matQueue.TryDequeue(out _);
            //}


            ocrNum = 0;//完成OCR识别的数量
            totalCount = ltImgInfo.Count();//图片总数量
            downloadCount = 0;

            //下载线程
            int downloadThreadNum = 2;
            for (int i = 0; i < downloadThreadNum; i++)
            {
                Task.Factory.StartNew(() =>
                {
                    while (true)
                    {
                        //判断是否被取消;
                        if (cts.Token.IsCancellationRequested)
                        {
                            return;
                        }

                        if (downloadCount == totalCount)
                        {
                            log.Info("--------------------------------->下载完成!<----------------------------------");
                            return;
                        }

                        ImgInfo imgInfo = new ImgInfo();
                        if (ltImgInfo.TryDequeue(out imgInfo))
                        {
                            //队列容量大于50 休息一秒
                            if (matQueue.Count > 50)
                            {
                                System.Threading.Thread.Sleep(1000);
                            }

                            if (matQueue.Count > 100)
                            {
                                System.Threading.Thread.Sleep(2000);
                            }

                            int imagesCount = imgInfo.images.Count();
                            for (int j = 0; j < imagesCount; j++)
                            {
                                try
                                {
                                    Stopwatch sw = new Stopwatch();
                                    sw.Start();  //开始计时
                                    HttpWebRequest request = WebRequest.Create(imgInfo.images[j].url) as HttpWebRequest;
                                    request.KeepAlive = false;
                                    request.ServicePoint.Expect100Continue = false;
                                    request.Timeout = 2000;// 2秒
                                    request.ReadWriteTimeout = 2000;//2秒

                                    request.ServicePoint.UseNagleAlgorithm = false;
                                    request.ServicePoint.ConnectionLimit = 65500;
                                    request.AllowWriteStreamBuffering = false;
                                    request.Proxy = null;

                                    request.CookieContainer = new CookieContainer();
                                    request.CookieContainer.Add(new Cookie("AspxAutoDetectCookieSupport", "1") { Domain = new Uri(imgInfo.images[j].url).Host });

                                    HttpWebResponse wresp = (HttpWebResponse)request.GetResponse();
                                    Stream s = wresp.GetResponseStream();
                                    Bitmap bmp = (Bitmap)System.Drawing.Image.FromStream(s);
                                    s.Dispose();
                                    wresp.Close();
                                    wresp.Dispose();
                                    request.Abort();

                                    sw.Stop();


                                    if (saveImg)
                                    {
                                        bmp.Save("img//" + imgInfo.id + "_" + j + ".jpg");
                                    }

                                    var mat = OpenCvSharp.Extensions.BitmapConverter.ToMat(bmp);

                                    if (mat.Channels() == 4)
                                    {
                                        Cv2.CvtColor(mat, mat, ColorConversionCodes.BGRA2BGR);
                                    }

                                    imgInfo.images[j].mat = mat;
                                    imgInfo.images[j].name = imgInfo.id + "_" + j;

                                    if (saveImg)
                                    {
                                        bmp.Save("img//" + imgInfo.images[j].name + ".jpg");
                                    }

                                    log.Info("  " + imgInfo.images[j].name + "-->下载用时:" + sw.ElapsedMilliseconds + "毫秒");
                                }
                                catch (Exception ex)
                                {
                                    log.Error("---->id:" + imgInfo.id + ",url[" + imgInfo.images[j].url + "],下载异常:" + ex.Message);
                                }
                            }
                            matQueue.Enqueue(imgInfo);
                            Interlocked.Increment(ref downloadCount);
                        }

                    }
                }, TaskCreationOptions.LongRunning);

            }

            //识别线程
            Task.Factory.StartNew(() =>
            {
                while (true)
                {
                    //判断是否被取消;
                    if (cts.Token.IsCancellationRequested)
                    {
                        return;
                    }

                    if (ocrNum == totalCount)
                    {
                        log.Info("--------------------------------->识别完成!<----------------------------------");
                        return;
                    }

                    ImgInfo imgInfo = new ImgInfo();
                    if (matQueue.TryDequeue(out imgInfo))
                    {

                        Stopwatch perID = new Stopwatch();
                        perID.Start();//开始计时
                        int imagesCount = imgInfo.images.Count();
                        for (int j = 0; j < imagesCount; j++)
                        {
                            //Mat mat= imgInfo.images[j].mat;
                            Stopwatch sw = new Stopwatch();
                            sw.Start();  //开始计时
                            PaddleOcrResult ocrResult = null;
                            try
                            {
                                if (imgInfo.images[j].mat != null && (!imgInfo.images[j].mat.Empty()))
                                {
                                    ocrResult = Program.paddleOcr.Run(imgInfo.images[j].mat);

                                    sw.Stop();
                                    log.Info("  " + imgInfo.images[j].name + "---->识别用时:" + sw.ElapsedMilliseconds + "毫秒");

                                    //string ocrInfo = ocrResult.Text.ToString();

                                    string ocrInfo = string.Join("\n", from x in ocrResult.Regions
                                                                       where x.Score > 0.8
                                                                       orderby x.Rect.Center.Y, x.Rect.Center.X
                                                                       select x.Text);

                                    if (saveOcr)
                                    {
                                        File.WriteAllText("ocr_result//" + imgInfo.images[j].name + ".txt", ocrInfo);
                                    }

                                    //规则校验
                                    Stopwatch ruleSw = new Stopwatch();
                                    ruleSw.Start();//开始计时
                                    ocrInfo = ocrInfo.Trim();
                                    ocrInfo = ocrInfo.Replace(" ", "");

                                    string words = "";
                                    string resultInfo = "";
                                    if (Common.RuleContainsCheck(ocrInfo, out words, ocrResult))
                                    {
                                        resultInfo = string.Format("ID:{0},Title:[{1}],------>包含违禁词:{2}", imgInfo.id, imgInfo.title, words);
                                        log.Info(resultInfo);

                                        //存数据
                                        File.AppendAllText("result//result.txt", imgInfo.id + "\t" + imgInfo.title + "\t包含违禁词:" + words + "\r\n");
                                        File.AppendAllText("result//result_detail.txt", "-------->\r\n" + resultInfo + ",识别内容" + ocrInfo + "\r\n<--------\r\n");

                                        //存图
                                        Cv2.ImWrite("result//img//" + imgInfo.images[j].name + ".jpg", imgInfo.images[j].mat);
                                        imgInfo.images[j].mat.Dispose();

                                        Interlocked.Increment(ref vioIDCount);

                                        break;
                                    }

                                    if (Common.RuleTelCheck(ocrInfo, out words, ocrResult))
                                    {
                                        resultInfo = string.Format("ID:{0},Title:[{1}],------>疑似包含电话号码:{2}", imgInfo.id, imgInfo.title, words);
                                        log.Info(resultInfo);
                                        //File.AppendAllText("result//result.txt", resultInfo+ "\r\n");
                                        File.AppendAllText("result//result.txt", imgInfo.id + "\t" + imgInfo.title + "\t疑似包含电话号码:" + words + "\r\n");
                                        File.AppendAllText("result//result_detail.txt", "-------->\r\n" + resultInfo + ",识别内容" + ocrInfo + "\r\n<--------\r\n");

                                        //存图
                                        Cv2.ImWrite("result//img//" + imgInfo.images[j].name + ".jpg", imgInfo.images[j].mat);
                                        imgInfo.images[j].mat.Dispose();

                                        Interlocked.Increment(ref vioIDCount);

                                        break;
                                    }
                                    imgInfo.images[j].mat.Dispose();
                                    ruleSw.Stop();
                                    //log.Info("  " + imgInfo.images[j].name + "---->违禁词校验用时:" + ruleSw.ElapsedMilliseconds + "毫秒");
                                }
                            }
                            catch (Exception ex)
                            {
                                imgInfo.images[j].mat.Dispose();
                                log.Info("  " + imgInfo.images[j].name + "---->识别异常:" + ex.Message);
                            }
                        }

                        perID.Stop();
                        log.Info("---->id:" + imgInfo.id + ",图片张数[" + imagesCount + "],识别小计用时:" + perID.ElapsedMilliseconds + "毫秒");
                        Interlocked.Increment(ref ocrNum);
                        ShowCostTime(totalCount.ToString(), ocrNum.ToString(), downloadCount.ToString(), total.ElapsedMilliseconds, vioIDCount);

                    }
                }
            }, TaskCreationOptions.LongRunning);
        }

        /// <summary>
        /// 停止
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void button3_Click(object sender, EventArgs e)
        {
            cts.Cancel();
            btnStop.Enabled = false;
            btnStart.Enabled = true;

            chkSaveImg.Enabled = true;
            chkSaveOcr.Enabled = true;
        }
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值