在同类里面尝试了Aspose.ocr 和Tesseract,效果都没有PaddleOCR好。
就识别效果而言,感觉是:PaddleOCR > Aspose.OCR > Tesseract。
首先下载 PaddleOCRSharp,
然后按下面的方式调用(使用 CPU 推理)。
using PaddleOCRSharp;
using System.Text;
using System.Windows.Forms;
namespace WinFormsApp14
{
    /// <summary>
    /// Main form of the sample: lets the user pick a file, runs PaddleOCR text
    /// detection on it and shows the recognized text blocks in <c>richTextBox1</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// The OCR engine shared by every recognition request of this form.
        /// It is created once in the constructor and released when the form is disposed.
        /// </summary>
        public PaddleOCREngine engine;

        /// <summary>
        /// Initializes the designer components and creates the shared OCR engine.
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            // Create the engine only once; creating it repeatedly makes memory usage keep growing.
            engine = CreateOCRParameter();

            // The designer file owns the Dispose(bool) override, so hook the Disposed
            // event to release the engine together with the form.
            Disposed += (sender, args) => engine?.Dispose();
        }

        /// <summary>
        /// Builds a <see cref="PaddleOCREngine"/> with default parameters and a
        /// <c>null</c> model configuration.
        /// </summary>
        /// <returns>A newly created OCR engine; the caller owns it and must dispose it.</returns>
        public PaddleOCREngine CreateOCRParameter()
        {
            OCRParameter parameter = new OCRParameter();

            // Optional tuning knobs (left at their defaults here):
            // parameter.cpu_math_library_num_threads = 6; // number of concurrent prediction threads
            // parameter.enable_mkldnn = true;            // for web deployment, or when memory usage is very high, disabling this is recommended
            // parameter.cls = false;                     // whether to run text-direction classification; default false
            // parameter.det = true;                      // NOTE(review): original comment duplicated the use_angle_cls description - confirm meaning
            // parameter.use_angle_cls = true;            // whether to enable direction detection (recognizes 180-degree rotated text)
            // parameter.det_db_score_mode = true;        // whether text regions are described by polylines instead of rectangles
            // parameter.det_db_unclip_ratio = 1.6f;
            // parameter.max_side_len = 960;

            // A null config is passed to the engine here. To use custom models instead:
            /*
            OCRModelConfig config = new OCRModelConfig();
            string root = Environment.CurrentDirectory;
            string modelPathroot = root + @"\inference";
            config.det_infer = modelPathroot + @"\ch_PP-OCRv3_det_infer";
            config.cls_infer = modelPathroot + @"\ch_ppocr_mobile_v2.0_cls_infer";
            config.rec_infer = modelPathroot + @"\ch_PP-OCRv3_rec_infer";
            config.keys = modelPathroot + @"\ppocr_keys.txt";
            */
            OCRModelConfig config = null;

            // Return directly instead of going through a local named "engine",
            // which used to shadow the field of the same name.
            return new PaddleOCREngine(config, parameter);
        }

        /// <summary>
        /// Click handler: asks the user for a file, runs OCR on it and appends each
        /// recognized text block to <c>richTextBox1</c>, one block per line.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            richTextBox1.Text = "";

            // OpenFileDialog wraps a native dialog; dispose it deterministically
            // instead of leaking one instance per click.
            using (OpenFileDialog openFileDialog1 = new OpenFileDialog())
            {
                openFileDialog1.FileName = "";
                openFileDialog1.Filter = "所有文件(*.*)|*.*";

                if (openFileDialog1.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                OCRResult ocrResult = engine.DetectText(openFileDialog1.FileName);

                // Nothing recognized (or no result object): leave the text box empty.
                if (ocrResult == null || ocrResult.TextBlocks == null)
                {
                    return;
                }

                StringBuilder recognized = new StringBuilder();
                foreach (var block in ocrResult.TextBlocks)
                {
                    // Two appends avoid building a temporary string per block.
                    recognized.Append(block.Text).Append("\r\n");
                }

                richTextBox1.AppendText(recognized.ToString());
            }
        }
    }
}
基本都能识别,只是花括号"{}"的识别效果不太好。