using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Xml.Linq;
namespace NanChangCatch.Class
{
    /// <summary>
    /// Runs OCR on an image by inserting it into a temporary OneNote page and
    /// reading back the text OneNote recognizes.
    /// </summary>
    // NOTE(review): an identical definition of this namespace/class appears again
    // later in this file; one of the two copies has to be removed for the file to compile.
    public class MyOcr
    {
        /// <summary>
        /// Local file the image is downloaded to before it is loaded.
        /// </summary>
        // NOTE(review): fixed, relative file name - concurrent calls would overwrite each
        // other's download and the file is never removed. Kept as-is in case callers
        // rely on the file; confirm and switch to a unique temp file if they do not.
        private const string DownloadFileName = "download";

        /// <summary>
        /// ID of the temporary OneNote page currently in use; empty when there is none.
        /// </summary>
        private string m_OcrPageID = "";

        /// <summary>
        /// Downloads an image, places it on a temporary OneNote page, waits, and returns
        /// the first block of text OneNote recognized in it.
        /// OneNote error codes: https://msdn.microsoft.com/zh-cn/ff966472
        /// OneNote API reference: https://msdn.microsoft.com/zh-cn/library/office/ff796230.aspx
        /// </summary>
        /// <param name="s_ImgPath">Address of the image; may be a network path.</param>
        /// <param name="i_DelaySecond">
        /// Time to wait for OneNote to finish OCR. Despite the name, the value is passed
        /// unchanged to <c>Thread.Sleep</c> and is therefore in milliseconds. Use a larger
        /// value for large images.
        /// </param>
        /// <param name="s_Message">
        /// On success, the value of the first <c>OCRText</c> element (empty when OneNote
        /// produced none). On failure, a description of the problem.
        /// </param>
        /// <returns>
        /// <c>true</c> when the OneNote round trip completed; <c>false</c> when the image
        /// format is not supported or no OneNote section could be found.
        /// Download and OneNote COM errors are not caught and propagate as exceptions.
        /// </returns>
        public bool fnOCR(string s_ImgPath, int i_DelaySecond, out string s_Message)
        {
            s_Message = "";

            using (WebClient webClient = new WebClient())
            {
                webClient.DownloadFile(s_ImgPath, DownloadFileName);
            }

            string imageFormatName;
            string imageBase64;
            int imageWidth;
            int imageHeight;

            using (Image im = Image.FromFile(DownloadFileName))
            {
                ImageFormat encodeAs;
                if (!TryGetOneNoteEncoding(im.RawFormat, out encodeAs, out imageFormatName))
                {
                    s_Message = "不支持的图片格式。";
                    return false;
                }

                using (MemoryStream ms = new MemoryStream())
                {
                    im.Save(ms, encodeAs);
                    // ToArray copies only the bytes written. GetBuffer would expose the whole
                    // backing array, including unused capacity, and corrupt the payload.
                    imageBase64 = Convert.ToBase64String(ms.ToArray());
                }

                imageWidth = im.Width;
                imageHeight = im.Height;
            }

            var onenoteApp = new Microsoft.Office.Interop.OneNote.Application();

            // The section hierarchy provides both the XML namespace of the current schema
            // and a section to host the temporary page.
            string hierarchyXml;
            onenoteApp.GetHierarchy(
                null,
                Microsoft.Office.Interop.OneNote.HierarchyScope.hsSections,
                out hierarchyXml,
                Microsoft.Office.Interop.OneNote.XMLSchema.xsCurrent);
            XDocument hierarchy = XDocument.Parse(hierarchyXml);
            XNamespace ns = hierarchy.Root.Name.Namespace;

            // Create the page only if one is not already pending.
            if (string.IsNullOrEmpty(m_OcrPageID))
            {
                XElement sectionNode = hierarchy.Descendants(ns + "Section").FirstOrDefault();
                XAttribute sectionId = sectionNode == null ? null : sectionNode.Attribute("ID");
                if (sectionId == null)
                {
                    s_Message = "未找到可用的 OneNote 分区。";
                    return false;
                }

                onenoteApp.CreateNewPage(sectionId.Value, out m_OcrPageID);
            }

            try
            {
                XDocument page = new XDocument(
                    new XElement(ns + "Page", new XAttribute("ID", m_OcrPageID),
                        new XElement(ns + "Outline",
                            new XElement(ns + "OEChildren",
                                new XElement(ns + "OE",
                                    new XElement(ns + "Image",
                                        new XAttribute("format", imageFormatName),
                                        new XAttribute("originalPageNumber", "0"),
                                        new XElement(ns + "Position",
                                            new XAttribute("x", "0"),
                                            new XAttribute("y", "0"),
                                            new XAttribute("z", "0")),
                                        new XElement(ns + "Size",
                                            new XAttribute("width", imageWidth),
                                            new XAttribute("height", imageHeight)),
                                        new XElement(ns + "Data", imageBase64)))))));
                onenoteApp.UpdatePageContent(page.ToString(), DateTime.MinValue);

                // Give OneNote time to finish OCR before reading the page back (milliseconds).
                System.Threading.Thread.Sleep(i_DelaySecond);

                string pageXml;
                onenoteApp.GetPageContent(
                    m_OcrPageID,
                    out pageXml,
                    Microsoft.Office.Interop.OneNote.PageInfo.piBasic);

                // Take the first recognized text block, if any.
                XElement ocrText = XDocument.Parse(pageXml).Descendants(ns + "OCRText").FirstOrDefault();
                if (ocrText != null)
                {
                    s_Message = ocrText.Value;
                }
            }
            finally
            {
                // Forget the page ID before deleting, so the next call creates a fresh page
                // instead of updating one that no longer exists.
                string pageId = m_OcrPageID;
                m_OcrPageID = "";
                onenoteApp.DeleteHierarchy(pageId);
            }

            return true;
        }

        /// <summary>
        /// Maps the source image format to the encoder used for upload and to the matching
        /// value of the OneNote <c>Image/@format</c> attribute (only auto|png|emf|jpg are accepted).
        /// </summary>
        /// <param name="rawFormat">Format reported by the loaded image.</param>
        /// <param name="encodeAs">Encoder format to save the image with.</param>
        /// <param name="oneNoteFormat">Value for the <c>format</c> attribute.</param>
        /// <returns><c>false</c> when the source format is not one of the handled formats.</returns>
        private static bool TryGetOneNoteEncoding(ImageFormat rawFormat, out ImageFormat encodeAs, out string oneNoteFormat)
        {
            Guid id = rawFormat.Guid;

            if (id == ImageFormat.Jpeg.Guid || id == ImageFormat.Gif.Guid)
            {
                // GIF is not an accepted OneNote format; it is re-encoded as JPEG.
                encodeAs = ImageFormat.Jpeg;
                oneNoteFormat = "jpg";
                return true;
            }

            if (id == ImageFormat.Png.Guid
                || id == ImageFormat.Bmp.Guid
                || id == ImageFormat.Tiff.Guid
                || id == ImageFormat.Emf.Guid)
            {
                // BMP and TIFF are not accepted format values, and GDI+ has no EMF encoder,
                // so all of these are re-encoded as PNG and labelled accordingly.
                encodeAs = ImageFormat.Png;
                oneNoteFormat = "png";
                return true;
            }

            encodeAs = null;
            oneNoteFormat = "";
            return false;
        }
    }
}
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Xml.Linq;
namespace NanChangCatch.Class
{
    /// <summary>
    /// Runs OCR on an image by inserting it into a temporary OneNote page and
    /// reading back the text OneNote recognizes.
    /// </summary>
    // NOTE(review): this namespace/class repeats a definition that already appears
    // earlier in this file; one of the two copies has to be removed for the file to compile.
    public class MyOcr
    {
        /// <summary>
        /// Local file the image is downloaded to before it is loaded.
        /// </summary>
        // NOTE(review): fixed, relative file name - concurrent calls would overwrite each
        // other's download and the file is never removed. Kept as-is in case callers
        // rely on the file; confirm and switch to a unique temp file if they do not.
        private const string DownloadFileName = "download";

        /// <summary>
        /// ID of the temporary OneNote page currently in use; empty when there is none.
        /// </summary>
        private string m_OcrPageID = "";

        /// <summary>
        /// Downloads an image, places it on a temporary OneNote page, waits, and returns
        /// the first block of text OneNote recognized in it.
        /// OneNote error codes: https://msdn.microsoft.com/zh-cn/ff966472
        /// OneNote API reference: https://msdn.microsoft.com/zh-cn/library/office/ff796230.aspx
        /// </summary>
        /// <param name="s_ImgPath">Address of the image; may be a network path.</param>
        /// <param name="i_DelaySecond">
        /// Time to wait for OneNote to finish OCR. Despite the name, the value is passed
        /// unchanged to <c>Thread.Sleep</c> and is therefore in milliseconds. Use a larger
        /// value for large images.
        /// </param>
        /// <param name="s_Message">
        /// On success, the value of the first <c>OCRText</c> element (empty when OneNote
        /// produced none). On failure, a description of the problem.
        /// </param>
        /// <returns>
        /// <c>true</c> when the OneNote round trip completed; <c>false</c> when the image
        /// format is not supported or no OneNote section could be found.
        /// Download and OneNote COM errors are not caught and propagate as exceptions.
        /// </returns>
        public bool fnOCR(string s_ImgPath, int i_DelaySecond, out string s_Message)
        {
            s_Message = "";

            using (WebClient webClient = new WebClient())
            {
                webClient.DownloadFile(s_ImgPath, DownloadFileName);
            }

            string imageFormatName;
            string imageBase64;
            int imageWidth;
            int imageHeight;

            using (Image im = Image.FromFile(DownloadFileName))
            {
                ImageFormat encodeAs;
                if (!TryGetOneNoteEncoding(im.RawFormat, out encodeAs, out imageFormatName))
                {
                    s_Message = "不支持的图片格式。";
                    return false;
                }

                using (MemoryStream ms = new MemoryStream())
                {
                    im.Save(ms, encodeAs);
                    // ToArray copies only the bytes written. GetBuffer would expose the whole
                    // backing array, including unused capacity, and corrupt the payload.
                    imageBase64 = Convert.ToBase64String(ms.ToArray());
                }

                imageWidth = im.Width;
                imageHeight = im.Height;
            }

            var onenoteApp = new Microsoft.Office.Interop.OneNote.Application();

            // The section hierarchy provides both the XML namespace of the current schema
            // and a section to host the temporary page.
            string hierarchyXml;
            onenoteApp.GetHierarchy(
                null,
                Microsoft.Office.Interop.OneNote.HierarchyScope.hsSections,
                out hierarchyXml,
                Microsoft.Office.Interop.OneNote.XMLSchema.xsCurrent);
            XDocument hierarchy = XDocument.Parse(hierarchyXml);
            XNamespace ns = hierarchy.Root.Name.Namespace;

            // Create the page only if one is not already pending.
            if (string.IsNullOrEmpty(m_OcrPageID))
            {
                XElement sectionNode = hierarchy.Descendants(ns + "Section").FirstOrDefault();
                XAttribute sectionId = sectionNode == null ? null : sectionNode.Attribute("ID");
                if (sectionId == null)
                {
                    s_Message = "未找到可用的 OneNote 分区。";
                    return false;
                }

                onenoteApp.CreateNewPage(sectionId.Value, out m_OcrPageID);
            }

            try
            {
                XDocument page = new XDocument(
                    new XElement(ns + "Page", new XAttribute("ID", m_OcrPageID),
                        new XElement(ns + "Outline",
                            new XElement(ns + "OEChildren",
                                new XElement(ns + "OE",
                                    new XElement(ns + "Image",
                                        new XAttribute("format", imageFormatName),
                                        new XAttribute("originalPageNumber", "0"),
                                        new XElement(ns + "Position",
                                            new XAttribute("x", "0"),
                                            new XAttribute("y", "0"),
                                            new XAttribute("z", "0")),
                                        new XElement(ns + "Size",
                                            new XAttribute("width", imageWidth),
                                            new XAttribute("height", imageHeight)),
                                        new XElement(ns + "Data", imageBase64)))))));
                onenoteApp.UpdatePageContent(page.ToString(), DateTime.MinValue);

                // Give OneNote time to finish OCR before reading the page back (milliseconds).
                System.Threading.Thread.Sleep(i_DelaySecond);

                string pageXml;
                onenoteApp.GetPageContent(
                    m_OcrPageID,
                    out pageXml,
                    Microsoft.Office.Interop.OneNote.PageInfo.piBasic);

                // Take the first recognized text block, if any.
                XElement ocrText = XDocument.Parse(pageXml).Descendants(ns + "OCRText").FirstOrDefault();
                if (ocrText != null)
                {
                    s_Message = ocrText.Value;
                }
            }
            finally
            {
                // Forget the page ID before deleting, so the next call creates a fresh page
                // instead of updating one that no longer exists.
                string pageId = m_OcrPageID;
                m_OcrPageID = "";
                onenoteApp.DeleteHierarchy(pageId);
            }

            return true;
        }

        /// <summary>
        /// Maps the source image format to the encoder used for upload and to the matching
        /// value of the OneNote <c>Image/@format</c> attribute (only auto|png|emf|jpg are accepted).
        /// </summary>
        /// <param name="rawFormat">Format reported by the loaded image.</param>
        /// <param name="encodeAs">Encoder format to save the image with.</param>
        /// <param name="oneNoteFormat">Value for the <c>format</c> attribute.</param>
        /// <returns><c>false</c> when the source format is not one of the handled formats.</returns>
        private static bool TryGetOneNoteEncoding(ImageFormat rawFormat, out ImageFormat encodeAs, out string oneNoteFormat)
        {
            Guid id = rawFormat.Guid;

            if (id == ImageFormat.Jpeg.Guid || id == ImageFormat.Gif.Guid)
            {
                // GIF is not an accepted OneNote format; it is re-encoded as JPEG.
                encodeAs = ImageFormat.Jpeg;
                oneNoteFormat = "jpg";
                return true;
            }

            if (id == ImageFormat.Png.Guid
                || id == ImageFormat.Bmp.Guid
                || id == ImageFormat.Tiff.Guid
                || id == ImageFormat.Emf.Guid)
            {
                // BMP and TIFF are not accepted format values, and GDI+ has no EMF encoder,
                // so all of these are re-encoded as PNG and labelled accordingly.
                encodeAs = ImageFormat.Png;
                oneNoteFormat = "png";
                return true;
            }

            encodeAs = null;
            oneNoteFormat = "";
            return false;
        }
    }
}