由于MODI组件是Office 2003自带的一个组件(Office 2007仍然提供,自Office 2010起已被移除),所以开发机器上需要安装Office 2003或Office 2007,并在安装时选择MODI组件。在开发过程中,需要在工程中引用MODI组件。这里我们使用C#来进行MODI程序的开发和示例。
1、添加MODI组件
我们首先需要把MODI引用添加到工程中。添加引用时选择COM组件库,可以看到Microsoft Office Document Imaging 12.0 Type Library组件(对应文件为MDIVWCTL.DLL)。添加成功后,可以在VS2013 C#的解决方案资源管理器的“引用”节点下看到MODI项,这就说明MODI添加成功了。
2、利用MODI组件中的Document对象
Document对象是MODI中最重要的一个对象,它提供了图片的引入、扫描等重要的方法。在这里我们首先创建一个MODI中的doc对象的一个实例:
MODI.Document doc = new MODI.Document();
然后把需要处理的文档图片准备好。图片的格式可以是TIFF、BMP、PNG或者JPEG。当然,我们有必要先对图片进行一些预处理,尽量让图片干净、清晰。然后利用doc对象的Create()方法引入图片文档:doc.Create(img_Path); 其中img_Path为图片文档的路径。
在开发时需要引用如下图:
相关代码:
C_Upload代码
public class C_Upload : System.Web.UI.Page
{
// Stopwatch held by each page instance; no method visible in this class starts or reads it.
Stopwatch sw = new Stopwatch();
/// <summary>
/// Saves the archive posted with the current request under /UploadFiles/, extracts it
/// there via <c>Other.DecompressionZipOrRar</c>, hands the extracted folder (if any) to
/// <c>Other.MoveToParentDirectory</c> and finally deletes the saved archive.
/// </summary>
/// <param name="filename">Not read by this method; kept so existing callers keep compiling.</param>
/// <returns>
/// 1 - no file was posted;
/// 2 - the posted file is not a .rar or .zip archive;
/// 3 - the archive was saved and extracted;
/// 4 - extraction failed.
/// </returns>
public int UploadFile(string filename)
{
    // A FileUpload control created with "new" is not part of any page, so its
    // PostedFile is always null. Read the posted file from the request instead.
    System.Web.HttpContext context = System.Web.HttpContext.Current;
    System.Web.HttpPostedFile posted = null;
    if (context != null && context.Request.Files.Count > 0)
    {
        posted = context.Request.Files[0];
    }
    if (posted == null || string.IsNullOrEmpty(posted.FileName))
    {
        return 1;
    }

    // Some browsers send the full client path; keep only the bare file name so the
    // client cannot influence the directory the archive is written to.
    string fileName = posted.FileName;
    int nameStart = fileName.LastIndexOfAny(new char[] { '\\', '/' }) + 1;
    fileName = fileName.Substring(nameStart);

    // Check the real extension (case-insensitively) rather than looking for
    // "rar"/"zip" anywhere in the name.
    bool isArchive = fileName.EndsWith(".rar", StringComparison.OrdinalIgnoreCase)
        || fileName.EndsWith(".zip", StringComparison.OrdinalIgnoreCase);
    if (!isArchive || fileName.Length <= 4)
    {
        return 2;
    }

    string filePath = Server.MapPath("/UploadFiles/");
    string path = System.IO.Path.Combine(filePath, fileName);
    // Folder the archive is expected to unpack into: the archive name without ".rar"/".zip".
    string dirName = System.IO.Path.Combine(filePath, fileName.Substring(0, fileName.Length - 4));

    posted.SaveAs(path);
    try
    {
        Other other = new Other();
        if (!other.DecompressionZipOrRar(path, filePath))
        {
            return 4;
        }
        if (Directory.Exists(dirName))
        {
            other.MoveToParentDirectory(dirName);
        }
        return 3;
    }
    finally
    {
        // Remove the uploaded archive even when extraction throws.
        File.Delete(path);
    }
}
// Running count of ChildThread calls (incremented once per call, before the path is validated).
int l = 0;
// Labels of the eleven licence fields, in column order; used as the Excel header row and by ReplaceT.
string[] strTitle = new string[] { "企业注册号", "企业名称", "类型", "住所", "法定代表人", "成立时间", "注册资本", "营业期限", "经营范围", "登记机关", "核准时间" };
// Per-file text read by changeerwei. NOTE(review): nothing visible in this class assigns it - confirm where it is filled.
string[] strContent;
// Rows handed to ExcelWriter1 by changeerwei, one string[] per file.
string[][] strC;
/// <summary>
/// Writes the <c>strTitle</c> header row followed by <paramref name="myData"/> into a new
/// Excel workbook and saves it to <paramref name="Path"/>.
/// </summary>
/// <param name="myData">Rows to write; row i goes to sheet row i + 2. Null rows are skipped.</param>
/// <param name="Path">Target file passed to <c>Workbook.SaveAs</c>.</param>
/// <returns>1 when the workbook was saved. Failures surface as exceptions.</returns>
public int ExcelWriter1(String[][] myData, String Path)
{
    object missing = System.Reflection.Missing.Value;
    Microsoft.Office.Interop.Excel.Application excel = null;
    Microsoft.Office.Interop.Excel.Workbook workbook = null;
    try
    {
        excel = new Microsoft.Office.Interop.Excel.Application();
        excel.DisplayAlerts = false;
        workbook = excel.Application.Workbooks.Add(true);

        // Header row.
        for (int col = 0; col < strTitle.Length; col++)
        {
            excel.Cells[1, col + 1] = strTitle[col];
        }

        // Data rows start on sheet row 2.
        if (myData != null)
        {
            for (int row = 0; row < myData.Length; row++)
            {
                String[] cells = myData[row];
                if (cells == null)
                {
                    continue;
                }
                for (int col = 0; col < cells.Length; col++)
                {
                    excel.Cells[row + 2, col + 1] = cells[col];
                }
            }
        }

        workbook.SaveAs(Path);
        return 1;
    }
    finally
    {
        // Always shut Excel down and drop the COM references; otherwise a failed
        // export leaves an invisible EXCEL.EXE process behind.
        if (workbook != null)
        {
            workbook.Close(false, missing, missing);
            System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
        }
        if (excel != null)
        {
            excel.Quit();
            System.Runtime.InteropServices.Marshal.ReleaseComObject(excel);
        }
    }
}
// Log-file name prefix: ChildThread resets it and passes it to setText; ReplaceT sets it to the text captured for "企业名称".
string filename = "";
/// <summary>
/// Handler behind the "Convert" action: runs MODI OCR (Simplified Chinese) on one image,
/// formats the recognised text with <c>ReplaceT</c> and writes the result, or the failure
/// message, to a log file through <c>setText</c>.
/// </summary>
/// <param name="pathstr">Path of the image to recognise. Nothing is written when it is null or empty.</param>
public void ChildThread(string pathstr)
{
    filename = "";
    l++;
    if (String.IsNullOrEmpty(pathstr))
    {
        return;
    }

    StringBuilder report = new StringBuilder();
    MODI.Document doc = null;
    try
    {
        doc = new MODI.Document();
        doc.Create(pathstr);
        doc.OCR(GetLanuageType("2052"), true, true);

        // Concatenate the text of every page of the document.
        StringBuilder recognised = new StringBuilder();
        for (int i = 0; i < doc.Images.Count; i++)
        {
            MODI.Image image = (MODI.Image)doc.Images[i];
            recognised.Append(image.Layout.Text);
        }

        report.Append("==" + pathstr + "==");
        report.Append("\r\n");
        report.Append(ReplaceT(recognised.ToString()));
    }
    catch (Exception ex)
    {
        report.Append("转换失败!详情:" + ex.Message);
    }
    finally
    {
        // Close without saving and release the COM object; an open MODI document
        // keeps the image file locked.
        if (doc != null)
        {
            try
            {
                doc.Close(false);
            }
            catch (System.Runtime.InteropServices.COMException)
            {
                // The document never opened successfully; there is nothing to close.
            }
            System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
        }
    }

    setText(report.ToString(), filename);
}
/// <summary>
/// Builds one table row per file found in the mapped folder from the matching
/// <c>strContent</c> entry, exports the table to c:\抓取.xls and then deletes the files.
/// </summary>
/// <remarks>
/// Each content entry is split on blank lines ("\r\n\r\n"); in every section the text
/// after the first ':' becomes the cell value. Rows whose content is missing or has no
/// blank-line separator keep the placeholder values "1".."11".
/// </remarks>
/// <param name="pathstr">Virtual path of the folder, resolved with <c>Server.MapPath</c>.</param>
public void changeerwei(string pathstr)
{
    string[] fileNames = Directory.GetFiles(Server.MapPath(pathstr));
    strC = new string[fileNames.Length][];

    for (int i = 0; i < fileNames.Length; i++)
    {
        strC[i] = new string[] { "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11" };

        // Explicit guards replace the former catch-all around the parsing code.
        string content = (strContent != null && i < strContent.Length) ? strContent[i] : null;
        if (string.IsNullOrEmpty(content))
        {
            continue;
        }

        string[] sections = content.Split(new string[] { "\r\n\r\n" }, StringSplitOptions.None);
        if (sections.Length < 2)
        {
            continue; // no blank line: nothing can be parsed
        }

        // Column 0: text between the first ':' and the first blank line.
        int firstBreak = sections[0].Length;
        int colon = content.IndexOf(":", StringComparison.Ordinal);
        if (colon > firstBreak)
        {
            continue; // first ':' lies after the first blank line
        }
        strC[i][0] = content.Substring(colon + 1, firstBreak - colon - 1);

        // Remaining columns: bounded by both the row width and the number of sections
        // (the old "j <= 11" bound indexed one past the end of the 11-element row).
        for (int j = 1; j < strC[i].Length && j < sections.Length; j++)
        {
            int sep = sections[j].IndexOf(":", StringComparison.Ordinal);
            strC[i][j] = sections[j].Substring(sep + 1);
        }
    }

    // NOTE(review): fixed five-second pause kept from the original; its purpose is not
    // visible here - confirm whether it is still needed.
    Thread.Sleep(5000);

    string paths = @"c:\抓取.xls";
    if (ExcelWriter1(strC, paths) == 1)
    {
        // Only remove the source files once the export has succeeded.
        for (int i = 0; i < fileNames.Length; i++)
        {
            System.IO.File.Delete(fileNames[i]);
        }
    }
}
/// <summary>
/// Maps a language id given as a decimal string to the corresponding MODI OCR language.
/// </summary>
/// <param name="sValue">Language id, e.g. "2052" for Simplified Chinese or "9" for English.</param>
/// <returns>The matching <c>MiLANGUAGES</c> member; <c>miLANG_ENGLISH</c> for any value not listed.</returns>
private MODI.MiLANGUAGES GetLanuageType(string sValue)
{
    MODI.MiLANGUAGES language;
    switch (sValue)
    {
        case "2052":
            language = MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED;
            break;
        case "1028":
            language = MODI.MiLANGUAGES.miLANG_CHINESE_TRADITIONAL;
            break;
        case "5":
            language = MODI.MiLANGUAGES.miLANG_CZECH;
            break;
        case "6":
            language = MODI.MiLANGUAGES.miLANG_DANISH;
            break;
        case "7":
            language = MODI.MiLANGUAGES.miLANG_GERMAN;
            break;
        case "8":
            language = MODI.MiLANGUAGES.miLANG_GREEK;
            break;
        case "10":
            language = MODI.MiLANGUAGES.miLANG_SPANISH;
            break;
        case "11":
            language = MODI.MiLANGUAGES.miLANG_FINNISH;
            break;
        case "12":
            language = MODI.MiLANGUAGES.miLANG_FRENCH;
            break;
        case "14":
            language = MODI.MiLANGUAGES.miLANG_HUNGARIAN;
            break;
        case "16":
            language = MODI.MiLANGUAGES.miLANG_ITALIAN;
            break;
        case "17":
            language = MODI.MiLANGUAGES.miLANG_JAPANESE;
            break;
        case "18":
            language = MODI.MiLANGUAGES.miLANG_KOREAN;
            break;
        case "19":
            language = MODI.MiLANGUAGES.miLANG_DUTCH;
            break;
        case "20":
            language = MODI.MiLANGUAGES.miLANG_NORWEGIAN;
            break;
        case "21":
            language = MODI.MiLANGUAGES.miLANG_POLISH;
            break;
        case "22":
            language = MODI.MiLANGUAGES.miLANG_PORTUGUESE;
            break;
        case "25":
            language = MODI.MiLANGUAGES.miLANG_RUSSIAN;
            break;
        case "29":
            language = MODI.MiLANGUAGES.miLANG_SWEDISH;
            break;
        case "31":
            language = MODI.MiLANGUAGES.miLANG_TURKISH;
            break;
        case "9":
        default:
            // English is both the explicit mapping for "9" and the fallback.
            language = MODI.MiLANGUAGES.miLANG_ENGLISH;
            break;
    }
    return language;
}
/// <summary>
/// Re-labels OCR text: removes every known title from <paramref name="sb"/>, splits the
/// remainder on ':' and pairs the pieces (skipping the text before the first ':') with
/// the titles in <c>strTitle</c>, in order.
/// </summary>
/// <remarks>
/// Side effect: the value paired with "企业名称" is stored in the <c>filename</c> field.
/// A title without a matching piece is emitted with an empty value, so the record
/// always contains every title.
/// </remarks>
/// <param name="sb">Recognised text; null is treated as empty.</param>
/// <returns>"title:value" entries separated by blank lines, followed by a separator line of '=' characters.</returns>
private string ReplaceT(string sb)
{
    StringBuilder result = new StringBuilder();
    result.Append("\r\n");

    // Strip the titles themselves so only the values (and the colons) remain.
    // String.Replace is already a no-op when the title is absent.
    string text = sb ?? string.Empty;
    foreach (string title in strTitle)
    {
        text = text.Replace(title, "");
    }

    // parts[0] is whatever preceded the first ':'; the values start at parts[1].
    string[] parts = text.Split(':');
    for (int i = 0; i < strTitle.Length; i++)
    {
        bool hasValue = i + 1 < parts.Length;
        string value = hasValue ? parts[i + 1] : string.Empty;

        result.Append(strTitle[i]);
        result.Append(":");
        result.Append(value);
        result.Append("\r\n");
        result.Append("\r\n");

        if (hasValue && strTitle[i] == "企业名称")
        {
            filename = value;
        }
    }

    result.Append("\r\n");
    result.Append("\r\n");
    result.Append("==========================================================================================");
    result.Append("\r\n");
    result.Append("\r\n");
    return result.ToString();
}
/// <summary>
/// Writes <paramref name="content"/> to a new text file in the "log" folder under the
/// current directory. The file is named from <paramref name="filename"/> plus a timestamp.
/// </summary>
/// <param name="content">Text to write.</param>
/// <param name="filename">
/// File name prefix. It may come from OCR output, so it is trimmed and characters that
/// are not valid in a file name are replaced with '_'. Null is treated as empty.
/// </param>
private void setText(string content, string filename)
{
    string safeName = (filename ?? string.Empty).Trim();
    foreach (char invalid in System.IO.Path.GetInvalidFileNameChars())
    {
        safeName = safeName.Replace(invalid, '_');
    }

    // The log folder is not guaranteed to exist; CreateDirectory is a no-op when it does.
    string logDirectory = System.IO.Path.Combine(System.Environment.CurrentDirectory, "log");
    Directory.CreateDirectory(logDirectory);

    // Single clock read and a 24-hour "HH": the previous "hhmmss" produced the same
    // name for morning and afternoon runs.
    string stamp = DateTime.Now.ToString("yyyyMMddHHmmss");
    string FilePath = System.IO.Path.Combine(logDirectory, safeName + stamp + ".txt");

    // "using" closes the file even when the write fails.
    using (StreamWriter writer = new StreamWriter(FilePath, false))
    {
        writer.Write(content);
    }
}
}
调用代码:
/// <summary>
/// Lets the user pick a file and appends every file in that file's folder to listBox1.
/// </summary>
/// <remarks>
/// The folder is taken directly from the dialog result. Reading the path back through
/// <c>listBox1.Text</c> returns the selected item's text once an item is selected, and
/// prefixing it with textBox2's text yields an invalid path whenever that box is not empty.
/// </remarks>
private void button1_Click(object sender, EventArgs e)
{
    if (this.openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    string folder = System.IO.Path.GetDirectoryName(this.openFileDialog1.FileName);
    if (string.IsNullOrEmpty(folder))
    {
        return;
    }

    // Kept from the original: the chosen folder is also stored in the control's Text.
    this.listBox1.Text = folder;

    foreach (string file in System.IO.Directory.GetFiles(folder))
    {
        this.listBox1.Items.Add(file);
    }
}
/// <summary>
/// Runs <c>C_Upload.ChildThread</c> once for every entry currently listed in listBox1,
/// in list order, using a single <c>C_Upload</c> instance.
/// </summary>
private void button2_Click(object sender, EventArgs e)
{
    C_Upload recognizer = new C_Upload();
    foreach (object entry in this.listBox1.Items)
    {
        string imagePath = entry.ToString();
        recognizer.ChildThread(imagePath);
    }
}
3、系统不安装Office的情况下调用MODI
实现思路与步骤:
(1)搞定MODI组件所需文件
(2)完成与MODI的COM组件相关的注册表项安装(添加)
(3)完成与Office相关的注册表项安装(添加)
3.1 MODI文件列表
MODI需要的支撑文件有:接口层:MDIVWCTL.DLL,MSPCORE.DLL,MSPGIMME.DLL,MSO.DLL,LATIN1.SHP等;数据层:JFONT.DAT,LOOKUP.DAT,OCRHC.DAT,OCRVC.DAT,TWGB32.DLL,SCCODE.UNI,SCPRINT.DAT,SCPRINT2.DAT等。
3.2 与MODI的COM组件相关的注册表项添加
COM相关就是与MODI的COM组件相关的注册表项,这个直接用regsvr32导入即可:启动命令行,进入MODI安装文件夹,执行下面的命令:
regsvr32 MDIVWCTL.DLL
regsvr32 MSPCORE.DLL
即可完成MODI COM组件的注册。
3.3 与Office相关的注册表项安装(添加)
[HKEY_CLASSES_ROOT\Installer\Components\61BA386016BD0C340BBEAC273D84FD5F]
"2052"=hex(7):76,00,55,00,70,00,41,00,56,00,53,00,2e,00,7d,00,58,00,25,00,21,\
  00,21,00,21,00,21,00,21,00,4d,00,4b,00,4b,00,53,00,6b,00,4f,00,43,00,52,00,\
  5f,00,32,00,30,00,35,00,32,00,3c,00,00,00,00,00
[HKEY_CLASSES_ROOT\Installer\Features\00002109F10040800000000000F01FEC]
"OCR_2052"=""
[HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\Installer\UserData\S-1-5-18\Products\00002109F10040800000000000F01FEC\Features]
"OCR_2052"="%mEMae,7q9*DXdU@EPi="
[HKEY_CLASSES_ROOT\Installer\Products\00002109710000000000000000F01FEC]
[HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\Installer\UserData\S-1-5-18\Components\3F745FF6A76FF2F4797DB74FC7B3FD8B]
"00002109710000000000000000F01FEC"="C:\\Program Files\\Common Files\\Microsoft Shared\\MODI\\12.0\\XPAGE3C.DLL"
部分核心代码如下:
// Copies the MODI interface DLLs shipped in "所必须的DLL" under the current directory into
// the shared MODI folder under Program Files, registers MDIVWCTL.DLL as a COM server and
// then offers to restart the machine.
// NOTE(review): writing under Program Files and registering a COM server presumably
// requires an elevated process - confirm how this installer is launched.
curpath = System.IO.Directory.GetCurrentDirectory();
curpath = curpath + "\\所必须的DLL";
propath = System.Environment.GetEnvironmentVariable("ProgramFiles");
soupath = propath + @"\Common Files\microsoft shared\MODI";
// CreateDirectory does nothing when the folder already exists.
Directory.CreateDirectory(soupath);
regpath = soupath;

// Despite their names, decfile is the copy source and soufile the copy target.
string soufile, decfile;
decfile = curpath + @"\1、接口层\MDIVWCTL.DLL";
soufile = regpath + @"\MDIVWCTL.DLL";
File.Copy(decfile, soufile, true);
decfile = curpath + @"\1、接口层\MSO.DLL";
soufile = regpath + @"\MSO.DLL";
File.Copy(decfile, soufile, true);

// Launch regsvr32 directly with the quoted DLL path. Writing commands to cmd.exe through
// StandardInput throws InvalidOperationException unless input is redirected, and the old
// "cd /d" + path lacked both the separating space and quoting for the space in "Program Files".
string command = "\"" + regpath + @"\MDIVWCTL.DLL" + "\"";
Process p = new Process();
p.StartInfo.FileName = "regsvr32.exe";
p.StartInfo.Arguments = command;
p.StartInfo.UseShellExecute = false;
p.Start();
// Wait so the restart prompt only appears after registration has finished.
p.WaitForExit();
p.Close();

if (MessageBox.Show("OCR文件拷贝已经完成,请重启计算机。", "提示", MessageBoxButtons.YesNo) == DialogResult.Yes)
{
    // "-r" restarts, as the prompt promises; the previous "-s" powered the machine off.
    command = "shutdown -r -t 5";
    Process p5 = new Process();
    p5.StartInfo.FileName = "cmd.exe";
    p5.StartInfo.Arguments = "/c " + command;
    p5.Start();
}
以上是我使用MODI对图像文件中文字的提取。希望对大家有帮助。
下载地址:源码及资源库下载