基于Tesseract-OCR实现的JAVA WEB版OCR(图片转文字)

首先在 Tesseract-OCR官网下载Tesseract-OCR 3.02,以及中文数据包chi_sim.traineddata(简体)

接下来就是新建一个JAVA EE项目,把Tesseract-OCR放在项目WebRoot下。

下面是主要代码:

接收客户端上传的图片,使用Tesseract-OCR识别后,将识别结果返回至前台。

  1. package servlet;  
  2.   
  3. import java.io.IOException;  
  4.   
  5. import javax.servlet.ServletConfig;  
  6. import javax.servlet.ServletException;  
  7. import javax.servlet.http.HttpServlet;  
  8. import javax.servlet.http.HttpServletRequest;  
  9. import javax.servlet.http.HttpServletResponse;  
  10.   
  11. import util.FileUtil;  
  12. import util.OCRUtil;  
  13.   
  14. import com.jspsmart.upload.File;  
  15. import com.jspsmart.upload.SmartUpload;  
  16. import com.jspsmart.upload.SmartUploadException;  
  17.   
  18.   
  19. public class OCRServlet extends HttpServlet {  
  20.   
  21.     public void doPost(HttpServletRequest request, HttpServletResponse response)  
  22.             throws ServletException, IOException {  
  23.         response.setCharacterEncoding("gbk");  
  24.         SmartUpload upload = new SmartUpload();  
  25.         ServletConfig sc = this.getServletConfig();  
  26.         upload.initialize(sc, request, response);  
  27.         File file = null;  
  28.         long size = 5*1024*1024;  
  29.         upload.setAllowedFilesList("gif,jpg,bmp,png");  
  30.         upload.setMaxFileSize(size);  
  31.         upload.setCharset("GBK");  
  32.         try {  
  33.             upload.upload();  
  34.             file = upload.getFiles().getFile(0);  
  35.             String userPath = "upload\\"+request.getRemoteAddr().replaceAll("\\.", "")+"\\";  
  36.             String svpath = userPath+file.getFileName();  
  37.             if(!file.isMissing()){  
  38.                 String realPath = request.getRealPath("/");  
  39.                 FileUtil.creatPath(realPath+userPath);  
  40.                 file.saveAs(svpath,SmartUpload.SAVE_VIRTUAL);  
  41.                 try {  
  42.                     OCRUtil.runOCR(realPath, realPath+svpath, realPath+userPath+"ocr",true);  
  43.                     request.setAttribute("txt", FileUtil.read(realPath+userPath+"ocr.txt").trim());  
  44.                     request.getRequestDispatcher("/index.jsp").forward(request, response);  
  45.                 } catch (Exception e) {  
  46.                     e.printStackTrace();  
  47.                 }  
  48.                 FileUtil.delete(realPath+userPath);  
  49.             }  
  50.         } catch (SmartUploadException e) {  
  51.             e.printStackTrace();  
  52.         }  
  53.     }  
  54.   
  55. }  
package servlet;

import java.io.IOException;

import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import util.FileUtil;
import util.OCRUtil;

import com.jspsmart.upload.File;
import com.jspsmart.upload.SmartUpload;
import com.jspsmart.upload.SmartUploadException;


public class OCRServlet extends HttpServlet {

	public void doPost(HttpServletRequest request, HttpServletResponse response)
			throws ServletException, IOException {
		response.setCharacterEncoding("gbk");
		SmartUpload upload = new SmartUpload();
		ServletConfig sc = this.getServletConfig();
		upload.initialize(sc, request, response);
		File file = null;
		long size = 5*1024*1024;
		upload.setAllowedFilesList("gif,jpg,bmp,png");
		upload.setMaxFileSize(size);
		upload.setCharset("GBK");
		try {
			upload.upload();
			file = upload.getFiles().getFile(0);
			String userPath = "upload\\"+request.getRemoteAddr().replaceAll("\\.", "")+"\\";
			String svpath = userPath+file.getFileName();
			if(!file.isMissing()){
				String realPath = request.getRealPath("/");
				FileUtil.creatPath(realPath+userPath);
				file.saveAs(svpath,SmartUpload.SAVE_VIRTUAL);
				try {
					OCRUtil.runOCR(realPath, realPath+svpath, realPath+userPath+"ocr",true);
					request.setAttribute("txt", FileUtil.read(realPath+userPath+"ocr.txt").trim());
					request.getRequestDispatcher("/index.jsp").forward(request, response);
				} catch (Exception e) {
					e.printStackTrace();
				}
				FileUtil.delete(realPath+userPath);
			}
		} catch (SmartUploadException e) {
			e.printStackTrace();
		}
	}

}
  1. package util;  
  2.   
  3. public class OCRUtil {  
  4.     public static String chiSIM = "chi_sim";  
  5.       
  6.     public static void runOCR(String realPath,String imagePath,String outPath,boolean isChi) throws Exception{  
  7.         Runtime r = Runtime.getRuntime();  
  8.         String cmd = "\""+realPath+"Tesseract-OCR\\tesseract.exe\" \""+imagePath+"\" \""+outPath+"\" -l "+(isChi?chiSIM:"");  
  9.         r.exec(cmd);  
  10.     }  
  11. }  
package util;

/**
 * Thin wrapper that launches the Tesseract-OCR command line tool bundled with
 * the web application.
 */
public class OCRUtil {
	/** Tesseract language code passed with {@code -l} for Simplified Chinese. */
	public static String chiSIM = "chi_sim";

	/**
	 * Runs {@code tesseract.exe} on one image and waits for it to finish.
	 *
	 * <p>The executable is looked up at {@code realPath + "Tesseract-OCR\\tesseract.exe"}.
	 * Arguments are passed as a list, so paths containing spaces are handled
	 * without manual quoting, and {@code -l} is only added when a language is
	 * actually requested (the previous command line ended in a bare {@code -l}
	 * when {@code isChi} was false).
	 *
	 * @param realPath  directory that contains the {@code Tesseract-OCR} folder,
	 *                  including its trailing separator
	 * @param imagePath path of the image to recognise
	 * @param outPath   output base name handed to tesseract (the servlet in this
	 *                  project reads the result from {@code outPath + ".txt"})
	 * @param isChi     {@code true} to recognise with the {@link #chiSIM} language data
	 * @throws Exception if the process cannot be started, is interrupted, or
	 *                   exits with a non-zero status
	 */
	public static void runOCR(String realPath,String imagePath,String outPath,boolean isChi) throws Exception{
		java.util.List<String> command = new java.util.ArrayList<String>();
		command.add(realPath+"Tesseract-OCR\\tesseract.exe");
		command.add(imagePath);
		command.add(outPath);
		if(isChi){
			command.add("-l");
			command.add(chiSIM);
		}

		ProcessBuilder builder = new ProcessBuilder(command);
		// Merge stderr into stdout so a single drain loop is enough.
		builder.redirectErrorStream(true);
		Process process = builder.start();
		try {
			// tesseract reads nothing from stdin; close it so it cannot block on it.
			process.getOutputStream().close();
			// Drain the output, otherwise a full pipe buffer would stall the child.
			java.io.InputStream output = process.getInputStream();
			byte[] buffer = new byte[4096];
			while(output.read(buffer) != -1){
				// discard
			}
			int exitCode = process.waitFor();
			if(exitCode != 0){
				throw new java.io.IOException("tesseract exited with code "+exitCode+" for "+imagePath);
			}
		} finally {
			// Releases the process streams; also kills the child if we were interrupted.
			process.destroy();
		}
	}
}
  1. package util;  
  2.   
  3. import java.io.BufferedReader;  
  4. import java.io.File;  
  5. import java.io.FileInputStream;  
  6. import java.io.IOException;  
  7. import java.io.InputStreamReader;  
  8.   
  9. public class FileUtil {  
  10.     public static String read(String path) throws IOException{  
  11.         String txt = "";  
  12.         File file = new File(path);  
  13.         long timeout = 30*60;  
  14.         while(!(file.isFile() && file.exists())){  
  15.             file = new File(path);  
  16.             try {  
  17.                 Thread.sleep(100);  
  18.                 timeout -= 100;  
  19.             } catch (InterruptedException e) {  
  20.                 e.printStackTrace();  
  21.             }  
  22.         }  
  23.         if (file.isFile() && file.exists()) {  
  24.             InputStreamReader read = new InputStreamReader(new FileInputStream(file), "UTF-8");  
  25.             BufferedReader bReader = new BufferedReader(read);  
  26.             String temptxt = "";  
  27.             txt = "";  
  28.             while((temptxt=bReader.readLine())!=null){  
  29.                 txt += temptxt;  
  30.             }  
  31.             bReader.close();  
  32.             read.close();  
  33.         }  
  34.         return txt;  
  35.     }  
  36.       
  37.     public static void creatPath(String path) throws IOException{  
  38.         File file = new File(path);  
  39.         file.mkdir();  
  40.     }  
  41.       
  42.     public static void delete(String path) throws IOException{  
  43.         File file = new File(path);  
  44.         String[] list = file.list();  
  45.         File tempFile = null;  
  46.         for(String temp : list){  
  47.             tempFile = new File(path+temp);  
  48.             tempFile.delete();  
  49.         }  
  50.         file.delete();  
  51.     }  
  52. }  
package util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Small file helpers used by the OCR servlet: read the OCR result, create the
 * work directory and remove it again.
 */
public class FileUtil {
	/**
	 * Default time {@link #read(String)} waits for the file to appear, in
	 * milliseconds. NOTE(review): the original code declared {@code 30*60} but
	 * never checked it; it is interpreted here as seconds (30 minutes) - confirm.
	 */
	private static final long DEFAULT_WAIT_MILLIS = 30L * 60 * 1000;

	/** Pause between two checks for the file, in milliseconds. */
	private static final long POLL_INTERVAL_MILLIS = 100;

	/**
	 * Reads a UTF-8 text file, waiting up to the default timeout for it to appear.
	 *
	 * @param path path of the file to read
	 * @return all lines of the file concatenated without line separators, or an
	 *         empty string if the file did not appear in time
	 * @throws IOException if the file exists but cannot be read
	 */
	public static String read(String path) throws IOException{
		return read(path, DEFAULT_WAIT_MILLIS);
	}

	/**
	 * Reads a UTF-8 text file, polling until it exists or the timeout elapses.
	 *
	 * <p>The previous implementation decremented a timeout counter but never
	 * tested it, so a file that was never produced blocked the caller forever.
	 * The file is read as soon as it exists; this method cannot tell whether
	 * another process is still writing to it.
	 *
	 * @param path          path of the file to read
	 * @param timeoutMillis maximum time to wait for the file; zero or negative
	 *                      means "do not wait"
	 * @return all lines of the file concatenated without line separators, or an
	 *         empty string if the file did not appear in time or the waiting
	 *         thread was interrupted
	 * @throws IOException if the file exists but cannot be read
	 */
	public static String read(String path, long timeoutMillis) throws IOException{
		File file = new File(path);
		long start = System.currentTimeMillis();
		while(!file.isFile()){
			long remaining = timeoutMillis - (System.currentTimeMillis() - start);
			if(remaining <= 0){
				return "";
			}
			try {
				Thread.sleep(Math.min(POLL_INTERVAL_MILLIS, remaining));
			} catch (InterruptedException e) {
				// Keep the interrupt visible to the caller and stop waiting.
				Thread.currentThread().interrupt();
				return "";
			}
		}

		StringBuilder txt = new StringBuilder();
		BufferedReader bReader = new BufferedReader(
				new InputStreamReader(new FileInputStream(file), "UTF-8"));
		try {
			String line;
			while((line=bReader.readLine())!=null){
				// Lines are joined without separators, as before.
				txt.append(line);
			}
		} finally {
			// Closing the outermost reader also closes the underlying stream.
			bReader.close();
		}
		return txt.toString();
	}

	/**
	 * Creates the directory {@code path}, including missing parent directories.
	 *
	 * @param path directory to create
	 * @throws IOException if the directory does not exist and cannot be created
	 */
	public static void creatPath(String path) throws IOException{
		File dir = new File(path);
		if(!dir.mkdirs() && !dir.isDirectory()){
			throw new IOException("Could not create directory: "+path);
		}
	}

	/**
	 * Deletes the entries directly inside {@code path} and then {@code path} itself.
	 *
	 * <p>Deletion is best effort: entries that cannot be removed (for example
	 * non-empty sub-directories) are skipped silently. A path that is not a
	 * directory no longer causes a {@link NullPointerException}.
	 *
	 * @param path directory (or file) to delete; a trailing separator is optional
	 * @throws IOException declared for interface compatibility; not thrown by
	 *                     this implementation
	 */
	public static void delete(String path) throws IOException{
		File dir = new File(path);
		String[] names = dir.list();
		if(names != null){
			for(String name : names){
				// Resolve against the directory instead of concatenating strings.
				new File(dir, name).delete();
			}
		}
		dir.delete();
	}
}

下面是JSP代码:

  1. <%@ page language="java" import="java.util.*" pageEncoding="GBK"%>  
  2.   
  3. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">  
  4. <html>  
  5.   <head>  
  6.     <title>在线OCR--By Lee</title>  
  7.       
  8.     <meta http-equiv="pragma" content="no-cache">  
  9.     <meta http-equiv="cache-control" content="no-cache">  
  10.     <meta http-equiv="expires" content="0">      
  11.     <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">  
  12.     <meta http-equiv="description" content="This is my page">  
  13.     <!--a  
  14.     <link rel="stylesheet" type="text/css" href="styles.css">  
  15.     -->  
  16.   
  17.   </head>  
  18.   <script type="text/javascript">  
  19.     function check(){  
  20.         var path = document.getElementById("image").value;  
  21.         if(path.length==0){  
  22.             alert("请选择要导入的图片!");  
  23.             return false;  
  24.         }  
  25.         if(!(path.match(/.jpg$/i)||path.match(/.bmp$/i)||path.match(/.gif$/i)||path.match(/.png$/i))){  
  26.             alert("只支持JPG,BMP,GIF,PNG格式!");  
  27.             return false;  
  28.         }  
  29.         return true;  
  30.     }  
  31.   </script>  
  32.   <body>  
  33.   <form enctype="multipart/form-data" method="post" action="OCRServlet" onsubmit="return check();">  
  34.      选择文件:<input type="file" id="image" name="image"><br/>  
  35.      上传文件:<input type="submit" value="提交上传">  
  36.   </form>  
  37.   <textarea rows="20" cols="60"><%Object txt = request.getAttribute("txt");   
  38.     if(txt!=null&&txt.toString().length()==0){  
  39.         out.print("未识别出任何文字!");  
  40.     }else if(txt!=null){  
  41.         out.print(txt.toString());  
  42.     }  
  43.   %></textarea>  
  44.   </body>  
  45. </html>  
<%@ page language="java" import="java.util.*" pageEncoding="GBK"%>
<%--
  Upload form for the OCR servlet. After OCRServlet forwards back to this page,
  the request attribute "txt" holds the recognised text and is shown in the
  textarea below; an empty string means nothing was recognised.
--%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <title>在线OCR--By Lee</title>
    
	<meta http-equiv="pragma" content="no-cache">
	<meta http-equiv="cache-control" content="no-cache">
	<meta http-equiv="expires" content="0">    
	<meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
	<meta http-equiv="description" content="This is my page">
	<!--a
	<link rel="stylesheet" type="text/css" href="styles.css">
	-->

	<script type="text/javascript">
	  	// Client-side convenience check only; the servlet enforces the same
	  	// extension list on the server.
	  	function check(){
	  		var path = document.getElementById("image").value;
	  		if(path.length==0){
	  			alert("请选择要导入的图片!");
	  			return false;
	  		}
	  		// The dot is escaped so that e.g. "xjpg" no longer passes as ".jpg".
	  		if(!/\.(jpg|bmp|gif|png)$/i.test(path)){
	  			alert("只支持JPG,BMP,GIF,PNG格式!");
	  			return false;
	  		}
	  		return true;
	  	}
	</script>
  </head>
  <body>
  <%-- "onsubmit" must be plain ASCII; a look-alike first letter disables the handler. --%>
  <form enctype="multipart/form-data" method="post" action="OCRServlet" onsubmit="return check();">
     选择文件:<input type="file" id="image" name="image"><br/>
     上传文件:<input type="submit" value="提交上传">
  </form>
  <textarea rows="20" cols="60"><%
  	Object txt = request.getAttribute("txt");
  	if(txt!=null&&txt.toString().length()==0){
  		out.print("未识别出任何文字!");
  	}else if(txt!=null){
  		// Escape markup characters: the text comes from an uploaded image and
  		// must not be able to close the textarea or inject HTML.
  		out.print(txt.toString()
  				.replace("&", "&amp;")
  				.replace("<", "&lt;")
  				.replace(">", "&gt;"));
  	}
  %></textarea>
  </body>
</html>

效果图:


在图片没做任何处理的情况下,识别率还是挺低的。。

快速回复 TOP
  • 1
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 5
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值