java web图片ocr,基于Tesseract-OCR实现的JAVA WEB版OCR(图片转文字)

首先在Tesseract-OCR官网下载Tesseract-OCR 3.02,以及中文数据包chi_sim.traineddata(简体)

接下来就是新建一个JAVA EE项目,把Tesseract-OCR放在项目WebRoot下。

下面是主要代码:

接收客户端上传过来的图片,使用Tesseract-OCR识别后将结果返回至前台。

package servlet;

import java.io.IOException;

import javax.servlet.ServletConfig;

import javax.servlet.ServletException;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

import util.FileUtil;

import util.OCRUtil;

import com.jspsmart.upload.File;

import com.jspsmart.upload.SmartUpload;

import com.jspsmart.upload.SmartUploadException;

public class OCRServlet extends HttpServlet {

public void doPost(HttpServletRequest request, HttpServletResponse response)

throws ServletException, IOException {

response.setCharacterEncoding("gbk");

SmartUpload upload = new SmartUpload();

ServletConfig sc = this.getServletConfig();

upload.initialize(sc, request, response);

File file = null;

long size = 5*1024*1024;

upload.setAllowedFilesList("gif,jpg,bmp,png");

upload.setMaxFileSize(size);

upload.setCharset("GBK");

try {

upload.upload();

file = upload.getFiles().getFile(0);

String userPath = "upload\\"+request.getRemoteAddr().replaceAll("\\.", "")+"\\";

String svpath = userPath+file.getFileName();

if(!file.isMissing()){

String realPath = request.getRealPath("/");

FileUtil.creatPath(realPath+userPath);

file.saveAs(svpath,SmartUpload.SAVE_VIRTUAL);

try {

OCRUtil.runOCR(realPath, realPath+svpath, realPath+userPath+"ocr",true);

request.setAttribute("txt", FileUtil.read(realPath+userPath+"ocr.txt").trim());

request.getRequestDispatcher("/index.jsp").forward(request, response);

} catch (Exception e) {

e.printStackTrace();

}

FileUtil.delete(realPath+userPath);

}

} catch (SmartUploadException e) {

e.printStackTrace();

}

}

}

package util;

/**
 * Thin wrapper around the Tesseract-OCR command line executable that is
 * bundled with the web application.
 */
public class OCRUtil {

    /** Tesseract language code passed to {@code -l} for Simplified Chinese. */
    public static String chiSIM = "chi_sim";

    /**
     * Runs {@code tesseract.exe} on an image and waits for it to finish.
     *
     * @param realPath  directory containing the {@code Tesseract-OCR} folder;
     *                  must end with a path separator
     * @param imagePath path of the image to recognise
     * @param outPath   output base path handed to Tesseract; the caller in this
     *                  application reads the result from {@code outPath + ".txt"}
     * @param isChi     {@code true} to recognise Simplified Chinese
     *                  ({@code -l chi_sim}); {@code false} to use Tesseract's
     *                  default language
     * @throws Exception if the process cannot be started, is interrupted, or
     *                   exits with a non-zero status
     */
    public static void runOCR(String realPath, String imagePath, String outPath, boolean isChi) throws Exception {
        // Pass the arguments as a list so that paths containing spaces need no
        // manual quoting.
        java.util.List<String> command = new java.util.ArrayList<String>();
        command.add(realPath + "Tesseract-OCR\\tesseract.exe");
        command.add(imagePath);
        command.add(outPath);
        if (isChi) {
            // Only add -l together with its value; a dangling "-l" is an error.
            command.add("-l");
            command.add(chiSIM);
        }

        ProcessBuilder builder = new ProcessBuilder(command);
        builder.redirectErrorStream(true);
        Process process = builder.start();

        // Drain the merged stdout/stderr so the child cannot block on a full pipe.
        java.io.InputStream output = process.getInputStream();
        try {
            byte[] buffer = new byte[4096];
            while (output.read(buffer) != -1) {
                // discard
            }
        } finally {
            output.close();
        }

        int exitCode = process.waitFor();
        if (exitCode != 0) {
            throw new java.io.IOException(
                    "tesseract exited with code " + exitCode + " for " + imagePath);
        }
    }
}

package util;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStreamReader;

/**
 * Small file helpers used by the OCR servlet: waiting for and reading the OCR
 * result, creating the working directory and removing it again.
 */
public class FileUtil {

    /** Pause between two checks for the file in {@link #read(String)}. */
    private static final long POLL_INTERVAL_MILLIS = 100;

    /**
     * Longest time {@link #read(String)} waits for the file to appear.
     * NOTE(review): the original declared {@code 30*60} without a unit and never
     * enforced it; it is interpreted here as 30 minutes - confirm the intent.
     */
    private static final long READ_TIMEOUT_MILLIS = 30 * 60 * 1000L;

    /**
     * Waits until {@code path} exists as a regular file and returns its content
     * decoded as UTF-8, with all lines concatenated (line terminators are
     * dropped).
     *
     * @param path path of the file to read
     * @return the concatenated lines of the file; empty for an empty file
     * @throws IOException if the file does not appear before the timeout
     *                     elapses, the waiting thread is interrupted, or the
     *                     file cannot be read
     */
    public static String read(String path) throws IOException {
        File file = new File(path);
        long remaining = READ_TIMEOUT_MILLIS;
        while (!file.isFile()) {
            if (remaining <= 0) {
                throw new java.io.FileNotFoundException("Timed out waiting for " + path);
            }
            try {
                Thread.sleep(POLL_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                // Restore the flag and stop waiting; continuing would make every
                // following sleep() fail immediately.
                Thread.currentThread().interrupt();
                throw new java.io.InterruptedIOException("Interrupted while waiting for " + path);
            }
            remaining -= POLL_INTERVAL_MILLIS;
        }

        StringBuilder text = new StringBuilder();
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line);
            }
        } finally {
            // Closing the outermost reader also closes the wrapped streams.
            reader.close();
        }
        return text.toString();
    }

    /**
     * Creates the directory {@code path}, including any missing parent
     * directories. Does nothing when the directory already exists.
     *
     * @param path directory to create
     * @throws IOException if the directory does not exist afterwards
     */
    public static void creatPath(String path) throws IOException {
        File dir = new File(path);
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Could not create directory " + path);
        }
    }

    /**
     * Deletes the entries directly inside the directory {@code path} and then
     * the directory itself. Deletion is best effort: entries that cannot be
     * removed (for example non-empty sub-directories) are left in place, and a
     * missing directory is ignored.
     *
     * @param path directory to remove, with or without a trailing separator
     * @throws IOException declared for interface compatibility
     */
    public static void delete(String path) throws IOException {
        File dir = new File(path);
        // list() returns null when path is not a directory or cannot be read.
        String[] names = dir.list();
        if (names != null) {
            for (String name : names) {
                new File(dir, name).delete();
            }
        }
        dir.delete();
    }
}

下面是JSP代码:

在线OCR--By Lee

/**
 * Validates the file chosen in the "image" input before the form is submitted.
 *
 * Shows an alert and returns false when no file is selected or when the file
 * name does not end in .jpg, .bmp, .gif or .png (case-insensitive); returns
 * true otherwise.
 */
function check(){
    var path = document.getElementById("image").value;

    if(path.length==0){
        alert("请选择要导入的图片!");
        return false;
    }

    // The dot is escaped so that only a real extension matches; an unescaped
    // "." would also accept names such as "notes-jpg".
    if(!/\.(jpg|bmp|gif|png)$/i.test(path)){
        alert("只支持JPG,BMP,GIF,PNG格式!");
        return false;
    }

    return true;
}

选择文件:

上传文件:

// Renders the OCR result: prints a fallback message when txt is an empty
// string, prints txt itself when it is non-empty, and prints nothing when
// txt is null.
// NOTE(review): the scriptlet opener and the declaration of txt are not
// visible in this fragment - presumably txt is read from the "txt" request
// attribute; confirm.
// NOTE(review): txt is written with out.print() without HTML escaping.
if(txt!=null&&txt.toString().length()==0){

out.print("未识别出任何文字!");

}else if(txt!=null){

out.print(txt.toString());

}

%>

效果图:

9b46fb9f8aa29bb273f84b71ff39aa4b.bmp

在图片未做任何预处理的情况下,识别率还是比较低的。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值