本文探讨基于tesseract的多线程OCR服务器的JAVA实现,可同时对多个android手机客户端提供图片OCR服务
project源码下载 http://download.csdn.net/user/yangliuy
最近接手一个项目,项目的背景是要开发一个CS架构的发票真伪识别系统,客户端为android手机,采集发票图像传到服务器做OCR识别,识别出来的发票号码和发票密码发送到国税局官网发票真伪查询页面,然后将真伪信息返回给手机用户。为了开发一个多线程OCR服务器,我研究了JAVA图像处理及OCR技术。JAVA强大的图形处理相关库如java.awt.image等为采集图像的裁剪、放缩、二值化、去噪等提供了良好的基础,而OCR主要采用了Google Tesseract开源OCR引擎,tesseract安装在本地后可以用cmd命令行调用,而JAVA支持cmd命令的调用。此外还用到了JAVA线程池、互斥锁等多线程编程技术及socket等网络编程技术。源码如下
多线程Server端 Server.java
package com.serverMain;
import java.net.ServerSocket;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.io.IOException;
/**
 * Listening side of the OCR service: accepts TCP connections on a fixed port
 * and hands each accepted socket to a {@code ServerRun} task executed on a
 * cached thread pool.
 *
 * <p>Typical use is {@code new Server(port).startServer()}; the accept loop
 * then runs on this object's own thread until {@link #stopServer()} closes
 * the listening socket.
 *
 * @author yangliuis@pku.edu.cn
 */
public class Server extends Thread {
    /** TCP port the listening socket is bound to. */
    private final int port;
    /** Listening socket; created in {@link #startServer()}. Volatile because stopServer() may run on another thread. */
    private volatile ServerSocket server;
    /** Thread pool that runs one task per accepted connection. */
    private volatile ExecutorService threadPool;

    /**
     * Creates a server for the given port. Nothing is bound until
     * {@link #startServer()} is called.
     *
     * @param port TCP port to listen on
     */
    public Server(int port) {
        super();
        this.port = port;
    }

    /**
     * Binds the listening socket, creates the worker pool and starts the
     * accept loop on this thread object.
     *
     * @throws IOException if the server socket cannot be opened on the port
     */
    public void startServer() throws IOException {
        server = new ServerSocket(port);
        threadPool = Executors.newCachedThreadPool();
        System.out.println("欢迎使用Helios系统,服务器启动");
        this.start();
    }

    /**
     * Stops accepting new connections by closing the listening socket.
     * The accept loop notices the closed socket, exits, and shuts the worker
     * pool down; tasks already submitted are allowed to finish.
     * Calling this before {@link #startServer()} is a no-op.
     *
     * @throws IOException if closing the server socket fails
     */
    public void stopServer() throws IOException {
        ServerSocket listener = server;
        if (listener != null) {
            listener.close();
        }
    }

    /**
     * Accept loop: blocks in {@code accept()}, wraps each connection in a
     * {@code ServerRun} and submits it to the pool. A failed accept is
     * reported and the loop keeps serving, unless the listening socket has
     * been closed, in which case the loop ends instead of spinning on the
     * same exception forever.
     *
     * @throws IllegalStateException if run before {@link #startServer()}
     */
    @Override
    public void run() {
        ServerSocket listener = server;
        ExecutorService pool = threadPool;
        if (listener == null || pool == null) {
            throw new IllegalStateException(
                    "startServer() must be called before the accept loop runs");
        }
        try {
            while (!listener.isClosed()) {
                try {
                    ServerRun task = new ServerRun(listener.accept());
                    pool.execute(task);
                } catch (IOException e) {
                    if (listener.isClosed()) {
                        // Closed socket: every further accept() would fail, so stop.
                        break;
                    }
                    e.printStackTrace();
                }
            }
        } finally {
            // Shut down here, after the last execute(), so no submission can be rejected.
            pool.shutdown();
        }
    }

    /**
     * Starts the server on port 8089.
     *
     * @param args unused
     * @throws IOException if the server socket cannot be opened
     */
    public static void main(String[] args) throws IOException {
        Server server = new Server(8089);
        server.startServer();
    }
}
Server端任务ServerRun.java
package com.serverMain;
import java.net.Socket;
import java.net.URL;
import java.net.URLConnection;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import com.imageHandle.OperateImage;
import com.imageHandle.SoundBinImage;
/**
* @author yangliuis@pku.edu.cn
*
*/
public class ServerRun extends Thread implements Runnable{
private static Integer invoicePicNum = 0;//发票图片序号
//private static Integer captchasPicNum = 0;//验证码图片序号
private Socket socket;
private final String invoiceDir = "F://Helios//data//invoice_image//";
//private final String captchasDir = "F://Helios//data//captchas_image//";
public ServerRun(Socket socket){
this.socket = socket;
}
public void run(){
String invoicePicFilename = invoiceDir+"invoice_image_";
invoicePicFilename += invoicePicNum+".jpg";
try {
DataInputStream dis = new DataInputStream(socket.getInputStream());
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(invoicePicFilename));
byte buffer[] = new byte[1024];
int eof = 0;
while((eof = dis.read(buffer, 0, 1024)) != -1) {
bos.write(buffer, 0 ,e