一、项目背景
基于图片文字识别的需求,在系统中实现此功能。
二、实现思路
前台上传需要识别的图片,后台接收到图片进行识别,将识别结果(文字字符串)返回给前台,本文着重介绍识别部分的简单实现,上传功能不做介绍。
三、实现过程
新问题往往需要使用新的技术去解决,过程也许是曲折艰辛的,但结果总是证明了我们的成长。直接进入正题吧:本文使用 Tesseract-OCR(通过其 Java 封装 Tess4J 调用)。官方地址:https://code.google.com/p/tesseract-ocr/ (Google Code 已停止服务,项目现托管于 https://github.com/tesseract-ocr/tesseract );Maven 仓库可参考:http://mvnrepository.com/artifact/net.sourceforge.tess4j/tess4j/3.2.0
1.新建maven项目,在pom.xml中添加依赖包,并在lib路径下添加.dll文件
2.将.dll文件添加到windows系统中,将jai-imageio-core.jar加到jdk的lib中
3.项目中新建tessdata文件夹,放置字库等信息
(eng.traineddata 文件对应英文字库。若要识别中文,可将下载好的中文字库 chi_sim.traineddata 重命名并替换 eng.traineddata;更规范的做法是把 chi_sim.traineddata 直接放入 tessdata 文件夹,并在识别前调用 instance.setLanguage("chi_sim") 指定语言,这样无需覆盖英文字库)
以上相当于配置tesseract-ocr环境
4.识别代码
/**
 * Accepts an uploaded image, runs OCR on the stored file and returns the recognized text.
 *
 * <p>Keys of the returned map:
 * <ul>
 *   <li>{@code "status"} - {@code "true"} when an uploaded file was found and passed to OCR,
 *       {@code "false"} otherwise;</li>
 *   <li>{@code "reStr"} - the recognized text (only present on success);</li>
 *   <li>{@code "result"} - the collected {@code SystemInfo} error entries (only present on failure).</li>
 * </ul>
 *
 * @param request the incoming request; expected to be a multipart request carrying the image
 * @param response the HTTP response, handed to the upload helper
 * @param bpmProspectingStr additional parameters submitted together with the image
 *        (not read by this method)
 * @return the response map described above
 * @throws IOException if reading the stored image fails
 * @throws IllegalStateException if the upload directory cannot be derived from the servlet real path
 */
@ResponseBody
@RequestMapping(value = "mSave")
public Map<String,Object> mSave(HttpServletRequest request, HttpServletResponse response,String bpmProspectingStr) throws IOException {
    Map<String, Object> map = Maps.newHashMap();
    List<SystemInfo> result = Lists.newArrayList();
    CommonsMultipartResolver multipartResolver = new CommonsMultipartResolver(request.getSession().getServletContext());
    if (!multipartResolver.isMultipart(request)) {
        // Not a file upload: report the error instead of returning an empty map.
        result.add(new SystemInfo("error", AppInfo.getStateInfo(AppInfo.NOT_MULTIPART_CONTENT)));
        map.put("status", "false");
        // NOTE(review): "result" is a new response key; confirm SystemInfo serializes as expected.
        map.put("result", result);
        return map;
    }

    // Store the uploaded image; the helper returns the stored file name.
    String photoFileName = UploadUtil.mobileUpload(request, response, configJSONPath);

    // Derive the directory that contains the "smartinspection" webapp, then point at
    // static/upload/<file> next to it. getRealPath may return null and the marker may be
    // missing, so both are checked instead of failing with an opaque NPE / index error.
    String contextRoot = request.getSession().getServletContext().getRealPath(request.getContextPath());
    int markerIndex = (contextRoot == null) ? -1 : contextRoot.lastIndexOf("smartinspection");
    if (markerIndex < 0) {
        throw new IllegalStateException(
                "Cannot locate upload directory: \"smartinspection\" not found in real path " + contextRoot);
    }
    String realPath = contextRoot.substring(0, markerIndex) + "static/upload/" + photoFileName;

    MultipartHttpServletRequest multiRequest = (MultipartHttpServletRequest) request;
    Iterator<String> fileNames = multiRequest.getFileNames();
    boolean fileFound = false;
    while (fileNames.hasNext()) {
        MultipartFile multipartFile = multiRequest.getFile(fileNames.next());
        if (multipartFile != null) {
            // Only the first uploaded file is recognized.
            map.put("reStr", getLocation(realPath));
            fileFound = true;
            break;
        }
    }

    if (!fileFound) {
        // The "no upload data" error belongs here, not on the success path.
        result.add(new SystemInfo("error", AppInfo.getStateInfo(AppInfo.NOTFOUND_UPLOAD_DATA)));
        map.put("status", "false");
        map.put("result", result);
        return map;
    }

    map.put("status", "true");
    return map;
}
/**
 * Runs Tesseract OCR on the image file at {@code filePath} and returns the recognized text.
 *
 * <p>The existence check happens before any image conversion, so a missing file is reported
 * through the "file not found" message rather than an exception from the conversion helper.
 * The result is held in a local variable so that text from one call can never be returned
 * by another call.
 *
 * @param filePath path of the image file to recognize
 * @return the trimmed OCR text, or an empty string when the file does not exist or OCR fails
 * @throws IOException if converting the image for OCR fails
 */
private String getLocation(String filePath) throws IOException {
    File imageFile = new File(filePath);
    if (!imageFile.exists()) {
        System.err.println("找不到文件");
        return "";
    }

    File tempImage = ImageIOHelper.createImage(imageFile, "jpg");
    ImageIO.scanForPlugins();
    Tesseract instance = Tesseract.getInstance(); // JNA Interface Mapping

    // Diagnostic output: prints whether TIFF readers/writers are registered with ImageIO.
    System.err.println("检验环境是否支持TIFF");
    Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName("tif");
    System.out.println(readers.hasNext());
    Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
    System.out.println(writers.hasNext());

    String capctha = "";
    try {
        capctha = instance.doOCR(tempImage).trim();
        System.err.println("解析结果capctha is: "+capctha);
    } catch (TesseractException e) {
        // Best effort: an OCR failure is logged and an empty result is returned.
        e.printStackTrace();
    }
    return capctha;
}