packagecom.vue.demo.service.serviceimpl;importcom.vue.demo.service.OCRService;importnet.sourceforge.tess4j.Tesseract;importnet.sourceforge.tess4j.TesseractException;importnet.sourceforge.tess4j.util.ImageHelper;importorg.apache.commons.io.FileUtils;importorg.slf4j.Logger;importorg.slf4j.LoggerFactory;importorg.springframework.stereotype.Service;importorg.springframework.web.multipart.MultipartFile;importjavax.imageio.ImageIO;importjava.awt.image.BufferedImage;importjava.io.File;importjava.io.IOException;/***@authoryangwj
* @date 2020/4/1 9:29
*/
@Service
public class OCRServiceImpl implements OCRService {

    private static final Logger ocrServiceImplLog = LoggerFactory.getLogger(OCRServiceImpl.class);

    /**
     * Directory containing the Tesseract trained-data files.
     * Trained data can be downloaded from https://github.com/tesseract-ocr/tessdata.
     */
    private static final String MODEL_PATH = "/root/project/java/tesseract_model";

    /** File to which the pre-processed (grayscale + binarised) image is written as PNG. */
    private static final String PREPROCESSED_IMAGE_PATH = "/root/project/java/tesseract_model/temp_img";

    /** Tesseract language code used for Simplified Chinese recognition. */
    private static final String SIMPLIFIED_CHINESE = "chi_sim";

    /**
     * Enlargement factor applied to the image before the second binarisation.
     * Currently 1, i.e. the image keeps its original dimensions; raise it if
     * small print is not recognised reliably.
     */
    private static final int SCALE_FACTOR = 1;

    /**
     * Language selector set through {@link #getLanguage(String)}; the value "ch"
     * switches the first OCR pass to Simplified Chinese.
     * NOTE(review): if this bean is a singleton (Spring's default scope) the value
     * is shared by every request - confirm that this is intended.
     */
    String language = "";

    /**
     * Method one: runs OCR on the uploaded picture twice - once on the raw image and
     * once on a pre-processed copy (see {@link #ocr(File, String)}) - and returns
     * both texts joined by a separator line.
     *
     * <p>The upload is stored in a temporary file that is deleted before returning.
     *
     * @param file the uploaded picture
     * @return the raw-image text, a separator line and the pre-processed-image text
     *         (the literal "null" is appended when the second pass failed);
     *         {@code null} when the upload is missing/empty or the first pass failed
     */
    @Override
    public String getCharacterFromPic(MultipartFile file) {
        if (file == null || file.isEmpty()) {
            ocrServiceImplLog.warn("No image content was uploaded; skipping OCR");
            return null;
        }

        Tesseract tesseract = new Tesseract();
        tesseract.setDatapath(MODEL_PATH);
        // English is the default recognition language; Chinese has to be selected explicitly.
        if ("ch".equals(language)) {
            tesseract.setLanguage(SIMPLIFIED_CHINESE);
        }

        File imageFile = null;
        try {
            // Never use the client-supplied file name as a path: it is untrusted input
            // and may be null or contain directory components.
            imageFile = File.createTempFile("ocr_upload_", tempSuffixFor(file.getOriginalFilename()));
            FileUtils.copyInputStreamToFile(file.getInputStream(), imageFile);

            String result = tesseract.doOCR(imageFile);
            ocrServiceImplLog.info(result);
            ocrServiceImplLog.info("----------------");

            String handleResult = this.ocr(imageFile, MODEL_PATH);
            ocrServiceImplLog.info(handleResult);
            return result + "----------------------------------\n\r" + handleResult;
        } catch (TesseractException e) {
            ocrServiceImplLog.error("Tesseract failed to recognise the uploaded image", e);
        } catch (IOException e) {
            ocrServiceImplLog.error("Could not store the uploaded image for OCR", e);
        } finally {
            if (imageFile != null && imageFile.exists() && !imageFile.delete()) {
                ocrServiceImplLog.warn("Could not delete temporary OCR file {}", imageFile);
            }
        }
        return null;
    }

    /**
     * Stores the language selector used by {@link #getCharacterFromPic(MultipartFile)}.
     * Despite its name this method sets the value; the name is dictated by the
     * implemented interface.
     *
     * @param language the selector to store, e.g. "ch" for Simplified Chinese
     * @return "success" when the value was stored, {@code null} when
     *         {@code language} is {@code null} or empty
     */
    @Override
    public String getLanguage(String language) {
        // isEmpty() compares content; '==' would only compare object references.
        if (language == null || language.isEmpty()) {
            return null;
        }
        this.language = language;
        return "success";
    }

    /**
     * Builds the suffix for the temporary upload file from the client-supplied name.
     * Only a short, purely alphanumeric extension is kept so that no path
     * information from the client can leak into the file name.
     * NOTE(review): the extension is preserved because Tess4J's file-based
     * {@code doOCR} appears to choose the image reader from it - confirm.
     *
     * @param originalFilename the file name reported by the client, may be {@code null}
     * @return "." followed by the extension, or {@code null} to let
     *         {@link File#createTempFile(String, String)} use its default suffix
     */
    private static String tempSuffixFor(String originalFilename) {
        if (originalFilename == null) {
            return null;
        }
        int dot = originalFilename.lastIndexOf('.');
        if (dot < 0 || dot == originalFilename.length() - 1) {
            return null;
        }
        String extension = originalFilename.substring(dot + 1);
        return extension.matches("[A-Za-z0-9]{1,10}") ? "." + extension : null;
    }

    /**
     * Method two: pre-processes the picture (grayscale, binarise, scale, binarise
     * again), writes the processed image to {@link #PREPROCESSED_IMAGE_PATH} and
     * runs Simplified Chinese OCR on it.
     *
     * @param file      the picture to recognise
     * @param modelPath directory containing the Tesseract trained-data files
     * @return the recognised text, or {@code null} when the picture could not be
     *         decoded or any processing step failed
     */
    private String ocr(File file, String modelPath) {
        String result = null;
        try {
            long start = System.currentTimeMillis();

            BufferedImage textImage = ImageIO.read(file);
            if (textImage == null) {
                // ImageIO.read returns null when no registered reader can decode the file.
                ocrServiceImplLog.warn("No ImageIO reader could decode {}; skipping pre-processed OCR", file);
                return null;
            }

            // Reduce the picture to black and white to improve the recognition rate.
            textImage = ImageHelper.convertImageToGrayscale(textImage);
            textImage = ImageHelper.convertImageToBinary(textImage);
            // Enlarging can make otherwise unreadable pictures recognisable; see SCALE_FACTOR.
            textImage = ImageHelper.getScaledInstance(
                    textImage,
                    textImage.getWidth() * SCALE_FACTOR,
                    textImage.getHeight() * SCALE_FACTOR);
            textImage = ImageHelper.convertImageToBinary(textImage);

            // Keep a copy of the processed image on disk.
            // NOTE(review): the fixed path is overwritten by every call - confirm it is only a debugging aid.
            ImageIO.write(textImage, "png", new File(PREPROCESSED_IMAGE_PATH));

            Tesseract instance = new Tesseract();
            instance.setDatapath(modelPath);
            // This pass always uses Simplified Chinese.
            instance.setLanguage(SIMPLIFIED_CHINESE);
            result = instance.doOCR(textImage);

            long end = System.currentTimeMillis();
            ocrServiceImplLog.info("耗时{} s", (end - start) / 1000.0);
        } catch (Exception e) {
            // Best effort: the caller still returns the raw-image result when this pass fails.
            ocrServiceImplLog.error("OCR on the pre-processed image failed", e);
        }
        return result;
    }
}