java 图片解析文字_java从图片中识别文字

该博客介绍了如何使用Java进行图片中的文字识别。首先通过ImageIOHelper类将图片转换为TIFF格式,然后调用OCRUtil类配合Tesseract OCR工具识别图片中的文字。识别后的文字以文本形式返回。
摘要由CSDN通过智能技术生成

package com.dream.common;

import java.awt.image.BufferedImage;

import java.io.File;

import java.io.IOException;

import java.util.Iterator;

import java.util.Locale;

import javax.imageio.IIOImage;

import javax.imageio.ImageIO;

import javax.imageio.ImageReader;

import javax.imageio.ImageWriteParam;

import javax.imageio.ImageWriter;

import javax.imageio.metadata.IIOMetadata;

import javax.imageio.stream.FileImageInputStream;

import javax.imageio.stream.ImageInputStream;

import javax.imageio.stream.ImageOutputStream;

import com.github.jaiimageio.plugins.tiff.TIFFImageWriteParam;

/**

* 识别图片中的文字

*

* @author zlj

*

*/

public class ImageIOHelper {

/**
 * Re-encodes an image file as an uncompressed TIFF in a temporary file.
 *
 * <p>The first frame of {@code imageFile} is decoded with whichever registered
 * {@link ImageReader} accepts the file, then written with the first registered
 * "tiff" {@link ImageWriter}. The destination path is chosen by
 * {@link #tempImageFile(File)}. The caller is responsible for deleting the
 * returned file.
 *
 * @param imageFile the source image
 * @return the temporary file holding the TIFF data
 * @throws IOException if the file cannot be opened, no reader or TIFF writer is
 *         available, or reading/writing fails
 */
public File createImage(File imageFile) throws IOException {
    // A single stream serves both format detection and decoding, so nothing is
    // left open behind us.
    ImageInputStream iis = ImageIO.createImageInputStream(imageFile);
    if (iis == null) {
        throw new IOException("Cannot open image input stream for " + imageFile);
    }

    ImageReader reader = null;
    ImageWriter writer = null;
    ImageOutputStream ios = null;
    try {
        Iterator<ImageReader> readers = ImageIO.getImageReaders(iis);
        if (!readers.hasNext()) {
            throw new IOException("No image reader can decode " + imageFile);
        }
        reader = readers.next();
        reader.setInput(iis);

        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
        if (!writers.hasNext()) {
            throw new IOException("No TIFF image writer is registered");
        }
        writer = writers.next();

        TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.CHINESE);
        tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

        IIOMetadata streamMetadata = reader.getStreamMetadata();
        BufferedImage bi = reader.read(0);
        IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

        File tempFile = tempImageFile(imageFile);
        ios = ImageIO.createImageOutputStream(tempFile);
        if (ios == null) {
            throw new IOException("Cannot open image output stream for " + tempFile);
        }
        writer.setOutput(ios);
        writer.write(streamMetadata, image, tiffWriteParam);
        return tempFile;
    } finally {
        // Release everything even when decoding or encoding failed part-way.
        try {
            if (ios != null) {
                ios.close();
            }
        } finally {
            try {
                iis.close();
            } finally {
                if (writer != null) {
                    writer.dispose();
                }
                if (reader != null) {
                    reader.dispose();
                }
            }
        }
    }
}

/**
 * Derives the path of the temporary TIFF file for an image.
 *
 * <p>The file name's extension (everything from its last dot) is replaced by
 * {@code _text_recognize_temp.tif}; a name without a dot simply gets that
 * suffix appended. Only the file name is inspected, so dots in directory names
 * are left alone.
 *
 * <p>On Windows an {@code attrib +H} command is launched for the derived path
 * to mark it hidden; on other systems that step is skipped because the command
 * does not exist there.
 * NOTE(review): the visible caller creates the file only after this method
 * returns, so the command may find nothing to hide - confirm whether hiding
 * is still wanted.
 *
 * @param imageFile the source image
 * @return the (not yet created) temporary file
 * @throws IOException if the {@code attrib} command cannot be started on Windows
 */
private File tempImageFile(File imageFile) throws IOException {
    String path = imageFile.getPath();
    String name = imageFile.getName();

    // Locate the extension inside the file name, then map that index back onto
    // the full path (which always ends with the name).
    int dot = name.lastIndexOf('.');
    int stemEnd = dot >= 0 ? path.length() - name.length() + dot : path.length();
    String tempPath = path.substring(0, stemEnd) + "_text_recognize_temp.tif";

    String osName = System.getProperty("os.name", "").toLowerCase(Locale.ROOT);
    if (osName.startsWith("windows")) {
        // Argument array avoids quoting problems with spaces in the path.
        Runtime.getRuntime().exec(new String[] {"attrib", tempPath, "+H"});
    }

    return new File(tempPath);
}

}

package com.dream.common;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStreamReader;

import java.util.ArrayList;

import java.util.List;

import org.jdesktop.swingx.util.OS;

/**
 * Recognizes text in an image by running the Tesseract command-line tool.
 *
 * <p>The image is first converted to a temporary TIFF by {@link ImageIOHelper},
 * Tesseract is run in the image's directory, and the text file it produces is
 * read back and returned.
 *
 * @author zlj
 */
public class OCRUtil {

    /** Tesseract language flag: a lowercase letter "l", not the digit one. */
    private static final String LANG_OPTION = "-l";

    /** Platform line separator used when joining lines. */
    private static final String EOL = System.getProperty("line.separator");

    /** Directory of the Tesseract executable used on non-Linux systems (default install path). */
    private final String tessPath = "C://Program Files (x86)//Tesseract-OCR";

    /** Language pack passed after {@code -l}; defaults to the Chinese pack. */
    private final String transname = "chi_sim";

    /**
     * Recognizes the text contained in an image file.
     *
     * @param imageFile the image to analyse
     * @return the text recognized in the image, one line per recognized line
     * @throws Exception if the image cannot be converted or Tesseract fails
     */
    public String recognizeText(File imageFile) throws Exception {
        File tempImage = new ImageIOHelper().createImage(imageFile);
        return ocrImages(tempImage, imageFile);
    }

    /**
     * Runs Tesseract on the temporary image and returns the recognized text.
     *
     * <p>Tesseract is started in the directory of {@code imageFile} with the
     * output base name {@code output}, so the result is read from
     * {@code output.txt} in that directory. Both the temporary image and the
     * result file are deleted before this method returns, whether it succeeds
     * or fails.
     *
     * @param tempImage the temporary image to feed to Tesseract; always deleted
     * @param imageFile the original image, used only to locate the working directory
     * @return the recognized text
     * @throws IOException if Tesseract cannot be started or its result cannot be read
     * @throws InterruptedException if the thread is interrupted while waiting for Tesseract
     * @throws RuntimeException if Tesseract exits with a non-zero status
     */
    private String ocrImages(File tempImage, File imageFile) throws IOException, InterruptedException {
        File workDir = imageFile.getParentFile();
        File outputBase = new File(workDir, "output");
        File outputText = new File(outputBase.getAbsolutePath() + ".txt");

        List<String> cmd = new ArrayList<String>();
        // On Linux the executable is expected on the PATH; elsewhere the install directory is used.
        cmd.add(OS.isLinux() ? "tesseract" : tessPath + "//tesseract");
        cmd.add(tempImage.getName());
        cmd.add(outputBase.getName());
        cmd.add(LANG_OPTION);
        cmd.add(transname);

        ProcessBuilder pb = new ProcessBuilder(cmd);
        pb.directory(workDir);
        pb.redirectErrorStream(true);

        try {
            Process process = pb.start();
            // Consume the merged stdout/stderr before waiting: an unread pipe can
            // fill up and block the child process indefinitely.
            String console = drain(process);
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                throw new RuntimeException(describeFailure(exitCode, console));
            }
            return readText(outputText);
        } finally {
            tempImage.delete();
            outputText.delete();
        }
    }

    /** Reads the process's merged output stream to its end and returns it as text. */
    private static String drain(Process process) throws IOException {
        StringBuilder console = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                console.append(line).append(EOL);
            }
        } finally {
            reader.close();
        }
        return console.toString();
    }

    /** Reads a UTF-8 text file, terminating every line with the platform line separator. */
    private static String readText(File textFile) throws IOException {
        StringBuilder text = new StringBuilder();
        BufferedReader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(textFile), "UTF-8"));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                text.append(line).append(EOL);
            }
        } finally {
            in.close();
        }
        return text.toString();
    }

    /** Builds the exception message for a non-zero exit code, appending any console output. */
    private static String describeFailure(int exitCode, String console) {
        String msg;
        switch (exitCode) {
            case 1:
                msg = "Errors accessing files.There may be spaces in your image's filename.";
                break;
            case 29:
                msg = "Cannot recongnize the image or its selected region.";
                break;
            case 31:
                msg = "Unsupported image format.";
                break;
            default:
                msg = "Errors occurred.";
        }
        StringBuilder detail = new StringBuilder(msg);
        detail.append(" (exit code ").append(exitCode).append(')');
        if (console.length() > 0) {
            detail.append(EOL).append(console);
        }
        return detail.toString();
    }

    /**
     * Demo entry point: prints the text recognized in an image.
     *
     * @param args optional; {@code args[0]} is the image path, defaulting to {@code F://test1.png}
     * @throws Exception if recognition fails
     */
    public static void main(String[] args) throws Exception {
        System.out.println("begin");
        String path = args.length > 0 ? args[0] : "F://test1.png";
        String valCode = new OCRUtil().recognizeText(new File(path));
        System.out.println(valCode);
        System.out.println("end");
    }

}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值