CentOS 下编译安装 Tesseract 3.0.x 版本

1. 首先安装 Tesseract 所依赖的库 leptonica

tar -zxvf leptonica-1.69.tar.gz 
cd leptonica-1.69
./configure
make&&make install

2. 依赖安装完毕后开始安装tesseract

tar -zxvf tesseract-ocr-3.02.02.tar.gz
cd tesseract-ocr
./autogen.sh                            
./configure
make&&make install

3.安装中文和英文的识别库

tar -zxvf tesseract-ocr-3.02.eng.tar.gz
tar -zxvf tesseract-ocr-3.02.chi_sim.tar.gz 
mv tesseract-ocr/tessdata/* /usr/local/share/tessdata/
下面的 Java 调用示例依赖以下 jar 包:jai_imageio-1.1-alpha.jar、swingx-1.6.1.jar
package com.ocr;

import java.awt.Graphics2D;
import java.awt.color.ColorSpace;
import java.awt.geom.AffineTransform;
import java.awt.image.AffineTransformOp;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.awt.image.ColorModel;
import java.awt.image.MemoryImageSource;
import java.awt.image.PixelGrabber;

/**
 * 
 * 图像过滤,增强OCR识别成功率
 * 
 */
public class ImageFilter {
	private BufferedImage image;
	private int iw, ih;
	private int[] pixels;

	public ImageFilter(BufferedImage image) {
		this.image = image;
		iw = image.getWidth();
		ih = image.getHeight();
		pixels = new int[iw * ih];
	}

	/** 图像二值化 */
	public BufferedImage changeGrey() {
		PixelGrabber pg = new PixelGrabber(image.getSource(), 0, 0, iw, ih,
				pixels, 0, iw);
		try {
			pg.grabPixels();
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
		// 设定二值化的域值,默认值为100
		int grey = 100;
		// 对图像进行二值化处理,Alpha值保持不变
		ColorModel cm = ColorModel.getRGBdefault();

		for (int i = 0; i < iw * ih; i++) {

			int red, green, blue;

			int alpha = cm.getAlpha(pixels[i]);

			if (cm.getRed(pixels[i]) > grey) {

				red = 255;

			} else {

				red = 0;

			}
			if (cm.getGreen(pixels[i]) > grey) {

				green = 255;

			} else {

				green = 0;
			}

			if (cm.getBlue(pixels[i]) > grey) {

				blue = 255;

			} else {

				blue = 0;

			}
			pixels[i] = alpha << 24 | red << 16 | green << 8 | blue;

		}
		// 将数组中的象素产生一个图像
		return ImageIOHelper
				.imageProducerToBufferedImage(new MemoryImageSource(iw, ih,
						pixels, 0, iw));

	}

	/** 提升清晰度,进行锐化 */
	public BufferedImage sharp() {
		PixelGrabber pg = new PixelGrabber(image.getSource(), 0, 0, iw, ih,
				pixels, 0, iw);
		try {
			pg.grabPixels();
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
		// 象素的中间变量
		int tempPixels[] = new int[iw * ih];
		for (int i = 0; i < iw * ih; i++) {
			tempPixels[i] = pixels[i];
		}
		// 对图像进行尖锐化处理,Alpha值保持不变
		ColorModel cm = ColorModel.getRGBdefault();
		for (int i = 1; i < ih - 1; i++) {

			for (int j = 1; j < iw - 1; j++) {

				int alpha = cm.getAlpha(pixels[i * iw + j]);

				// 对图像进行尖锐化
				int red6 = cm.getRed(pixels[i * iw + j + 1]);
				int red5 = cm.getRed(pixels[i * iw + j]);
				int red8 = cm.getRed(pixels[(i + 1) * iw + j]);
				int sharpRed = Math.abs(red6 - red5) + Math.abs(red8 - red5);
				int green5 = cm.getGreen(pixels[i * iw + j]);
				int green6 = cm.getGreen(pixels[i * iw + j + 1]);
				int green8 = cm.getGreen(pixels[(i + 1) * iw + j]);
				int sharpGreen = Math.abs(green6 - green5)
						+ Math.abs(green8 - green5);
				int blue5 = cm.getBlue(pixels[i * iw + j]);
				int blue6 = cm.getBlue(pixels[i * iw + j + 1]);
				int blue8 = cm.getBlue(pixels[(i + 1) * iw + j]);
				int sharpBlue = Math.abs(blue6 - blue5)
						+ Math.abs(blue8 - blue5);
				if (sharpRed > 255) {
					sharpRed = 255;
				}
				if (sharpGreen > 255) {
					sharpGreen = 255;
				}
				if (sharpBlue > 255) {
					sharpBlue = 255;
				}
				tempPixels[i * iw + j] = alpha << 24 | sharpRed << 16
						| sharpGreen << 8 | sharpBlue;
			}
		}
		// 将数组中的象素产生一个图像

		return ImageIOHelper
				.imageProducerToBufferedImage(new MemoryImageSource(iw, ih,
						tempPixels, 0, iw));
	}

	/** 中值滤波 */
	public BufferedImage median() {
		PixelGrabber pg = new PixelGrabber(image.getSource(), 0, 0, iw, ih,
				pixels, 0, iw);
		try {
			pg.grabPixels();
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
		// 对图像进行中值滤波,Alpha值保持不变
		ColorModel cm = ColorModel.getRGBdefault();
		for (int i = 1; i < ih - 1; i++) {
			for (int j = 1; j < iw - 1; j++) {
				int red, green, blue;
				int alpha = cm.getAlpha(pixels[i * iw + j]);
				// int red2 = cm.getRed(pixels[(i - 1) * iw + j]);
				int red4 = cm.getRed(pixels[i * iw + j - 1]);
				int red5 = cm.getRed(pixels[i * iw + j]);
				int red6 = cm.getRed(pixels[i * iw + j + 1]);
				// int red8 = cm.getRed(pixels[(i + 1) * iw + j]);
				// 水平方向进行中值滤波
				if (red4 >= red5) {
					if (red5 >= red6) {
						red = red5;
					} else {
						if (red4 >= red6) {
							red = red6;
						} else {
							red = red4;
						}
					}
				} else {
					if (red4 > red6) {
						red = red4;
					} else {
						if (red5 > red6) {
							red = red6;
						} else {
							red = red5;
						}
					}
				}
				// int green2 = cm.getGreen(pixels[(i - 1) * iw + j]);
				int green4 = cm.getGreen(pixels[i * iw + j - 1]);
				int green5 = cm.getGreen(pixels[i * iw + j]);
				int green6 = cm.getGreen(pixels[i * iw + j + 1]);
				// int green8 = cm.getGreen(pixels[(i + 1) * iw + j]);
				// 水平方向进行中值滤波
				if (green4 >= green5) {
					if (green5 >= green6) {
						green = green5;

					} else {

						if (green4 >= green6) {

							green = green6;

						} else {

							green = green4;

						}

					}

				} else {

					if (green4 > green6) {

						green = green4;

					} else {

						if (green5 > green6) {

							green = green6;

						} else {

							green = green5;

						}

					}

				}
				// int blue2 = cm.getBlue(pixels[(i - 1) * iw + j]);

				int blue4 = cm.getBlue(pixels[i * iw + j - 1]);

				int blue5 = cm.getBlue(pixels[i * iw + j]);

				int blue6 = cm.getBlue(pixels[i * iw + j + 1]);

				// int blue8 = cm.getBlue(pixels[(i + 1) * iw + j]);
				// 水平方向进行中值滤波

				if (blue4 >= blue5) {

					if (blue5 >= blue6) {

						blue = blue5;

					} else {

						if (blue4 >= blue6) {

							blue = blue6;

						} else {

							blue = blue4;

						}

					}

				} else {

					if (blue4 > blue6) {

						blue = blue4;

					} else {

						if (blue5 > blue6) {

							blue = blue6;

						} else {

							blue = blue5;

						}

					}

				}

				pixels[i * iw + j] = alpha << 24 | red << 16 | green << 8
						| blue;

			}

		}

		// 将数组中的象素产生一个图像

		return ImageIOHelper
				.imageProducerToBufferedImage(new MemoryImageSource(iw, ih,
						pixels, 0, iw));

	}

	/** 线性灰度变换 */

	public BufferedImage lineGrey() {

		PixelGrabber pg = new PixelGrabber(image.getSource(), 0, 0, iw, ih,
				pixels, 0, iw);

		try {

			pg.grabPixels();

		} catch (InterruptedException e) {

			e.printStackTrace();

		}

		// 对图像进行进行线性拉伸,Alpha值保持不变

		ColorModel cm = ColorModel.getRGBdefault();

		for (int i = 0; i < iw * ih; i++) {

			int alpha = cm.getAlpha(pixels[i]);

			int red = cm.getRed(pixels[i]);

			int green = cm.getGreen(pixels[i]);

			int blue = cm.getBlue(pixels[i]);

			// 增加了图像的亮度

			red = (int) (1.1 * red + 30);

			green = (int) (1.1 * green + 30);

			blue = (int) (1.1 * blue + 30);

			if (red >= 255) {

				red = 255;

			}

			if (green >= 255) {

				green = 255;

			}

			if (blue >= 255) {

				blue = 255;

			}

			pixels[i] = alpha << 24 | red << 16 | green << 8 | blue;

		}

		// 将数组中的象素产生一个图像

		return ImageIOHelper
				.imageProducerToBufferedImage(new MemoryImageSource(iw, ih,
						pixels, 0, iw));

	}

	/** 转换为黑白灰度图 */

	public BufferedImage grayFilter() {

		ColorSpace cs = ColorSpace.getInstance(ColorSpace.CS_GRAY);

		ColorConvertOp op = new ColorConvertOp(cs, null);

		return op.filter(image, null);

	}

	/** 平滑缩放 */

	public BufferedImage scaling(double s) {

		AffineTransform tx = new AffineTransform();

		tx.scale(s, s);

		AffineTransformOp op = new AffineTransformOp(tx,
				AffineTransformOp.TYPE_BILINEAR);

		return op.filter(image, null);

	}

	public BufferedImage scale(Float s) {

		int srcW = image.getWidth();

		int srcH = image.getHeight();

		int newW = Math.round(srcW * s);

		int newH = Math.round(srcH * s);

		// 先做水平方向上的伸缩变换

		BufferedImage tmp = new BufferedImage(newW, newH, image.getType());

		Graphics2D g = tmp.createGraphics();

		for (int x = 0; x < newW; x++) {

			g.setClip(x, 0, 1, srcH);

			// 按比例放缩

			g.drawImage(image, x - x * srcW / newW, 0, null);

		}

		// 再做垂直方向上的伸缩变换

		BufferedImage dst = new BufferedImage(newW, newH, image.getType());

		g = dst.createGraphics();

		for (int y = 0; y < newH; y++) {

			g.setClip(0, y, newW, 1);

			// 按比例放缩

			g.drawImage(tmp, 0, y - y * srcH / newH, null);

		}

		return dst;
	}

}

package com.ocr;

import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;

import java.awt.image.ImageProducer;

import java.awt.image.WritableRaster;

import java.io.File;

import java.io.IOException;

import java.util.Iterator;

import java.util.Locale;

import javax.imageio.IIOImage;

import javax.imageio.ImageIO;

import javax.imageio.ImageReader;

import javax.imageio.ImageWriteParam;

import javax.imageio.ImageWriter;

import javax.imageio.metadata.IIOMetadata;

import javax.imageio.stream.ImageInputStream;

import javax.imageio.stream.ImageOutputStream;

import javax.swing.JOptionPane;

import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;

/**
 * Static helpers for reading images and converting them into uncompressed
 * TIFF files.
 * <p>
 * Writing TIFF requires an Image I/O writer registered for the format name
 * {@code "tiff"}.
 */
public class ImageIOHelper {

	public ImageIOHelper() {

	}

	/**
	 * Re-encodes an image file as an uncompressed TIFF stored next to the
	 * original (see {@link #tempImageFile(File)} for the naming rule).
	 *
	 * @param imageFile
	 *            the image to convert
	 * @param imageFormat
	 *            Image I/O format name of {@code imageFile}, e.g. {@code "jpg"}
	 * @return the TIFF file, or {@code null} if the conversion failed (the
	 *         failure is printed to standard error)
	 */
	public static File createImage(File imageFile, String imageFormat) {
		ImageReader reader = null;
		ImageInputStream iis = null;
		try {
			Iterator<ImageReader> readers = ImageIO
					.getImageReadersByFormatName(imageFormat);
			if (!readers.hasNext()) {
				throw new IOException("No image reader for format: "
						+ imageFormat);
			}
			reader = readers.next();

			iis = ImageIO.createImageInputStream(imageFile);
			if (iis == null) {
				throw new IOException("Cannot open image: " + imageFile);
			}
			reader.setInput(iis);

			IIOMetadata streamMetadata = reader.getStreamMetadata();
			BufferedImage bi = reader.read(0);
			IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

			File tempFile = tempImageFile(imageFile);
			writeTiff(streamMetadata, image, tempFile);
			return tempFile;
		} catch (Exception exc) {
			exc.printStackTrace();
			return null;
		} finally {
			if (reader != null) {
				reader.dispose();
			}
			closeQuietly(iis);
		}
	}

	/**
	 * Writes an in-memory image to a new uncompressed TIFF temp file. The
	 * file is registered for deletion when the JVM exits.
	 *
	 * @param bi
	 *            the image to write
	 * @return the TIFF file, or {@code null} if writing failed (the failure
	 *         is printed to standard error)
	 */
	public static File createImage(BufferedImage bi) {
		try {
			File tempFile = File.createTempFile("tempImageFile", ".tif");
			tempFile.deleteOnExit();
			// Write straight into the file that was just reserved; deriving a
			// second name from it would leave an empty placeholder behind.
			writeTiff(null, new IIOImage(bi, null, null), tempFile);
			return tempFile;
		} catch (Exception exc) {
			exc.printStackTrace();
			return null;
		}
	}

	/**
	 * Derives the name of the temporary TIFF for an image: a {@code 0} is
	 * appended to the base name and the extension becomes {@code tif}, e.g.
	 * {@code scan.jpg -> scan0.tif}. A name without extension simply gets
	 * {@code 0.tif} appended.
	 *
	 * @param imageFile
	 *            the original image file
	 * @return a file in the same directory with the derived name
	 */
	public static File tempImageFile(File imageFile) {
		String path = imageFile.getPath();
		int dot = path.lastIndexOf('.');
		int sep = Math.max(path.lastIndexOf('/'),
				path.lastIndexOf(File.separatorChar));
		// A dot before the last separator belongs to a directory name.
		String base = dot > sep ? path.substring(0, dot) : path;
		return new File(base + "0.tif");
	}

	/**
	 * Reads the first image of a file, choosing the reader from the file
	 * name's extension.
	 *
	 * @param imageFile
	 *            the file to read
	 * @return the decoded image, or {@code null} if no reader is available or
	 *         reading failed
	 */
	public static BufferedImage getImage(File imageFile) {
		BufferedImage al = null;
		ImageReader reader = null;
		ImageInputStream iis = null;
		try {
			String imageFileName = imageFile.getName();
			String imageFormat = imageFileName.substring(imageFileName
					.lastIndexOf('.') + 1);

			Iterator<ImageReader> readers = ImageIO
					.getImageReadersByFormatName(imageFormat);
			if (!readers.hasNext()) {
				JOptionPane
						.showConfirmDialog(null,
						"Need to install JAI Image I/O package.\nhttps://jai-imageio.dev.java.net");
				return null;
			}
			reader = readers.next();

			iis = ImageIO.createImageInputStream(imageFile);
			if (iis == null) {
				throw new IOException("Cannot open image: " + imageFile);
			}
			reader.setInput(iis);
			al = reader.read(0);
		} catch (IOException ioe) {
			System.err.println(ioe.getMessage());
		} catch (Exception e) {
			System.err.println(e.getMessage());
		} finally {
			if (reader != null) {
				reader.dispose();
			}
			closeQuietly(iis);
		}
		return al;
	}

	/**
	 * Paints an AWT image onto a new {@link BufferedImage#TYPE_INT_RGB}
	 * image of the same size.
	 *
	 * @param image
	 *            the image to copy
	 * @return the copy
	 */
	public static BufferedImage imageToBufferedImage(Image image) {
		BufferedImage bufferedImage = new BufferedImage(image.getWidth(null),
				image.getHeight(null), BufferedImage.TYPE_INT_RGB);
		Graphics2D g = bufferedImage.createGraphics();
		try {
			g.drawImage(image, 0, 0, null);
		} finally {
			g.dispose();
		}
		return bufferedImage;
	}

	/**
	 * Materialises an {@link ImageProducer} as a buffered image.
	 *
	 * @param imageProducer
	 *            the pixel source
	 * @return the produced image
	 */
	public static BufferedImage imageProducerToBufferedImage(
			ImageProducer imageProducer) {
		return imageToBufferedImage(Toolkit.getDefaultToolkit().createImage(
				imageProducer));
	}

	/**
	 * Returns the backing byte array of the image's raster.
	 *
	 * @param image
	 *            an image whose data buffer is a {@link DataBufferByte}
	 * @return the raster's byte array (not a copy)
	 * @throws ClassCastException
	 *             if the image is not backed by a byte buffer
	 */
	public static byte[] image_byte_data(BufferedImage image) {
		WritableRaster raster = image.getRaster();
		DataBufferByte buffer = (DataBufferByte) raster.getDataBuffer();
		return buffer.getData();
	}

	/** Writes one image as uncompressed TIFF to {@code target}. */
	private static void writeTiff(IIOMetadata streamMetadata, IIOImage image,
			File target) throws IOException {
		Iterator<ImageWriter> writers = ImageIO
				.getImageWritersByFormatName("tiff");
		if (!writers.hasNext()) {
			throw new IOException("No TIFF image writer is installed");
		}
		ImageWriter writer = writers.next();
		ImageOutputStream ios = null;
		try {
			// The writer's own default parameters always match the writer.
			ImageWriteParam tiffWriteParam = writer.getDefaultWriteParam();
			tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

			ios = ImageIO.createImageOutputStream(target);
			if (ios == null) {
				throw new IOException("Cannot open for writing: " + target);
			}
			writer.setOutput(ios);
			writer.write(streamMetadata, image, tiffWriteParam);
			// Close explicitly on success so a failed flush is reported.
			ios.close();
			ios = null;
		} finally {
			closeQuietly(ios);
			writer.dispose();
		}
	}

	/** Closes a stream during clean-up; a close failure there is not actionable. */
	private static void closeQuietly(ImageInputStream stream) {
		if (stream == null) {
			return;
		}
		try {
			stream.close();
		} catch (IOException ignored) {
			// Only reached while cleaning up after success or another error.
		}
	}
}

package com.ocr;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.jdesktop.swingx.util.OS;

/**
 * Runs the external {@code tesseract} executable on an image file and
 * returns the recognised text.
 */
public class OCR {

	private final String LANG_OPTION = "-l";

	private final String EOL = System.getProperty("line.separator");

	/** Directory prefix used to locate tesseract on non-Linux systems. */
	private String tessPath="D:\\Program Files (x86)";

	/**
	 * Converts the image to TIFF, runs tesseract on it with the simplified
	 * Chinese and English language data, and returns the text tesseract
	 * wrote to its output file. The temporary TIFF and the output file are
	 * removed afterwards.
	 *
	 * @param imageFile
	 *            the image to recognise
	 * @param imageFormat
	 *            Image I/O format name of {@code imageFile}, e.g. {@code "jpg"}
	 * @return the recognised text, one line per output line, each terminated
	 *         by the platform line separator
	 * @throws RuntimeException
	 *             if the image cannot be converted or tesseract exits with a
	 *             non-zero status
	 * @throws Exception
	 *             if the process cannot be started or its output cannot be
	 *             read
	 */
	public String recognizeText(File imageFile, String imageFormat)
			throws Exception {

		File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);
		if (tempImage == null) {
			throw new RuntimeException("Cannot convert image to TIFF: "
					+ imageFile);
		}

		File outputFile = new File(imageFile.getParentFile(), "output");
		File resultFile = new File(outputFile.getAbsolutePath() + ".txt");

		List<String> cmd = new ArrayList<String>();
		if (OS.isWindowsXP()) {
			cmd.add(tessPath + "//tesseract");
		} else if (OS.isLinux()) {
			cmd.add("tesseract");
		} else {
			cmd.add(tessPath + "//Tesseract-OCR//tesseract");
		}
		// tesseract <image> <outputbase> -l <lang>[+<lang>...]
		cmd.add(tempImage.getName());
		cmd.add(outputFile.getName());
		cmd.add(LANG_OPTION);
		// Several languages form ONE argument joined by '+'; a separate
		// trailing "eng" would be interpreted as a config file name.
		cmd.add("chi_sim+eng");

		ProcessBuilder pb = new ProcessBuilder();
		pb.directory(imageFile.getParentFile());
		pb.command(cmd);
		pb.redirectErrorStream(true);

		try {
			Process process = pb.start();

			// Drain the merged stdout/stderr so the child can never block on
			// a full pipe before waitFor() returns.
			BufferedReader console = new BufferedReader(new InputStreamReader(
					process.getInputStream()));
			try {
				while (console.readLine() != null) {
					// output is discarded
				}
			} finally {
				console.close();
			}

			int w = process.waitFor();
			if (w != 0) {
				throw new RuntimeException(describeExitCode(w));
			}

			StringBuffer strB = new StringBuffer();
			BufferedReader in = new BufferedReader(new InputStreamReader(
					new FileInputStream(resultFile), "UTF-8"));
			try {
				String str;
				while ((str = in.readLine()) != null) {
					strB.append(str).append(EOL);
				}
			} finally {
				in.close();
			}
			return strB.toString();
		} finally {
			// Remove the working files whether recognition succeeded or not.
			tempImage.delete();
			resultFile.delete();
		}
	}

	/** Maps a tesseract exit status to the message reported to the caller. */
	private static String describeExitCode(int code) {
		switch (code) {
		case 1:
			return "Errors accessing files. There may be spaces in your image's filename.";
		case 29:
			return "Cannot recognize the image or its selected region.";
		case 31:
			return "Unsupported image format.";
		default:
			return "Errors occurred.";
		}
	}

}

package com.ocr;

import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

/**
 * De-noises images for OCR: brightens them, converts them to gray scale,
 * binarises them with an Otsu threshold and saves the 1-bit result.
 */
public class ClearImageHelper {
	/** Number of 8-bit gray levels, i.e. the size of the Otsu histogram. */
	private static final int GRAY_LEVELS = 256;

	/**
	 * Cleans every regular file in the {@code testdata} directory and writes
	 * the results to {@code testdata/tmp}.
	 *
	 * @param args
	 *            unused
	 * @throws IOException
	 *             if the directory cannot be listed or an image cannot be
	 *             read or written
	 */
	public static void main(String[] args) throws IOException {

		File testDataDir = new File("testdata");
		final String destDir = testDataDir.getAbsolutePath() + "/tmp";
		File[] files = testDataDir.listFiles();
		if (files == null) {
			throw new IOException("Cannot list directory: "
					+ testDataDir.getAbsolutePath());
		}
		for (File file : files) {
			// Skip sub-directories, in particular the tmp output directory.
			if (file.isFile()) {
				cleanImage(file, destDir);
			}
		}

	}

	/**
	 * Binarises one image, prints it as a character matrix to standard
	 * output ({@code *} for black, space for white) and saves it as JPEG
	 * under the original file name.
	 *
	 * @param sfile
	 *            the image to de-noise
	 * @param destDir
	 *            directory the result is written to; created if missing
	 * @throws IOException
	 *             if the image cannot be decoded or the result cannot be
	 *             written
	 */
	public static void cleanImage(File sfile, String destDir)
			throws IOException {
		File destF = new File(destDir);
		if (!destF.exists()) {
			destF.mkdirs();
		}

		BufferedImage bufferedImage = ImageIO.read(sfile);
		if (bufferedImage == null) {
			// ImageIO.read signals "no suitable reader" by returning null.
			throw new IOException("Unsupported or unreadable image: " + sfile);
		}
		int h = bufferedImage.getHeight();
		int w = bufferedImage.getWidth();

		// Gray-scale conversion on brightened channels, gamma 2.2 weighted.
		int[][] gray = new int[w][h];
		for (int x = 0; x < w; x++) {
			for (int y = 0; y < h; y++) {
				int argb = bufferedImage.getRGB(x, y);
				int r = brighten((argb >> 16) & 0xFF);
				int g = brighten((argb >> 8) & 0xFF);
				int b = brighten(argb & 0xFF);
				gray[x][y] = (int) Math
						.pow((Math.pow(r, 2.2) * 0.2973 + Math.pow(g, 2.2)
								* 0.6274 + Math.pow(b, 2.2) * 0.0753), 1 / 2.2);
			}
		}

		// Binarisation: above the Otsu threshold is white, the rest black.
		int threshold = ostu(gray, w, h);
		BufferedImage binaryBufferedImage = new BufferedImage(w, h,
				BufferedImage.TYPE_BYTE_BINARY);
		for (int x = 0; x < w; x++) {
			for (int y = 0; y < h; y++) {
				int rgb = gray[x][y] > threshold ? 0xFFFFFF : 0x000000;
				binaryBufferedImage.setRGB(x, y, rgb);
			}
		}

		// Print the matrix, one text row per pixel row.
		for (int y = 0; y < h; y++) {
			for (int x = 0; x < w; x++) {
				if (isBlack(binaryBufferedImage.getRGB(x, y))) {
					System.out.print("*");
				} else {
					System.out.print(" ");
				}
			}
			System.out.println();
		}

		File target = new File(destDir, sfile.getName());
		if (!ImageIO.write(binaryBufferedImage, "jpg", target)) {
			throw new IOException("No image writer available for: " + target);
		}
	}

	/**
	 * @param colorInt
	 *            packed RGB colour
	 * @return {@code true} if red + green + blue is at most 300
	 */
	public static boolean isBlack(int colorInt) {
		return getColorBright(colorInt) <= 300;
	}

	/**
	 * @param colorInt
	 *            packed RGB colour
	 * @return {@code true} if red + green + blue is greater than 300
	 */
	public static boolean isWhite(int colorInt) {
		return getColorBright(colorInt) > 300;
	}

	/**
	 * @param colorInt
	 *            packed RGB colour
	 * @return 1 if the channel sum is below 30 or above 730, otherwise 0
	 */
	public static int isBlackOrWhite(int colorInt) {
		int bright = getColorBright(colorInt);
		if (bright < 30 || bright > 730) {
			return 1;
		}
		return 0;
	}

	/**
	 * @param colorInt
	 *            packed RGB colour
	 * @return red + green + blue, in the range 0..765
	 */
	public static int getColorBright(int colorInt) {
		Color color = new Color(colorInt);
		return color.getRed() + color.getGreen() + color.getBlue();
	}

	/**
	 * Computes a threshold with Otsu's method: the gray level that maximises
	 * the between-class variance of the two pixel groups it separates.
	 *
	 * @param gray
	 *            gray values indexed {@code [x][y]}; only the low 8 bits of
	 *            each value are used
	 * @param w
	 *            image width (first dimension of {@code gray})
	 * @param h
	 *            image height (second dimension of {@code gray})
	 * @return the threshold in 0..255; 0 if all pixels share one gray level
	 */
	public static int ostu(int[][] gray, int w, int h) {
		// One bin per gray level. Sizing this by the pixel count would
		// overflow the array for any image with fewer than 256 pixels.
		int[] histData = new int[GRAY_LEVELS];
		for (int x = 0; x < w; x++) {
			for (int y = 0; y < h; y++) {
				histData[0xFF & gray[x][y]]++;
			}
		}

		// Total number of pixels
		int total = w * h;

		// Accumulate in double: t * count overflows int on large images.
		double sum = 0;
		for (int t = 0; t < GRAY_LEVELS; t++) {
			sum += (double) t * histData[t];
		}

		double sumB = 0;
		int wB = 0;
		double varMax = 0;
		int threshold = 0;

		for (int t = 0; t < GRAY_LEVELS; t++) {
			wB += histData[t]; // Weight Background
			if (wB == 0) {
				continue;
			}

			int wF = total - wB; // Weight Foreground
			if (wF == 0) {
				break;
			}

			sumB += (double) t * histData[t];

			double mB = sumB / wB; // Mean Background
			double mF = (sum - sumB) / wF; // Mean Foreground

			// Between-class variance
			double varBetween = (double) wB * wF * (mB - mF) * (mB - mF);

			if (varBetween > varMax) {
				varMax = varBetween;
				threshold = t;
			}
		}

		return threshold;
	}

	/** Linear brightening {@code 1.1 * v + 30}, capped at 255. */
	private static int brighten(int v) {
		return Math.min(255, (int) (v * 1.1 + 30));
	}
}

package com.ocr;

import java.io.File;
public class Test {
    /**
     * @param args
     */
    public static void main(String[] args) {
       // TODO Auto-generated method stub
       OCR ocr=new OCR();
        try {
           String maybe = new OCR().recognizeText(new  File("D://20150325102316.jpg"), "jpg");
           System.out.println(maybe);
       } catch (Exception e) {
           // TODO Auto-generated catch block
           e.printStackTrace();
       }
    }
}


转载:http://lxy.me/centos-compiler-install-tesseract3-0-x-version.html

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值