1、登录有图片验证码时,使用tesseract-ocr解析图片
2、安装
- windows:本地调用,先安装tesseract-ocr包
- linux
- 执行yum install icu libicu-devel pango libgnomeui-devel libtiff-devel libjpeg-devel libpng-devel -y(参考版本:icu 52.1、pango 1.22.0)
- 安装leptonica-1.78.0.tar.gz:1、cd leptonica-1.78.0 2、./autogen.sh 3、./configure --prefix=/usr/local/ 4、make 5、make install
- 安装tesseract-4.0.0.tar.gz:1、cd tesseract-4.0.0 2、./autogen.sh 3、./configure(注意是否有warning提示,如果有,需要先把提示中缺少的依赖安装完成)4、make 5、make install 6、ldconfig
- vim /etc/profile 添加:
PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/lib64/pkgconfig
export PKG_CONFIG_PATH
CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/local/include/
export CPLUS_INCLUDE_PATH
C_INCLUDE_PATH=$C_INCLUDE_PATH:/usr/local/include/leptonica
export C_INCLUDE_PATH
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64
export LD_LIBRARY_PATH
LIBRARY_PATH=$LIBRARY_PATH:/usr/local/lib64
export LIBRARY_PATH
保存后执行:
source /etc/profile
4、pom引入:
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>3.2.1</version>
</dependency>
5、代码:先通过接口/getSecurityCode?time=获取图片并保存到本地,然后解析图片
package com.yiliao.utils;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang.StringUtils;
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
/**
 * Static helpers for reading the text of a CAPTCHA image with Tesseract (tess4j).
 *
 * <p>{@link #cleanImage(File, String)} brightens the image, converts it to gray
 * and binarises it with the threshold computed by {@link #ostu(int[][], int, int)};
 * {@link #analysisCode()} runs that clean-up on a fixed file and feeds the result
 * to Tesseract.
 */
public class OCRHelper
{
    /** Number of 8-bit gray levels; the threshold histogram has one bin per level. */
    private static final int GRAY_LEVELS = 256;

    /** Largest value a colour channel may hold after brightening. */
    private static final int MAX_CHANNEL = 255;

    /** Exponent used when combining the brightened channels into one gray value. */
    private static final double GAMMA = 2.2;

    /**
     * RGB written for pixels above the threshold. {@code BufferedImage.setRGB}
     * picks the nearest entry of the two-colour palette of a
     * {@code TYPE_BYTE_BINARY} image, so the exact hue is not preserved.
     */
    private static final int ABOVE_THRESHOLD_RGB = 0x00FFFF;

    /** RGB written for pixels at or below the threshold. */
    private static final int BELOW_THRESHOLD_RGB = 0x000000;

    /** Resource root, resolved against the JVM working directory. */
    private static final String path = System.getProperty("user.dir") + "/src/main/resources/";

    /**
     * No-op entry point; call {@link #analysisCode()} to run a recognition.
     *
     * @param args ignored
     * @throws IOException declared for compatibility; never thrown by this body
     */
    public static void main(String[] args) throws IOException
    {
    }

    /**
     * Brightens, gray-scales and binarises an image, then writes it as a JPEG
     * named like {@code sfile} into {@code destDir} (created if missing).
     *
     * <p>If the image cannot be read or decoded, or cannot be written, the problem
     * is reported on standard error and the method returns; no exception is
     * propagated for these I/O failures.
     *
     * @param sfile   image to clean up
     * @param destDir directory that receives the cleaned image
     */
    public static void cleanImage(File sfile, String destDir)
    {
        File destF = new File(destDir);
        if (!destF.exists())
        {
            destF.mkdirs();
        }

        BufferedImage source;
        try
        {
            source = ImageIO.read(sfile);
        } catch (IOException e)
        {
            e.printStackTrace();
            return;
        }
        // ImageIO.read returns null (without throwing) when no reader understands the file.
        if (source == null)
        {
            System.err.println("cleanImage: unable to decode image " + sfile);
            return;
        }

        int w = source.getWidth();
        int h = source.getHeight();
        int[][] gray = toBrightenedGray(source, w, h);
        BufferedImage binary = binarize(gray, w, h, ostu(gray, w, h));

        try
        {
            // write() returns false when no registered writer accepts the image.
            if (!ImageIO.write(binary, "jpg", new File(destDir, sfile.getName())))
            {
                System.err.println("cleanImage: no image writer available for " + sfile.getName());
            }
        } catch (IOException e)
        {
            e.printStackTrace();
        }
    }

    /** Converts every pixel to a single gray value after brightening its channels. */
    private static int[][] toBrightenedGray(BufferedImage image, int w, int h)
    {
        int[][] gray = new int[w][h];
        for (int x = 0; x < w; x++)
        {
            for (int y = 0; y < h; y++)
            {
                int argb = image.getRGB(x, y);
                int r = brighten((argb >> 16) & 0xFF);
                int g = brighten((argb >> 8) & 0xFF);
                int b = brighten(argb & 0xFF);
                // Weighted sum in gamma-expanded space, then compressed back.
                gray[x][y] = (int) Math.pow(
                        Math.pow(r, GAMMA) * 0.2973
                                + Math.pow(g, GAMMA) * 0.6274
                                + Math.pow(b, GAMMA) * 0.0753,
                        1 / GAMMA);
            }
        }
        return gray;
    }

    /** Scales a channel by 1.1, adds 30 and caps the result at 255. */
    private static int brighten(int channel)
    {
        int brightened = (int) (channel * 1.1 + 30);
        return Math.min(brightened, MAX_CHANNEL);
    }

    /** Builds a 1-bit image: one colour above {@code threshold}, the other otherwise. */
    private static BufferedImage binarize(int[][] gray, int w, int h, int threshold)
    {
        BufferedImage binary = new BufferedImage(w, h, BufferedImage.TYPE_BYTE_BINARY);
        for (int x = 0; x < w; x++)
        {
            for (int y = 0; y < h; y++)
            {
                int rgb = gray[x][y] > threshold ? ABOVE_THRESHOLD_RGB : BELOW_THRESHOLD_RGB;
                binary.setRGB(x, y, rgb);
            }
        }
        return binary;
    }

    /**
     * Computes a global threshold by maximising the between-class variance of the
     * gray-level histogram (Otsu's method).
     *
     * <p>The histogram has one bin per gray level (256), independent of the image
     * size, so images with fewer than 256 pixels are handled as well.
     *
     * @param gray gray values indexed as {@code gray[x][y]}; only the low 8 bits
     *             of each entry are used
     * @param w    number of columns to read from {@code gray}
     * @param h    number of rows to read from {@code gray}
     * @return the selected threshold in {@code 0..255}; {@code 0} when no split
     *         with a positive between-class variance exists (for example when
     *         every pixel has the same value)
     */
    public static int ostu(int[][] gray, int w, int h)
    {
        int[] histData = new int[GRAY_LEVELS];
        for (int x = 0; x < w; x++)
        {
            for (int y = 0; y < h; y++)
            {
                histData[0xFF & gray[x][y]]++;
            }
        }

        int total = w * h;
        float sum = 0;
        for (int t = 0; t < GRAY_LEVELS; t++)
        {
            sum += t * histData[t];
        }

        float sumB = 0;
        int wB = 0;
        float varMax = 0;
        int threshold = 0;
        for (int t = 0; t < GRAY_LEVELS; t++)
        {
            wB += histData[t]; // weight of the background class
            if (wB == 0)
            {
                continue;
            }
            int wF = total - wB; // weight of the foreground class
            if (wF == 0)
            {
                break;
            }
            sumB += (float) (t * histData[t]);
            float mB = sumB / wB;
            float mF = (sum - sumB) / wF;
            float varBetween = (float) wB * (float) wF * (mB - mF) * (mB - mF);
            if (varBetween > varMax)
            {
                varMax = varBetween;
                threshold = t;
            }
        }
        return threshold;
    }

    /**
     * Cleans {@code uploadFile/securityCode.jpg} under the resource root in place
     * and recognises its text with Tesseract, using the {@code eng} language and
     * the {@code tessdata} directory under the same root.
     *
     * @return the recognised text with all whitespace removed, or {@code null}
     *         when Tesseract fails (the exception is printed to standard error)
     */
    public static String analysisCode()
    {
        String uploadDir = path + "uploadFile/";
        File captcha = new File(uploadDir + "securityCode.jpg");
        // The cleaned image overwrites the original because source and target directory match.
        cleanImage(captcha, uploadDir);

        Tesseract tesseract = new Tesseract();
        tesseract.setLanguage("eng");
        tesseract.setDatapath(path + "tessdata");
        try
        {
            String result = tesseract.doOCR(captcha);
            return result == null ? null : result.replaceAll("\\s+", "");
        } catch (TesseractException e)
        {
            e.printStackTrace();
            return null;
        }
    }
}