其实,验证码识别就是用好tesseract-ocr,下面我先说下步骤:
1、去https://github.com/tesseract-ocr/tesseract/wiki 下载tesseract-ocr,然后安装一下;
2、在项目的pom.xml中加入以下依赖:
<dependency> <groupId>net.sourceforge.tess4j</groupId> <artifactId>tess4j</artifactId> <version>3.1.0</version> </dependency>
3、使用最最基本的图片识别API
// Minimal OCR call: create a Tesseract instance without any extra configuration.
ITesseract instance = new Tesseract();
// Remains null when recognition fails.
String ocrResult = null;
// Any failure from doOCR is only printed; ocrResult is left as null in that case.
try { ocrResult = instance.doOCR(imgFile); } catch (Exception e) { e.printStackTrace(); }
4、以上识别只能识别最最最简单的验证码图片,要想提高准确率,就需要去除噪音,先上传个简单的例子吧
package util;
import net.sourceforge.tess4j.ITesseract; import net.sourceforge.tess4j.Tesseract; import java.awt.Color; import java.awt.color.ColorSpace; import java.awt.image.BufferedImage; import java.awt.image.ColorConvertOp; import java.io.File; import java.io.IOException; import javax.imageio.ImageIO; public class ClearImageUtil { public static void cleanImage(File sfile, String destDir)throws IOException{ File destF = new File(destDir); if (!destF.exists()){ destF.mkdirs(); } BufferedImage bufferedImage = ImageIO.read(sfile); int h = bufferedImage.getHeight(); int w = bufferedImage.getWidth(); // 灰度化 int[][] gray = new int[w][h]; for (int x = 0; x < w; x++){ for (int y = 0; y < h; y++){ int argb = bufferedImage.getRGB(x, y); // 图像加亮(调整亮度识别率非常高) int r = (int) (((argb >> 16) & 0xFF) * 1.1 + 30); int g = (int) (((argb >> 8) & 0xFF) * 1.1 + 30); int b = (int) (((argb >> 0) & 0xFF) * 1.1 + 30); if (r >= 255){ r = 255; } if (g >= 255){ g = 255; } if (b >= 255){ b = 255; } gray[x][y] = (int) Math.pow((Math.pow(r, 2.2) * 0.2973 + Math.pow(g, 2.2)* 0.6274 + Math.pow(b, 2.2) * 0.0753), 1 / 2.2); } } // 二值化 int threshold = ostu(gray, w, h); if (threshold < 160 && threshold > 100) threshold -= 50; else if (threshold < 50) threshold = 50; BufferedImage binaryBufferedImage = new BufferedImage(w, h,BufferedImage.TYPE_BYTE_BINARY); for (int x = 0; x < w; x++){ for (int y = 0; y < h; y++){ if (gray[x][y] > threshold){ gray[x][y] |= 0x00FFFF; } else{ gray[x][y] &= 0xFF0000; } binaryBufferedImage.setRGB(x, y, gray[x][y]); } } ImageIO.write(binaryBufferedImage, "png", new File(destDir, sfile.getName())); } public static int getColorBright(int colorInt){ Color color = new Color(colorInt); return color.getRed() + color.getGreen() + color.getBlue(); } public static int ostu(int[][] gray, int w, int h){ int[] histData = new int[w * h]; // Calculate histogram for (int x = 0; x < w; x++){ for (int y = 0; y < h; y++){ int red = 0xFF & gray[x][y]; histData[red]++; } } // Total number of pixels int 
total = w * h; float sum = 0; for (int t = 0; t < 256; t++) sum += t * histData[t]; float sumB = 0; int wB = 0; int wF = 0; float varMax = 0; int threshold = 0; for (int t = 0; t < 256; t++){ wB += histData[t]; // Weight Background if (wB == 0) continue; wF = total - wB; // Weight Foreground if (wF == 0) break; sumB += (float) (t * histData[t]); float mB = sumB / wB; // Mean Background float mF = (sum - sumB) / wF; // Mean Foreground // Calculate Between Class Variance float varBetween = (float) wB * (float) wF * (mB - mF) * (mB - mF); // Check if new maximum found if (varBetween > varMax){ varMax = varBetween; threshold = t; } } return threshold; } //图片灰度,黑白 public static void gray(String srcImageFile, String destImageFile) { try { BufferedImage src = ImageIO.read(new File(srcImageFile)); ColorSpace cs = ColorSpace.getInstance(ColorSpace.CS_GRAY); ColorConvertOp op = new ColorConvertOp(cs, null); src = op.filter(src, null); ImageIO.write(src, "JPEG", new File(destImageFile)); } catch (IOException e) { e.printStackTrace(); } }
//只获取数字和字母 public static String getRcode(String imgfile) { String destDir ="img_des\\"; String graydir = "img_gray\\"; String reg = "[0-9a-zA-Z]"; File f = new File(imgfile); System.out.println(f.getName()); System.out.println(destDir + f.getName()); try { cleanImage(f, destDir); cleanImage(new File(destDir + f.getName()), graydir); ITesseract instance = new Tesseract(); String ocrResult = instance.doOCR(new File(graydir +f.getName())); String rcode = StringUtil.regstr(reg, ocrResult); return rcode; } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return null; } public static String getRcode(File f) { String destDir ="img_des\\"; String graydir = "img_gray\\"; String reg = "[0-9a-zA-Z]"; try { cleanImage(f, destDir); cleanImage(new File(destDir + f.getName()), graydir); ITesseract instance = new Tesseract(); String ocrResult = instance.doOCR(new File(graydir +f.getName())); String rcode = StringUtil.regstr(reg, ocrResult); return rcode; } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return null; } public static void main(String[] args) throws IOException{ File testDataDir = new File("C:\\Users\\你好\\Desktop\\验证码\\");//去噪 String destDir ="C:\\Users\\nihao\\Desktop\\s2\\"; String graydir = "C:\\Users\\nihao\\Desktop\\s3\\"; for (File f :testDataDir.listFiles()) { System.out.println(f.getName()); System.out.println(destDir + f.getName()); cleanImage(f, destDir); cleanImage(new File(destDir + f.getName()), graydir); } } }