此代码用于自动识别12306火车货运验证码。由于12306火车货运验证码较简单，本代码没有对图像做过多变换，只做了灰度化处理，识别正确率在90%以上；还可以让它学习更多的验证码样本来提高识别正确率。本代码测试通过后未做过多优化，有验证码识别需求的coder们可以参考。话不多说，上源码：
import java.awt.color.ColorSpace;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import javax.imageio.ImageIO;
/**
 * Data-access helpers for the glyph library kept in the SQLite file {@code char_bits.db}.
 *
 * <p>The queries use two tables: {@code chars} (columns {@code id} and {@code char}) and
 * {@code bits} (columns {@code char_id} and {@code char_bits}). Rows are inserted into
 * {@code bits} positionally, so that table is expected to declare exactly those two columns
 * in that order.
 *
 * <p>Every method opens its own connection and closes it (together with all statements and
 * result sets) before returning, whether it returns normally or throws. All values are bound
 * as statement parameters instead of being concatenated into the SQL text.
 */
class DBOp {
    /** JDBC URL of the glyph database (path is relative to the working directory). */
    private static final String DB_URL = "jdbc:sqlite:char_bits.db";
    /** Class name of the SQLite JDBC driver that is loaded before connecting. */
    private static final String DRIVER_CLASS = "org.sqlite.JDBC";
    /** Query timeout, in seconds, applied to every statement. */
    private static final int QUERY_TIMEOUT_SECONDS = 1;

    /**
     * Stores one glyph bitmap for a character unless an identical bitmap is already stored
     * for that character.
     *
     * @param psChar the character the bitmap represents; it must already have a row in
     *               {@code chars}
     * @param psBits the encoded bitmap to store
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException if the character has no row in {@code chars} or a database error
     *                      occurs
     */
    public static void insertRec(String psChar, String psBits)
            throws ClassNotFoundException, SQLException {
        try (Connection loConn = openConnection()) {
            Integer loCharId = findCharId(loConn, psChar);
            if (loCharId == null) {
                throw new SQLException("数据库格式错误");
            }
            // Exact duplicates are skipped so repeated training runs do not bloat the table.
            if (selectBitsOfChar(loConn, loCharId.intValue()).contains(psBits)) {
                return;
            }
            try (PreparedStatement loInsert = prepare(loConn, "insert into bits values(?, ?)")) {
                loInsert.setInt(1, loCharId.intValue());
                loInsert.setString(2, psBits);
                loInsert.executeUpdate();
            }
        }
    }

    /**
     * Returns one page of stored glyph bitmaps.
     *
     * @param piOffset number of rows to skip
     * @param piLimit  maximum number of rows to return
     * @return the {@code char_bits} values of the selected rows (possibly empty)
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException if a database error occurs
     */
    public static ArrayList<String> getAllBits(int piOffset, int piLimit)
            throws ClassNotFoundException, SQLException {
        try (Connection loConn = openConnection();
                PreparedStatement loQuery = prepare(loConn,
                        "select char_bits from bits limit ? offset ?")) {
            loQuery.setInt(1, piLimit);
            loQuery.setInt(2, piOffset);
            return readColumn(loQuery, "char_bits");
        }
    }

    /**
     * Looks up the character that a stored glyph bitmap belongs to.
     *
     * @param psBits the encoded bitmap, exactly as stored
     * @return the character, or {@code null} if the bitmap is not stored or its
     *         {@code char_id} has no matching row in {@code chars}
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException if a database error occurs
     */
    public static String getCharByBits(String psBits)
            throws ClassNotFoundException, SQLException {
        try (Connection loConn = openConnection()) {
            int liCharId;
            try (PreparedStatement loQuery = prepare(loConn,
                    "select char_id from bits where char_bits = ?")) {
                loQuery.setString(1, psBits);
                try (ResultSet loCharIds = loQuery.executeQuery()) {
                    if (!loCharIds.next()) {
                        return null;
                    }
                    // Only the first matching row is considered.
                    liCharId = loCharIds.getInt("char_id");
                }
            }
            try (PreparedStatement loQuery = prepare(loConn,
                    "select char from chars where id = ?")) {
                loQuery.setInt(1, liCharId);
                try (ResultSet loChars = loQuery.executeQuery()) {
                    return loChars.next() ? loChars.getString("char") : null;
                }
            }
        }
    }

    /**
     * Returns every character registered in {@code chars}.
     *
     * @return the {@code char} value of each row (possibly empty)
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException if a database error occurs
     */
    public static ArrayList<String> getAllChars()
            throws ClassNotFoundException, SQLException {
        try (Connection loConn = openConnection();
                PreparedStatement loQuery = prepare(loConn, "select * from chars")) {
            return readColumn(loQuery, "char");
        }
    }

    /**
     * Returns every glyph bitmap stored for one character.
     *
     * @param psChar the character to look up
     * @return the stored {@code char_bits} values for that character (possibly empty)
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException if the character has no row in {@code chars} or a database error
     *                      occurs
     */
    public static ArrayList<String> getAllBitsByChar(String psChar)
            throws ClassNotFoundException, SQLException {
        try (Connection loConn = openConnection()) {
            Integer loCharId = findCharId(loConn, psChar);
            if (loCharId == null) {
                throw new SQLException("数据库中找不到" + psChar + "的记录");
            }
            return selectBitsOfChar(loConn, loCharId.intValue());
        }
    }

    /** Loads the SQLite driver and opens a new connection to the glyph database. */
    private static Connection openConnection() throws ClassNotFoundException, SQLException {
        Class.forName(DRIVER_CLASS);
        return DriverManager.getConnection(DB_URL);
    }

    /** Prepares a statement with the standard query timeout; closes it again if setup fails. */
    private static PreparedStatement prepare(Connection poConn, String psSql)
            throws SQLException {
        PreparedStatement loStatement = poConn.prepareStatement(psSql);
        try {
            loStatement.setQueryTimeout(QUERY_TIMEOUT_SECONDS);
        } catch (SQLException e) {
            loStatement.close();
            throw e;
        }
        return loStatement;
    }

    /** Runs a prepared query and collects one string column from every returned row. */
    private static ArrayList<String> readColumn(PreparedStatement poQuery, String psColumn)
            throws SQLException {
        ArrayList<String> loValues = new ArrayList<String>();
        try (ResultSet loRows = poQuery.executeQuery()) {
            while (loRows.next()) {
                loValues.add(loRows.getString(psColumn));
            }
        }
        return loValues;
    }

    /** Returns the {@code id} of the {@code chars} row for a character, or null if none. */
    private static Integer findCharId(Connection poConn, String psChar) throws SQLException {
        try (PreparedStatement loQuery = prepare(poConn, "select * from chars where char = ?")) {
            loQuery.setString(1, psChar);
            try (ResultSet loChars = loQuery.executeQuery()) {
                return loChars.next() ? Integer.valueOf(loChars.getInt("id")) : null;
            }
        }
    }

    /** Returns all {@code char_bits} values stored for the given {@code char_id}. */
    private static ArrayList<String> selectBitsOfChar(Connection poConn, int piCharId)
            throws SQLException {
        try (PreparedStatement loQuery = prepare(poConn,
                "select char_bits from bits where char_id = ?")) {
            loQuery.setInt(1, piCharId);
            return readColumn(loQuery, "char_bits");
        }
    }
}
/**
 * Recognizes the characters of a simple CAPTCHA image by comparing each segmented glyph
 * with glyph bitmaps previously stored through {@code DBOp}.
 *
 * <p>Glyphs are encoded as {@code "<bits>|<width>,<height>"}, where {@code <bits>} lists the
 * glyph's pixels column by column (top to bottom within a column) as {@code '1'} for ink and
 * {@code '0'} for background. Two glyphs are considered the same character when they have
 * identical dimensions and the cosine similarity of their bit vectors reaches
 * {@link #THRESHOLD_COS_SAME_CHAR}.
 */
public class AuthRecg {
    private static final String PATH_IMGS = "F:\\Projects\\Test\\yanzhengma";
    /** Directory holding the images recognized at the end of {@link #main}. */
    private static final String PATH_TESTS = "F:\\Projects\\Test\\tests";
    /** Number of images recognized at the end of {@link #main}. */
    private static final int TEST_IMAGE_COUNT = 11;
    private static final int THRESHOLD_BIT_EFFECT = 80;
    private static final float THRESHOLD_COS_SAME_CHAR = 0.87f;
    /** Number of stored glyphs fetched per database round trip while matching. */
    private static final int GLYPH_PAGE_SIZE = 50;

    /**
     * Recognizes all characters in a CAPTCHA image.
     *
     * @param poImg the CAPTCHA image
     * @return one character per segmented glyph, left to right; a glyph that cannot be
     *         matched (including when the glyph database cannot be read) yields {@code "?"}
     */
    public static String recg(BufferedImage poImg) {
        StringBuilder loResult = new StringBuilder();
        for (String lsCharBits : scan(poImg)) {
            String lsRecgChar = recgChar(lsCharBits);
            loResult.append(lsRecgChar == null ? "?" : lsRecgChar);
        }
        return loResult.toString();
    }

    /**
     * Produces a grayscale rendition of an image.
     *
     * @param poImg the source image
     * @return a new {@code TYPE_3BYTE_BGR} image of the same size, filled by converting the
     *         source through the {@code CS_GRAY} color space
     */
    private static BufferedImage getGrayPicture(BufferedImage poImg) {
        BufferedImage loGrayPicture = new BufferedImage(poImg.getWidth(), poImg.getHeight(),
                BufferedImage.TYPE_3BYTE_BGR);
        ColorConvertOp loCco = new ColorConvertOp(
                ColorSpace.getInstance(ColorSpace.CS_GRAY), null);
        loCco.filter(poImg, loGrayPicture);
        return loGrayPicture;
    }

    /**
     * Segments an image into glyphs and encodes each one.
     *
     * <p>The image is swept column by column. A run of consecutive columns that each contain
     * at least one ink pixel forms one glyph; a column without ink ends the current glyph.
     * Each glyph is cropped vertically to the rows spanned by its ink pixels.
     *
     * @param poImg the CAPTCHA image
     * @return the encoded glyphs in left-to-right order (empty if no ink was found)
     */
    private static ArrayList<String> scan(BufferedImage poImg) {
        ArrayList<String> loResults = new ArrayList<String>();
        // NOTE(review): the grayscale copy only supplies the dimensions; the threshold below
        // reads the ORIGINAL image. This is kept as is because switching the pixel source
        // would change the produced bit patterns and no longer match glyphs already stored
        // in the database - confirm the intent before changing it.
        BufferedImage loGrayImg = getGrayPicture(poImg);
        int liWidth = loGrayImg.getWidth();
        int liHeight = loGrayImg.getHeight();

        ArrayList<int[]> loCharCols = new ArrayList<int[]>();
        int liMinY = -1;
        int liMaxY = -1;
        for (int x = 0; x < liWidth; x++) {
            int[] laCol = new int[liHeight];
            boolean lbColumnHasInk = false;
            for (int y = 0; y < liHeight; y++) {
                // getRGB packs the pixel as ARGB. For a fully opaque pixel the int is
                // negative, so the remainder equals -((256 - blue) % 256): the pixel counts
                // as ink when its blue byte is in 1..175. A blue byte of exactly 0 gives a
                // remainder of 0 and is therefore treated as background.
                int liValue = poImg.getRGB(x, y) % 0x100;
                if (liValue < -THRESHOLD_BIT_EFFECT) {
                    laCol[y] = 1;
                    lbColumnHasInk = true;
                    if (liMinY == -1 || liMinY > y) {
                        liMinY = y;
                    }
                    if (liMaxY == -1 || liMaxY < y) {
                        liMaxY = y;
                    }
                }
            }
            if (lbColumnHasInk) {
                loCharCols.add(laCol);
            } else if (!loCharCols.isEmpty()) {
                // A blank column closes the glyph collected so far.
                loResults.add(encodeGlyph(loCharCols, liMinY, liMaxY));
                loCharCols = new ArrayList<int[]>();
                liMinY = -1;
                liMaxY = -1;
            }
        }
        // A glyph touching the right edge has no blank column after it to close it.
        if (!loCharCols.isEmpty()) {
            loResults.add(encodeGlyph(loCharCols, liMinY, liMaxY));
        }
        return loResults;
    }

    /** Encodes the columns of one glyph, cropped to rows piMinY..piMaxY, as "bits|w,h". */
    private static String encodeGlyph(ArrayList<int[]> poCols, int piMinY, int piMaxY) {
        int liGlyphHeight = piMaxY - piMinY + 1;
        StringBuilder loBits = new StringBuilder(poCols.size() * liGlyphHeight + 8);
        for (int[] laColBits : poCols) {
            for (int y = piMinY; y <= piMaxY; y++) {
                loBits.append(laColBits[y]);
            }
        }
        loBits.append('|').append(poCols.size()).append(',').append(liGlyphHeight);
        return loBits.toString();
    }

    /**
     * Adds the glyphs of a labelled sample image to the glyph database.
     *
     * <p>The sample is used only when segmentation yields exactly one glyph per label
     * character; otherwise it is skipped with a warning on {@code System.err}. A database
     * failure for one glyph is reported and the remaining glyphs are still processed.
     *
     * @param poImg   the sample CAPTCHA image
     * @param psChars the characters shown in the image, left to right
     */
    public static void study(BufferedImage poImg, String psChars) {
        ArrayList<String> loGlyphs = scan(poImg);
        if (loGlyphs.size() != psChars.length()) {
            System.err.println("study: skipped sample \"" + psChars + "\" because "
                    + loGlyphs.size() + " glyph(s) were segmented");
            return;
        }
        for (int i = 0; i < loGlyphs.size(); i++) {
            try {
                DBOp.insertRec(String.valueOf(psChars.charAt(i)), loGlyphs.get(i));
            } catch (ClassNotFoundException | SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Computes the cosine similarity of two 0/1 vectors given as characters.
     *
     * <p>Only {@code '1'} counts as a set bit; every other character counts as 0.
     *
     * @param paBits1 first bit vector
     * @param paBits2 second bit vector
     * @return the cosine of the angle between the vectors, or 0 when the vectors differ in
     *         length or either one has no set bit
     */
    private static float calcCos(char[] paBits1, char[] paBits2) {
        if (paBits1.length != paBits2.length) {
            return 0;
        }
        float lfMolecule = 0; // inner product of the two vectors
        int liLen1 = 0;       // squared norm of each vector (number of set bits)
        int liLen2 = 0;
        for (int i = 0; i < paBits1.length; i++) {
            int liBit1 = paBits1[i] == '1' ? 1 : 0;
            int liBit2 = paBits2[i] == '1' ? 1 : 0;
            lfMolecule += liBit1 & liBit2;
            liLen1 += liBit1;
            liLen2 += liBit2;
        }
        float lfLen1 = (float) Math.sqrt(liLen1);
        float lfLen2 = (float) Math.sqrt(liLen2);
        float lfDenominator = lfLen1 * lfLen2;
        return lfDenominator == 0 ? 0 : lfMolecule / lfDenominator;
    }

    /**
     * Recognizes a single encoded glyph.
     *
     * <p>Stored glyphs are read page by page; the first stored glyph with the same
     * dimensions and a cosine similarity of at least {@link #THRESHOLD_COS_SAME_CHAR}
     * decides the result.
     *
     * @param psBits the glyph, encoded as {@code "<bits>|<width>,<height>"}
     * @return the recognized character, or {@code null} when nothing matches, the glyph is
     *         malformed, or the stored glyphs cannot be read
     */
    private static String recgChar(String psBits) {
        String[] lsTwoParts = psBits.split("\\|");
        if (lsTwoParts.length < 2) {
            return null;
        }
        char[] laQueryBits = lsTwoParts[0].toCharArray();
        int liOffset = 0;
        while (true) {
            ArrayList<String> laBits;
            try {
                laBits = DBOp.getAllBits(liOffset, GLYPH_PAGE_SIZE);
            } catch (ClassNotFoundException | SQLException e) {
                // Without the stored glyphs nothing can be matched; report and give up
                // instead of dereferencing a missing page.
                e.printStackTrace();
                return null;
            }
            for (String lsBits : laBits) {
                String[] lsParts = lsBits.split("\\|");
                if (lsParts.length < 2 || !lsTwoParts[1].equals(lsParts[1])) {
                    continue; // malformed record or different glyph size
                }
                if (calcCos(laQueryBits, lsParts[0].toCharArray()) >= THRESHOLD_COS_SAME_CHAR) {
                    try {
                        return DBOp.getCharByBits(lsBits);
                    } catch (ClassNotFoundException | SQLException e) {
                        // Keep looking: another stored glyph may still resolve.
                        e.printStackTrace();
                    }
                }
            }
            if (laBits.size() < GLYPH_PAGE_SIZE) {
                return null; // last page reached without a match
            }
            liOffset += GLYPH_PAGE_SIZE;
        }
    }

    /**
     * Reads an image file.
     *
     * @return the decoded image, or {@code null} (after a warning on {@code System.err})
     *         when no registered reader can decode the file
     * @throws IOException if the file cannot be read
     */
    private static BufferedImage readImage(String psPath) throws IOException {
        BufferedImage loImg = ImageIO.read(new File(psPath));
        if (loImg == null) {
            System.err.println("Unsupported or corrupt image, skipped: " + psPath);
        }
        return loImg;
    }

    /**
     * Trains the glyph database from the labelled sample images
     * {@code untitled<i>.png} in {@link #PATH_IMGS}, then recognizes the test images
     * {@code untitled<i>.png} in {@link #PATH_TESTS} and prints each result.
     */
    public static void main(String[] args) throws IOException,
            ClassNotFoundException {
        // 1 Labelled samples for training; entry i is the text shown in untitled<i>.png.
        String[] loStrs = new String[] { "piade", "eusts", "tafe", "snund",
                "baips", "gike", "fenes", "lein", "caws", "falo", "abys",
                "aift", "wiin", "hoeds", "pise", "hiker", "opes", "nain",
                "tene", "nased", "loop", "eaips", "camp", "traly", "hate",
                "noped", "coll", "rirs", "bolk", "modid", "thre", "knder",
                "bots", "safer", "thrds", "flins", "coory", "ilper", "juyer",
                "goll", "soar", "foen", "dewls", "slme", "flows", "baoks",
                "plugh", "tames", "lorm", "boler", "wift", "feded", "knfer",
                "lark", "chat", "tots", "barst", "ouard", "sots", "ouing",
                "neory", "bave", "buab", "baces", "mebar", "baof", "badly",
                "baast", "bages", "baed", "baue", "bager", "bogs", "baled",
                "beree", "balds", "baamp", "baugh", "bater", "beart", "bere",
                "caed", "daick", "cuual", "icer", "muack", "cave", "coeed",
                "ferce", "costs", "ceam", "clen", "caer", "cast", "wace",
                "coep", "paced", "half", "bofts", "loft", "fimy", "maaf",
                "foeam", "falys", "bafes", "soght", "yege", "args", "kiged",
                "geman", "stge", "geugh", "baing", "goner", "gocer", "gird",
                "gorns", "thger", "cluth", "hern", "thts", "derch", "hook",
                "kixed", "haown", "seach", "sath", "geve", "soaly", "wames",
                "yaows", "vies", "marm", "trows", "yased", "zoink", "liys",
                "evss", "soys", "tayed", "clove", "woows", "jute", "voled",
                "fewns", "saow", "roxes", "wiyal", "roves", "fiyed", "wigs",
                "waxes", "twter", "evrk", "evee", "piys", "voes", "zoced",
                "werry", "velps", "eyns", "evce", "vikes", "ovat", "guve",
                "slfe", "taes", "usice", "jaade", "knva", "saar", "cazac",
                "bems", "hafe", "trons", "uslp", "sexed", "faed", "tild",
                "toow", "toar", "drall", "hazes", "yaad", "seubt", "strns",
                "buap", "redly", "shgs", "sapt", "kins", "buee", "famed",
                "coack", "hamy", "puder", "puod", "jaons", "jarks", "poate",
                "thlks" };
        for (int i = 0; i < loStrs.length; i++) {
            System.out.println(i);
            BufferedImage loSample = readImage(PATH_IMGS + "\\untitled" + i + ".png");
            if (loSample != null) {
                study(loSample, loStrs[i]);
            }
        }
        // 2 Recognize the test CAPTCHAs.
        for (int i = 0; i < TEST_IMAGE_COUNT; i++) {
            BufferedImage loTest = readImage(PATH_TESTS + "\\untitled" + i + ".png");
            if (loTest != null) {
                System.out.println(recg(loTest));
            }
        }
    }
}
字库采用SQLite数据库存储，表结构如下（原文此处为截图；从代码中的SQL可以看出：chars 表包含 id、char 两列，bits 表包含 char_id、char_bits 两列）：
训练学习样本201个验证码图片:
测试图片:
执行结果: