下面两个方法目前只是雏形,已经能够提取关键信息,后续打算分开扩展;目前在光线正常的情况下,手机拍摄的照片也可以识别出来。
先引入 Maven 依赖:
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>5.9.0</version>
</dependency>
方法调用
import net.sourceforge.tess4j.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
/**
 * Runs Tesseract OCR (via Tess4J) on an uploaded image and extracts either a citizen ID number
 * or a unified social credit code from the recognised text.
 */
public class ReadIDCardFromImage {

    /** Traineddata files looked up under {@code tessdata/} on the classpath. */
    private static final String[] TESSDATA_RESOURCES = {"chi_sim.traineddata", "eng.traineddata"};

    /** Directory the traineddata files were extracted to; {@code null} until the first extraction. */
    private static String cachedTessdataPath;

    /**
     * Returns a file-system directory containing the traineddata files bundled on the classpath.
     *
     * <p>The files are extracted to a temporary directory on first use and that directory is
     * reused by later calls, instead of creating (and never deleting) a new copy per call.
     * The method is synchronized only to guard that one-time extraction.
     *
     * @return absolute path of the directory holding the extracted traineddata files
     * @throws IOException if the directory cannot be created, a file cannot be copied, or none
     *     of the expected traineddata resources is present on the classpath
     */
    public static synchronized String getTessdataPath() throws IOException {
        // Re-extract if the cached directory has disappeared (e.g. removed by a temp cleaner).
        if (cachedTessdataPath != null && new File(cachedTessdataPath).isDirectory()) {
            return cachedTessdataPath;
        }

        Path tempDir = Files.createTempDirectory("tessdata");
        ClassLoader classLoader = ReadIDCardFromImage.class.getClassLoader();
        int copied = 0;
        for (String resourceName : TESSDATA_RESOURCES) {
            URL resource = classLoader.getResource("tessdata/" + resourceName);
            if (resource == null) {
                // Keep going: the remaining language data may still be usable on its own.
                System.err.println("未找到 tessdata 资源: tessdata/" + resourceName);
                continue;
            }
            try (InputStream inputStream = resource.openStream()) {
                Files.copy(inputStream, tempDir.resolve(resourceName), StandardCopyOption.REPLACE_EXISTING);
                copied++;
            }
        }
        if (copied == 0) {
            Files.deleteIfExists(tempDir);
            throw new IOException("classpath 下未找到任何 tessdata 训练数据文件");
        }

        cachedTessdataPath = tempDir.toString();
        return cachedTessdataPath;
    }

    /**
     * Recognises the text in an uploaded image and extracts a number from it.
     *
     * @param multipartFile uploaded image; {@code null} or empty uploads are rejected
     * @param type {@code "1"} extracts a citizen ID number; any other value extracts a unified
     *     social credit code
     * @return the extracted number; an empty string when the upload is empty or OCR fails;
     *     {@code null} when OCR succeeded but the optimizer found no number in the text
     */
    public static String getIdCardNumber(MultipartFile multipartFile, String type) {
        String resultNumber = "";
        if (multipartFile == null || multipartFile.isEmpty()) {
            System.err.println("上传的文件为空,请检查。");
            return resultNumber;
        }

        File imageFile = null;
        try {
            ITesseract tesseract = new Tesseract();
            String dataPath = getTessdataPath();
            tesseract.setDatapath(dataPath);
            System.out.println("设置的 tessdata 路径: " + dataPath);
            tesseract.setLanguage("chi_sim+eng");

            // Keep the upload's extension on the temp file.
            // NOTE(review): Tess4J appears to choose the image reader from the file extension,
            // so a ".tmp" file may be rejected — confirm against the Tess4J version in use.
            imageFile = File.createTempFile("temp", tempFileSuffix(multipartFile.getOriginalFilename()));
            Files.write(imageFile.toPath(), multipartFile.getBytes());

            String result = tesseract.doOCR(imageFile);
            // NOTE(review): this prints the full recognised text, which may contain personal data.
            System.out.println(result);

            if ("1".equals(type)) {
                resultNumber = IDCardOptimizer.optimizeIDCardNumber(result.trim());
            } else {
                resultNumber = CreditCodeOptimizer.optimizeCreditCode(result.trim());
            }
        } catch (Exception e) {
            System.err.println("OCR 识别出错: " + e.getMessage());
        } finally {
            // Delete the temp image on every path; fall back to delete-on-exit only if that fails.
            if (imageFile != null && !imageFile.delete()) {
                imageFile.deleteOnExit();
            }
        }
        return resultNumber;
    }

    /**
     * Derives a safe temp-file suffix from the client-supplied file name.
     *
     * @param originalFilename name reported by the client; may be {@code null}
     * @return {@code "."} plus the lower-cased extension when it is 1-10 ASCII letters/digits,
     *     otherwise {@code null} (which makes {@link File#createTempFile} use {@code ".tmp"})
     */
    private static String tempFileSuffix(String originalFilename) {
        if (originalFilename == null) {
            return null;
        }
        int dot = originalFilename.lastIndexOf('.');
        if (dot < 0 || dot == originalFilename.length() - 1) {
            return null;
        }
        String extension = originalFilename.substring(dot + 1);
        if (extension.length() > 10) {
            return null;
        }
        // The name is untrusted input: allow plain ASCII letters and digits only.
        for (int i = 0; i < extension.length(); i++) {
            char c = extension.charAt(i);
            boolean asciiAlnum = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
            if (!asciiAlnum) {
                return null;
            }
        }
        return "." + extension.toLowerCase(java.util.Locale.ROOT);
    }
}
识别个人身份证工具类
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts an 18-character citizen ID number from OCR text and repairs common misreads.
 *
 * <p>Look-alike characters are first mapped to the digit (or {@code X}) they usually stand
 * for, then an 18-character candidate is located and checked against the weighted mod-11
 * check digit.
 */
public class IDCardOptimizer {

    /** 17 ASCII digits followed by a digit or an upper/lower-case X. */
    private static final Pattern ID_PATTERN = Pattern.compile("(\\d{17}[0-9Xx])");

    /** Per-position weights applied to the first 17 digits. */
    private static final int[] WEIGHTS = {7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2};

    /** Check character indexed by {@code weightedSum % 11}. */
    private static final char[] CHECK_CODES = {'1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2'};

    /** Characters OCR tends to emit in place of a digit or the trailing X. */
    private static final Map<Character, Character> CONFUSION_MAP = buildConfusionMap();

    /** Builds the look-alike table without an anonymous HashMap subclass. */
    private static Map<Character, Character> buildConfusionMap() {
        Map<Character, Character> map = new HashMap<Character, Character>();
        map.put('I', '1');
        map.put('丨', '1');
        map.put('l', '1');
        map.put('O', '0');
        map.put('○', '0');
        map.put('Z', '2');
        map.put('S', '5');
        map.put('s', '5');
        map.put('B', '8');
        map.put('%', 'X');
        map.put('x', 'X');
        map.put('g', '9');
        map.put('b', '8');
        map.put('T', '7');
        map.put('t', '7');
        return map;
    }

    /**
     * Extracts and corrects an ID number from OCR text.
     *
     * <p>A candidate whose check digit already matches is returned unchanged. If no candidate in
     * the text passes the check, the first candidate is returned with its check digit
     * recomputed from the first 17 digits; that result is a best guess, not a verified number.
     *
     * @param ocrText raw OCR text; may be {@code null}
     * @return the 18-character number, or {@code null} when the text holds no candidate
     */
    public static String optimizeIDCardNumber(String ocrText) {
        if (ocrText == null) {
            System.out.println("未从文本中提取到候选身份证号码。");
            return null;
        }
        // Map look-alike characters across the whole text before searching for digits.
        String replacedText = replaceConfusionChars(ocrText);

        String rawNumber = extractRawIDCardNumber(replacedText);
        if (rawNumber == null) {
            System.out.println("未从文本中提取到候选身份证号码。");
            return null;
        }
        System.out.println("提取到的原始号码: " + rawNumber);

        String normalizedNumber = normalizeChars(rawNumber);
        System.out.println("基础字符替换后的号码: " + normalizedNumber);

        return validateAndCorrect(normalizedNumber);
    }

    /**
     * Finds an 18-character candidate in the text.
     *
     * <p>Every window matching the pattern is examined, including overlapping ones, and the
     * first window whose check digit is valid wins. When none is valid the first match is
     * returned so the caller can still attempt a correction.
     */
    private static String extractRawIDCardNumber(String text) {
        Matcher matcher = ID_PATTERN.matcher(text);
        String firstMatch = null;
        int from = 0;
        while (matcher.find(from)) {
            String candidate = matcher.group(1);
            if (firstMatch == null) {
                firstMatch = candidate;
            }
            if (validateCheckDigit(normalizeChars(candidate))) {
                return candidate;
            }
            // Step one character forward so overlapping windows are also considered.
            from = matcher.start() + 1;
        }
        return firstMatch;
    }

    /** Normalises a candidate so that a trailing lower-case x becomes X. */
    private static String normalizeChars(String rawNumber) {
        return rawNumber.replace('x', 'X');
    }

    /**
     * Returns the number unchanged when its check digit is valid, otherwise the number with the
     * check digit recomputed from its first 17 digits.
     */
    private static String validateAndCorrect(String number) {
        if (number.length() != 18) {
            System.out.println("号码长度不为 18 位,无法进行校验。");
            return null;
        }
        if (validateCheckDigit(number)) {
            System.out.println("号码直接验证通过。");
            return number;
        }
        String first17 = number.substring(0, 17);
        if (!isAsciiDigits(first17)) {
            System.out.println("未找到有效的修正方案。");
            return null;
        }
        // Assume the check digit itself was misread and derive it from the body.
        String corrected = first17 + calculateCheckDigit(first17);
        System.out.println("找到有效的修正方案: " + corrected);
        return corrected;
    }

    /**
     * Verifies the check digit of an 18-character ID number.
     *
     * @param number number to verify
     * @return {@code true} only when the number is 18 characters long, its first 17 characters
     *     are ASCII digits and the last character equals the computed check character
     *     (an upper-case {@code X} where applicable)
     */
    public static boolean validateCheckDigit(String number) {
        if (number == null || number.length() != 18) {
            return false;
        }
        String first17 = number.substring(0, 17);
        // Reject anything but ASCII digits: other characters would yield a bogus weighted sum.
        if (!isAsciiDigits(first17)) {
            return false;
        }
        return number.charAt(17) == calculateCheckDigit(first17);
    }

    /** Computes the check character for 17 ASCII digits. */
    private static char calculateCheckDigit(String first17) {
        int sum = 0;
        for (int i = 0; i < 17; i++) {
            sum += (first17.charAt(i) - '0') * WEIGHTS[i];
        }
        return CHECK_CODES[sum % 11];
    }

    /** Returns whether every character of the string is an ASCII digit. */
    private static boolean isAsciiDigits(String s) {
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /** Replaces every look-alike character in the text with its mapped character. */
    private static String replaceConfusionChars(String text) {
        StringBuilder sb = new StringBuilder(text.length());
        for (char c : text.toCharArray()) {
            sb.append(CONFUSION_MAP.getOrDefault(c, c));
        }
        return sb.toString();
    }

    /** Usage example. */
    public static void main(String[] args) {
        String ocrText = "公 民 身 份 号 码 xxxxxxxxxx";
        String result = optimizeIDCardNumber(ocrText);
        System.out.println("原始识别: xxxxxxxxxxxx%");
        System.out.println("优化结果: " + result);
    }
}
识别统一信用代码工具类
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts an 18-character unified social credit code from OCR text and repairs common
 * misreads, using the weighted mod-31 check character to decide which reading is right.
 */
public class CreditCodeOptimizer {

    /** Code alphabet; a character's index is its numeric value. I, O, S, V and Z are absent. */
    private static final String ALPHABET = "0123456789ABCDEFGHJKLMNPQRTUWXY";

    /** Per-position weights applied to the first 17 characters. */
    private static final int[] WEIGHTS = {1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28};

    /** Shape of a candidate code. */
    private static final Pattern CODE_PATTERN =
            Pattern.compile("[159Y][1-9A-NP-Z][0-9]{6}[0-9A-Z]{9}[0-9A-Z]");

    /**
     * Look-alike characters that are never legal in a code, so replacing them is always safe.
     * {@code 'B'} is deliberately absent: it is a legal character and is only read as
     * {@code '8'} when that makes the checksum pass.
     */
    private static final Map<Character, Character> CONFUSION_MAP = buildConfusionMap();

    /** Builds the look-alike table without an anonymous HashMap subclass. */
    private static Map<Character, Character> buildConfusionMap() {
        Map<Character, Character> map = new HashMap<Character, Character>();
        map.put('I', '1');
        map.put('丨', '1');
        map.put('l', '1');
        map.put('O', '0');
        map.put('○', '0');
        map.put('Z', '2');
        map.put('S', '5');
        return map;
    }

    /**
     * Extracts and corrects a unified social credit code from OCR text.
     *
     * <p>Readings are tried in this order: the code as read; the code with every {@code B}
     * read as {@code 8}; a single illegal character solved from the existing check character;
     * and finally the check character recomputed from the first 17 characters. The last step
     * yields a best guess, not a verified code.
     *
     * @param ocrText raw OCR text; may be {@code null}
     * @return the 18-character code, or {@code null} when no candidate is found or it cannot
     *     be repaired
     */
    public static String optimizeCreditCode(String ocrText) {
        if (ocrText == null) {
            System.out.println("未从文本中提取到18位候选代码。");
            return null;
        }
        // Replace never-legal look-alikes first so a misread 'O' or 'l' cannot hide the code
        // from the pattern below.
        String rawCode = extractRawCode(replaceConfusionChars(ocrText));
        if (rawCode == null) {
            System.out.println("未从文本中提取到18位候选代码。");
            return null;
        }
        System.out.println("提取到的原始代码: " + rawCode);
        return validateAndCorrect(rawCode);
    }

    /**
     * Finds an 18-character candidate in the text.
     *
     * <p>Every window matching the pattern is examined, including overlapping ones, and the
     * first window that passes the checksum (as read, or with {@code B} read as {@code 8})
     * wins. When none passes, the first match is returned so a repair can still be attempted.
     */
    private static String extractRawCode(String text) {
        Matcher matcher = CODE_PATTERN.matcher(text);
        String firstMatch = null;
        int from = 0;
        while (matcher.find(from)) {
            String candidate = matcher.group();
            if (firstMatch == null) {
                firstMatch = candidate;
            }
            if (validateCheckDigit(candidate) || validateCheckDigit(candidate.replace('B', '8'))) {
                return candidate;
            }
            // Step one character forward so overlapping windows are also considered.
            from = matcher.start() + 1;
        }
        return firstMatch;
    }

    /** Replaces every never-legal look-alike character in the text. */
    private static String replaceConfusionChars(String text) {
        StringBuilder sb = new StringBuilder(text.length());
        for (char c : text.toCharArray()) {
            sb.append(CONFUSION_MAP.getOrDefault(c, c));
        }
        return sb.toString();
    }

    /** Validates the code and, when validation fails, tries the repairs described on the entry point. */
    private static String validateAndCorrect(String code) {
        if (code.length() != 18) {
            System.out.println("代码长度不为18位,无法进行校验。");
            return null;
        }
        if (validateCheckDigit(code)) {
            System.out.println("代码直接验证通过。");
            return code;
        }

        // 'B' is legal, so it is only read as '8' when that reading satisfies the checksum.
        String withEights = code.replace('B', '8');
        if (!withEights.equals(code)) {
            System.out.println("基础字符替换后的代码: " + withEights);
            if (validateCheckDigit(withEights)) {
                System.out.println("找到有效的修正方案: " + withEights);
                return withEights;
            }
        }

        String repaired = repair(code);
        if (repaired != null) {
            System.out.println("找到有效的修正方案: " + repaired);
            return repaired;
        }
        System.out.println("未找到有效的修正方案。");
        return null;
    }

    /**
     * Repairs a code that failed the checksum.
     *
     * @return the repaired code, or {@code null} when more than one character is illegal or the
     *     check character needed to solve for the illegal one is itself illegal
     */
    private static String repair(String code) {
        String first17 = code.substring(0, 17);
        int illegalCount = 0;
        int illegalIndex = -1;
        for (int i = 0; i < 17; i++) {
            if (ALPHABET.indexOf(first17.charAt(i)) < 0) {
                illegalCount++;
                illegalIndex = i;
            }
        }

        if (illegalCount == 0) {
            // Body is well-formed: assume the check character was misread and recompute it.
            return first17 + ALPHABET.charAt(checkValue(first17));
        }
        if (illegalCount == 1 && ALPHABET.indexOf(code.charAt(17)) >= 0) {
            // One unknown character: keep the check character and find the value that fits it.
            for (char c : ALPHABET.toCharArray()) {
                String candidate = code.substring(0, illegalIndex) + c + code.substring(illegalIndex + 1);
                if (validateCheckDigit(candidate)) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Verifies the check character of an 18-character code (GB 32100-2015).
     *
     * @param code code to verify
     * @return {@code true} only when the code is 18 characters long, its first 17 characters
     *     all belong to the code alphabet and the last character equals the computed check
     *     character
     */
    public static boolean validateCheckDigit(String code) {
        if (code == null || code.length() != 18) {
            return false;
        }
        int value = checkValue(code.substring(0, 17));
        return value >= 0 && code.charAt(17) == ALPHABET.charAt(value);
    }

    /**
     * Computes the check value for the first 17 characters.
     *
     * @return the index of the check character in the alphabet, or {@code -1} when any of the
     *     17 characters is not in the alphabet
     */
    private static int checkValue(String first17) {
        int sum = 0;
        for (int i = 0; i < 17; i++) {
            int value = ALPHABET.indexOf(first17.charAt(i));
            if (value < 0) {
                return -1;
            }
            sum += value * WEIGHTS[i];
        }
        return (31 - (sum % 31)) % 31;
    }

    /** Usage example. */
    public static void main(String[] args) {
        String ocrText = "xxxxxxxxxxxx二 | [ `禽…二工`霰′_( 3 荸 需董蓁辞_ @ '".trim();
        String result = optimizeCreditCode(ocrText);
        System.out.println("原始识别: xxxxxxxxxxxx");
        System.out.println("优化结果: " + result);
    }
}