百度OCR识别使用教程(详细攻略)
获取client_id和client_secret
我已经领取过了;尚未领取的读者请点击“去领取”,并根据后续指引完成领取
创建应用时需要勾选:通用文字识别(标准版)、增值税发票验真、增值税发票识别
由于TEST应用已经存在,所以我改名为TEST001;请根据自己创建的应用名称找到对应的API Key和Secret Key
jar依赖
fastjson:格式化json字符串
log4j:日志输出
commons:集合操作工具类
<!-- fastjson: JSON parsing/formatting.
     Upgraded from 1.2.28 to 1.2.83: releases up to 1.2.80 are covered by the published
     fastjson autotype deserialization advisories. JSON.parseObject / JSON.toJSONString /
     TypeReference, as used by this tutorial, are unchanged. -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.83</version>
</dependency>
<!-- log4j: logging (API + core implementation) -->
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-api</artifactId>
    <version>2.17.2</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-core</artifactId>
    <version>2.17.2</version>
</dependency>
<!-- tools: commons-collections supplies MapUtils -->
<dependency>
    <groupId>commons-collections</groupId>
    <artifactId>commons-collections</artifactId>
    <version>3.2.2</version>
</dependency>
<!-- NOTE(review): commons-codec is not referenced by the classes shown in this tutorial
     (Base64 comes from java.util); confirm whether it is still needed. -->
<dependency>
    <groupId>commons-codec</groupId>
    <artifactId>commons-codec</artifactId>
    <version>1.10</version>
</dependency>
BaseImg64Util类
将图片文件读取后转成Base64字符串,并进行URL编码
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
/**
 * Converts an image file into a Base64 string that is additionally URL-encoded,
 * so it can be placed directly into an {@code application/x-www-form-urlencoded} body.
 */
public class BaseImg64Util {

    private static final Logger logger = LogManager.getLogger(BaseImg64Util.class);

    /** Utility class; not meant to be instantiated. */
    private BaseImg64Util() {
    }

    /**
     * Reads the file at {@code imgPath}, Base64-encodes its bytes and URL-encodes the result
     * using UTF-8.
     *
     * @param imgPath path of the image file to read
     * @return the URL-encoded Base64 representation of the file content
     * @throws IOException declared for compatibility with existing callers
     * @throws RuntimeException wrapping the underlying {@link IOException} when the file
     *         cannot be opened or read (the failure is logged first)
     */
    public static String getImageStrByPath(String imgPath) throws IOException {
        byte[] data;
        // readAllBytes() keeps reading until end of stream. The previous
        // available() + single read() combination could return a truncated image,
        // because neither call is guaranteed to cover the whole file.
        try (InputStream in = new FileInputStream(imgPath)) {
            data = in.readAllBytes();
        } catch (IOException e) {
            logger.error("图片转Base64发生异常!", e);
            throw new RuntimeException(e);
        }
        String base64 = Base64.getEncoder().encodeToString(data);
        // Base64 output may contain '+', '/' and '=', which must be escaped in a form body.
        return URLEncoder.encode(base64, StandardCharsets.UTF_8);
    }
}
CustomUtils类
实现逻辑包含:通用文字识别(标准版)、增值税发票识别、增值税发票验真、获取Token
注:在代码中搜索“client_id=应用的API Key”,将其中的“应用的API Key”替换为自己应用的API Key,例如:client_id=F1SJLiNolqAr
注:在代码中搜索“client_secret=应用的Secret Key”,将其中的“应用的Secret Key”替换为自己应用的Secret Key,例如:client_secret=ayZYFnavTfYU
InvoiceDate:需要根据实际情况进行格式化,验真接口期望的格式为yyyyMMdd,例如:20201011
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import org.apache.commons.collections.MapUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
/**
 * Small client for the Baidu OCR REST endpoints used in this tutorial: general text
 * recognition (standard edition), VAT invoice recognition, VAT invoice verification,
 * and access-token retrieval.
 *
 * <p>The access token obtained by {@link #getToken()} is stored in
 * {@link FieldEnum#ACCESS_TOKEN} and appended to every recognition URL. That storage is
 * plain shared mutable state without synchronization.
 */
public class CustomUtils {

    private static final Logger logger = LogManager.getLogger(CustomUtils.class);

    private static final String EQUAL_SIGN = "=";
    private static final String AND_SIGN = "&";

    /**
     * Endpoint URLs, request parameters and header names/values used when calling Baidu.
     * Each constant carries a key, a value and a free-text note.
     */
    public enum FieldEnum {
        // Endpoint URLs
        AUTH_URL("AUTH_URL", "https://aip.baidubce.com/oauth/2.0/token?", "获取百度token"),
        GENERAL_BASIC("general_basic", "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=", "通用文字识别(标准版)"),
        VAT_INVOICE("vat_invoice", "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice?access_token=", "增值税发票识别"),
        VAT_INVOICE_VERIFICATION("vat_invoice_verification", "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice_verification?access_token=", "增值税发票验真"),
        // Parameters sent to Baidu
        GRANT_TYPE("grant_type", "grant_type=client_credentials", "必须参数,固定为client_credentials"),
        CLIENT_ID("client_id", "client_id=应用的API Key", "必须参数,应用的API Key"),
        CLIENT_SECRET("client_secret", "client_secret=应用的Secret Key", "必须参数,应用的Secret Key"),
        X_WWW_FORM_URLENCODED("Content-Type", "application/x-www-form-urlencoded", ""),
        CONTENT_TYPE_JSON("Content-Type", "application/json", ""),
        ACCEPT_JSON("Accept", "application/json", ""),
        IMAGE("image", "image", "image字段"),
        // METHOD_GET and METHOD_POST deliberately share the key "METHOD";
        // recognition() looks the HTTP method up under that single key.
        METHOD_GET("METHOD", "GET", "GET请求"),
        METHOD_POST("METHOD", "POST", "POST请求"),
        ACCESS_TOKEN("access_token", "", ""),
        ;

        private final String key;
        // Not final: ACCESS_TOKEN's value is replaced at runtime by setAccessToken().
        private String val;
        private final String note;

        /**
         * Stores the access token that is appended to the recognition URLs.
         *
         * @param token token value to cache in {@link #ACCESS_TOKEN}
         */
        public static void setAccessToken(String token) {
            ACCESS_TOKEN.val = token;
        }

        FieldEnum(String key, String val, String note) {
            this.key = key;
            this.val = val;
            this.note = note;
        }

        public String getKey() {
            return key;
        }

        public String getVal() {
            return val;
        }

        public String getNote() {
            return note;
        }
    }

    /**
     * Utility class; private constructor prevents instantiation.
     *
     * @author 司马公和
     * @date 2023/1/11 18:43
     */
    private CustomUtils() {
    }

    /**
     * Verifies a VAT invoice.
     *
     * <p>Reads {@code CheckCode}, {@code InvoiceCode}, {@code InvoiceNum}, {@code InvoiceDate}
     * and {@code TotalAmount} from {@code params}. Only the last six characters of the check
     * code are sent, and the characters 年/月/日 are stripped from the date. The invoice type
     * is fixed to {@code elec_normal_invoice}.
     *
     * @author 司马公和
     * @date 2023/1/11 18:44
     * @param params invoice fields, e.g. the map returned by {@link #vatInvoice(String)}
     * @return the parsed response; an empty map when {@code params} is null/empty or when
     *         {@code CheckCode} or {@code InvoiceDate} is missing
     * @throws IOException declared for compatibility with existing callers
     */
    public static Map<String, String> vatInvoiceVerification(Map<String, String> params) throws IOException {
        if (MapUtils.isEmpty(params)) {
            return new HashMap<>();
        }
        String checkCode = params.get("CheckCode");
        String invoiceDate = params.get("InvoiceDate");
        if (checkCode == null || invoiceDate == null) {
            // Previously this path ended in a NullPointerException.
            logger.error("发票验真缺少必要字段CheckCode或InvoiceDate!");
            return new HashMap<>();
        }
        // Keep the last six characters; a shorter code is sent unchanged instead of
        // failing with StringIndexOutOfBoundsException.
        if (checkCode.length() > 6) {
            checkCode = checkCode.substring(checkCode.length() - 6);
        }
        // NOTE(review): only the 年/月/日 characters are removed; month/day are not
        // zero-padded here, so the input is assumed to already carry two-digit parts.
        String compactDate = invoiceDate.replace("年", "").replace("月", "").replace("日", "");

        String query = "&invoice_code=" + params.get("InvoiceCode")
                + "&invoice_num=" + params.get("InvoiceNum")
                + "&invoice_date=" + compactDate
                + "&check_code=" + checkCode
                + "&invoice_type=" + "elec_normal_invoice"
                + "&total_amount=" + params.get("TotalAmount");

        String url = FieldEnum.VAT_INVOICE_VERIFICATION.val + FieldEnum.ACCESS_TOKEN.val + query;
        return recognition(url, formPostHeaders());
    }

    /**
     * Recognizes a VAT invoice image.
     *
     * @author 司马公和
     * @date 2023/1/11 18:45
     * @param imagePath path of the invoice image
     * @return the {@code words_result} object of the response parsed into a map; an empty map
     *         when the file does not exist or the response carries no {@code words_result}
     * @throws IOException declared for compatibility with existing callers
     */
    public static Map<String, String> vatInvoice(String imagePath) throws IOException {
        Map<String, String> response = recognizeImage(FieldEnum.VAT_INVOICE, imagePath);
        String wordsResult = response.get("words_result");
        if (wordsResult == null || wordsResult.isEmpty()) {
            return new HashMap<>();
        }
        Map<String, String> fields = JSON.parseObject(wordsResult, new TypeReference<Map<String, String>>(){});
        return fields != null ? fields : new HashMap<>();
    }

    /**
     * Checks whether a file is missing.
     *
     * <p>Despite the name, the return value is {@code true} when the file does NOT exist.
     *
     * @author 司马公和
     * @date 2023/1/11 18:45
     * @param path file path to test; {@code null} is treated as missing
     * @return {@code true} if the path is null or no file exists there, {@code false} otherwise
     */
    public static boolean checkExists(String path) {
        boolean missing = path == null || !new File(path).exists();
        if (missing) {
            logger.info("文件不存在!");
        }
        return missing;
    }

    /**
     * Runs general text recognition (standard edition) on an image.
     *
     * @author 司马公和
     * @date 2023/1/11 18:45
     * @param imagePath path of the image
     * @return the parsed response; an empty map when the file does not exist or the request
     *         produced no body
     * @throws IOException declared for compatibility with existing callers
     */
    public static Map<String, String> generalBasic(String imagePath) throws IOException {
        return recognizeImage(FieldEnum.GENERAL_BASIC, imagePath);
    }

    /**
     * Requests an access token and, when the response contains one, caches it in
     * {@link FieldEnum#ACCESS_TOKEN}.
     *
     * @author 司马公和
     * @date 2023/1/11 18:45
     * @return the parsed token response (empty when the request failed)
     */
    public static Map<String, String> getToken() {
        Map<String, String> headers = new HashMap<>();
        headers.put(FieldEnum.METHOD_GET.key, FieldEnum.METHOD_GET.val);
        headers.put(FieldEnum.CONTENT_TYPE_JSON.key, FieldEnum.CONTENT_TYPE_JSON.val);
        headers.put(FieldEnum.ACCEPT_JSON.key, FieldEnum.ACCEPT_JSON.val);

        Map<String, String> result = recognition(getAccessTokenUrl(), headers);
        String token = result.get(FieldEnum.ACCESS_TOKEN.key);
        if (token != null) {
            FieldEnum.setAccessToken(token);
        } else {
            // Do not overwrite the cached token with null when the request failed.
            logger.error("获取百度token失败,响应中没有access_token!");
        }
        return result;
    }

    /**
     * Sends one HTTP request and parses the JSON response into a map.
     *
     * <p>Three entries of {@code params} are treated as request settings and are not sent in
     * the body: {@code "METHOD"} (HTTP method), {@code "Content-Type"} and {@code "Accept"}.
     * Every remaining entry is written to the request body as {@code key=value}, joined by
     * {@code &}. Values are written as-is, so callers must pass them already URL-encoded.
     * The caller's map is not modified.
     *
     * @author 司马公和
     * @date 2023/1/11 18:46
     * @param requestUrl full request URL including any query string
     * @param params request settings plus form fields; may be null
     * @return the parsed response; an empty map when the request failed, the status was not
     *         200 or the body was empty
     */
    public static Map<String, String> recognition(String requestUrl, Map<String, String> params) {
        // Copy first: the control entries are removed below and the caller's map must stay intact.
        Map<String, String> fields = params == null ? new HashMap<>() : new HashMap<>(params);
        String method = fields.remove(FieldEnum.METHOD_POST.key);
        String contentType = fields.remove(FieldEnum.CONTENT_TYPE_JSON.key);
        String accept = fields.remove(FieldEnum.ACCEPT_JSON.key);

        StringBuilder responseBody = new StringBuilder();
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(requestUrl).openConnection();
            if (method != null) {
                connection.setRequestMethod(method);
            }
            connection.setDoOutput(true);
            connection.setDoInput(true);
            connection.setUseCaches(false);
            if (contentType != null) {
                connection.setRequestProperty("Content-Type", contentType);
            }
            if (accept != null) {
                connection.setRequestProperty("Accept", accept);
            }
            if (!fields.isEmpty()) {
                // Explicit UTF-8 instead of the platform default charset.
                try (Writer writer = new OutputStreamWriter(connection.getOutputStream(), StandardCharsets.UTF_8)) {
                    writer.write(buildFormBody(fields));
                }
            }
            int status = connection.getResponseCode();
            if (status == HttpURLConnection.HTTP_OK) {
                // The response is decoded as UTF-8 so Chinese text survives on platforms
                // whose default charset is something else.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        responseBody.append(line);
                    }
                }
            } else {
                logger.error("请求失败,HTTP状态码:{}", status);
            }
        } catch (IOException e) {
            logger.error("访问通用文字识别(标准版)异常!", e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }

        if (responseBody.length() == 0) {
            return new HashMap<>();
        }
        Map<String, String> result = JSON.parseObject(responseBody.toString(), new TypeReference<Map<String, String>>(){});
        return result != null ? result : new HashMap<>();
    }

    /**
     * Builds the token request URL from the auth endpoint, grant type, client id and
     * client secret.
     *
     * @author 司马公和
     * @date 2023/1/11 18:46
     * @return the complete token URL
     */
    public static String getAccessTokenUrl() {
        return FieldEnum.AUTH_URL.val
                + FieldEnum.GRANT_TYPE.val + AND_SIGN
                + FieldEnum.CLIENT_ID.val + AND_SIGN
                + FieldEnum.CLIENT_SECRET.val;
    }

    /** Returns the request settings shared by all form-encoded POST calls. */
    private static Map<String, String> formPostHeaders() {
        Map<String, String> headers = new HashMap<>();
        headers.put(FieldEnum.METHOD_POST.key, FieldEnum.METHOD_POST.val);
        headers.put(FieldEnum.X_WWW_FORM_URLENCODED.key, FieldEnum.X_WWW_FORM_URLENCODED.val);
        headers.put(FieldEnum.ACCEPT_JSON.key, FieldEnum.ACCEPT_JSON.val);
        return headers;
    }

    /** Posts the encoded image at {@code imagePath} to {@code endpoint}; empty map if the file is missing. */
    private static Map<String, String> recognizeImage(FieldEnum endpoint, String imagePath) throws IOException {
        // checkExists() returns true when the file is missing.
        if (checkExists(imagePath)) {
            return new HashMap<>();
        }
        Map<String, String> request = formPostHeaders();
        request.put(FieldEnum.IMAGE.key, BaseImg64Util.getImageStrByPath(imagePath));
        return recognition(endpoint.val + FieldEnum.ACCESS_TOKEN.val, request);
    }

    /** Joins form fields as {@code k1=v1&k2=v2}; a null value is written as an empty string. */
    private static String buildFormBody(Map<String, String> fields) {
        StringBuilder body = new StringBuilder();
        for (Map.Entry<String, String> field : fields.entrySet()) {
            if (body.length() > 0) {
                body.append(AND_SIGN);
            }
            String value = field.getValue();
            body.append(field.getKey()).append(EQUAL_SIGN).append(value == null ? "" : value);
        }
        return body.toString();
    }
}
测试类
import com.alibaba.fastjson.JSON;
import util.CustomUtils;
import java.util.Map;
/**
 * Demo driver: fetches a token, then runs general text recognition, VAT invoice
 * recognition and VAT invoice verification against one image, printing each result.
 */
public class Main {

    public static void main(String[] args) {
        // Replace with the absolute path of the image to recognize.
        final String imagePath = "图片绝对地址";
        try {
            // The token does not have to be fetched on every run; it can be cached and
            // refreshed once it expires.
            Map<String, String> tokenResponse = CustomUtils.getToken();
            System.out.println("token:" + tokenResponse);

            // General text recognition (standard edition)
            Map<String, String> generalText = CustomUtils.generalBasic(imagePath);
            System.out.println("通用文字识别(标准版):" + JSON.toJSONString(generalText));

            // VAT invoice recognition
            Map<String, String> invoiceFields = CustomUtils.vatInvoice(imagePath);
            System.out.println("增值税发票识别:" + JSON.toJSONString(invoiceFields));

            // VAT invoice verification, fed with the fields recognized above
            Map<String, String> verification = CustomUtils.vatInvoiceVerification(invoiceFields);
            System.out.println("增值税发票验真:" + JSON.toJSONString(verification));
        } catch (Exception e) {
            System.out.println("异常!");
            e.printStackTrace();
        }
    }
}
效果
注:由于验真次数已用完,无法获取详细信息。查验成功且发票为真时,VerifyMessage字段返回“查验成功发票一致”。