场景:
在图片中查找是否包含某个关键字
解决方案:
百度OCR每月可使用2000次,包含(高精度识别、高精度含位置识别、标准识别、标准含位置识别),以及普通表格识别(每天200次)。利用次数限制:如果第一个接口的次数已用完,则使用第二个接口,按此逻辑依次调用。调用成功的次数缓存到Redis,并根据实际情况设置过期时间;如果某个接口的次数已满,则调用次数未满的接口。
package com.lanlinker.cloud.labor.util;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.lanlinker.cloud.labor.component.HttpRequestComponent;
import com.lanlinker.cloud.labor.config.ApplicationContextHelper;
import io.minio.MinioClient;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.redis.core.RedisTemplate;
import java.io.*;
import java.net.URLEncoder;
import java.util.*;
import java.util.concurrent.TimeUnit;
@Slf4j
public class OCRUtil {
private static HttpRequestComponent httpRequestComponent;
public static HttpRequestComponent getHttpRequestComponent() {
if(httpRequestComponent==null){
httpRequestComponent = new HttpRequestComponent();
return httpRequestComponent;
}
return httpRequestComponent;
}
//获取token
private static String TOKEN_URL = "https://aip.baidubce.com/oauth/2.0/token";
//高精度识别
private static String HIGH_PRECISION_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic";
//高精度含位置版
private static String HIGH_PRECISION_SEAT_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate";
//标准版
private static String STANDARD_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic";
//标准含位置版
private static String STANDARD_SEAT_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/general";
//表格文字识别
private static String FORM_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/form";
private static final String CLIENT_ID = "";
private static final String CLIENT_SECRET = "";
public static String BAIDU_TOKEN = "BAIDU_TOKEN";
public static String HIGH_PRECISION = "HIGH_PRECISION";
public static String HIGH_PRECISION_SEAT = "HIGH_PRECISION_SEAT";
public static String STANDARD = "STANDARD";
public static String STANDARD_SEAT = "STANDARD_SEAT";
public static String FORM = "FORM";
private static volatile Integer index = 0;
private static RedisTemplate redisTemplate;
private static Set<String> set = new HashSet<>();
public static RedisTemplate getRedisTemplate() {
if(redisTemplate==null){
redisTemplate = (RedisTemplate)ApplicationContextHelper.getBean("redisTemplate");
}
return redisTemplate;
}
private static String getToken() throws Exception{
Object token = getRedisTemplate().opsForValue().get(BAIDU_TOKEN);
if(token==null){
TOKEN_URL = TOKEN_URL + "?grant_type=client_credentials&client_id="+CLIENT_ID+"&client_secret="+CLIENT_SECRET;
String post = getHttpRequestComponent().execute("POST_FOR_URL",TOKEN_URL,null);
// 返回结果格式为Json字符串
JSONObject jsonObject = JSONObject.parseObject(post);
String accessToken = jsonObject.getString("access_token");
Integer expiresIn = jsonObject.getInteger("expires_in");
getRedisTemplate().opsForValue().set(BAIDU_TOKEN,accessToken,expiresIn, TimeUnit.SECONDS);
return accessToken;
}else{
return token.toString();
}
}
//用于token失效试调用
private static String getTokens() throws Exception{
TOKEN_URL = TOKEN_URL + "?grant_type=client_credentials&client_id="+CLIENT_ID+"&client_secret="+CLIENT_SECRET;
String post = getHttpRequestComponent().execute("POST_FOR_URL",TOKEN_URL,null);
// 返回结果格式为Json字符串
JSONObject jsonObject = JSONObject.parseObject(post);
String accessToken = jsonObject.getString("access_token");
Integer expiresIn = jsonObject.getInteger("expires_in");
getRedisTemplate().opsForValue().set(BAIDU_TOKEN,accessToken,expiresIn, TimeUnit.SECONDS);
return accessToken;
}
//高精度
public static Set<String> getAccurateBasicText(List<String> images, String ENDPOINT, String ACCESS_KEY, String SECRET_KEY, String BUCKET_NAME){
try {
String token = getToken();
HIGH_PRECISION_URL = HIGH_PRECISION_URL + "?access_token="+token;
Integer value = 0 ;
Iterator<String> iterator = images.iterator();
while (iterator.hasNext()){
String path = iterator.next();
InputStream in = getInputStream(path,ENDPOINT,ACCESS_KEY,SECRET_KEY,BUCKET_NAME);
byte[] fileByte = getFileBytes(in);// 获取图片字节数组
String base64UrlencodedImg = base64Urlencode(fileByte);// 编码
String post = getHttpRequestComponent().execute("POST_FOR_URL", HIGH_PRECISION_URL, base64UrlencodedImg);
JSONObject resultsData = JSONObject.parseObject(post);
Integer errorCode = resultsData.getInteger("error_code");
//免费的用完了哦!赶紧去充钱。。。
if(errorCode!=null){
log.error("errorCode===>"+errorCode);
if(errorCode==17||errorCode==19){
Long expire = getRedisTemplate().getExpire(HIGH_PRECISION, TimeUnit.SECONDS);
if (expire != null && expire >0) {
getRedisTemplate().opsForValue().set(HIGH_PRECISION,2000,expire,TimeUnit.SECONDS);
}
//想不到吧!免费的不止一个哟。。。
return getAccurateBasicSeatText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}else if(errorCode==18){
//想不到吧!免费的不止一个哟。。。
return getAccurateBasicSeatText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}else if(errorCode==100||errorCode==110||errorCode==111){
if (index==3) {
return new HashSet<>();
}
index++;
//token失效重新获取token
getTokens();
return getAccurateBasicText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}
}
JSONArray wordsResult = resultsData.getJSONArray("words_result");
if (wordsResult==null) {
continue;
}
for (int j = 0; j < wordsResult.size(); j++) {
JSONObject jsonObject = wordsResult.getJSONObject(j);
set.add(jsonObject.getString("words").trim());
}
value++;
iterator.remove();
}
/*for (int i = 0; i < images.size(); i++) {
String path = images.get(i);
}*/
Object size = getRedisTemplate().opsForValue().get(HIGH_PRECISION);
if (size!=null) {
Integer integer = Integer.valueOf(size.toString());
Long expire = getRedisTemplate().getExpire(HIGH_PRECISION, TimeUnit.SECONDS);
if (expire != null && expire >0) {
getRedisTemplate().opsForValue().set(HIGH_PRECISION,integer+value,expire,TimeUnit.SECONDS);
}
}else{
getRedisTemplate().opsForValue().set(HIGH_PRECISION,value,30,TimeUnit.DAYS);
}
return set;
}catch (Exception e){
e.printStackTrace();
return getAccurateBasicSeatText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}
}
//高精度含位置版
public static Set<String> getAccurateBasicSeatText(List<String> images,String ENDPOINT,String ACCESS_KEY,String SECRET_KEY,String BUCKET_NAME){
try {
String token = getToken();
HIGH_PRECISION_SEAT_URL = HIGH_PRECISION_SEAT_URL + "?access_token="+token;
Integer value = 0;
Iterator<String> iterator = images.iterator();
while (iterator.hasNext()){
//for (int i = 0; i < images.size(); i++) {
String path = iterator.next();//images.get(i);
InputStream in = getInputStream(path,ENDPOINT,ACCESS_KEY,SECRET_KEY,BUCKET_NAME);
byte[] fileByte = getFileBytes(in);// 获取图片字节数组
String base64UrlencodedImg = base64Urlencode(fileByte);// 编码
String post = getHttpRequestComponent().execute("POST_FOR_URL", HIGH_PRECISION_SEAT_URL, base64UrlencodedImg);
JSONObject resultsData = JSONObject.parseObject(post);
Integer errorCode = resultsData.getInteger("error_code");
//免费的用完了哦!赶紧去充钱。。。
if(errorCode!=null){
log.error("errorCode===>"+errorCode);
if(errorCode==17||errorCode==19){
Long expire = getRedisTemplate().getExpire(HIGH_PRECISION_SEAT, TimeUnit.SECONDS);
if (expire != null && expire >0) {
getRedisTemplate().opsForValue().set(HIGH_PRECISION_SEAT,2000,expire,TimeUnit.SECONDS);
}
//想不到吧!免费的不止一个哟。。。
return getStandardText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}else if(errorCode==18){
//想不到吧!免费的不止一个哟。。。
return getStandardText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}else if(errorCode==100||errorCode==110||errorCode==111){
if (index==3) {
return new HashSet<>();
}
index++;
//token失效重新获取token
getTokens();
return getAccurateBasicSeatText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}
}
JSONArray wordsResult = resultsData.getJSONArray("words_result");
if (wordsResult==null) {
continue;
}
for (int j = 0; j < wordsResult.size(); j++) {
JSONObject jsonObject = wordsResult.getJSONObject(j);
set.add(jsonObject.getString("words").trim());
}
value++;
iterator.remove();
}
Object size = getRedisTemplate().opsForValue().get(HIGH_PRECISION_SEAT);
if (size!=null) {
Integer integer = Integer.valueOf(size.toString());
Long expire = getRedisTemplate().getExpire(HIGH_PRECISION_SEAT, TimeUnit.SECONDS);
if (expire != null && expire >0) {
getRedisTemplate().opsForValue().set(HIGH_PRECISION_SEAT,integer+value,expire,TimeUnit.SECONDS);
}
}else{
getRedisTemplate().opsForValue().set(HIGH_PRECISION_SEAT,value,30,TimeUnit.DAYS);
}
return set;
}catch (Exception e){
e.printStackTrace();
return getStandardText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}
}
//标准
public static Set<String> getStandardText(List<String> images,String ENDPOINT,String ACCESS_KEY,String SECRET_KEY,String BUCKET_NAME){
try {
String token = getToken();
STANDARD_URL = STANDARD_URL + "?access_token="+token;
Integer value = 0;
//for (int i = 0; i < images.size(); i++) {
Iterator<String> iterator = images.iterator();
while (iterator.hasNext()){
String path = iterator.next();//images.get(i);
InputStream in = getInputStream(path,ENDPOINT,ACCESS_KEY,SECRET_KEY,BUCKET_NAME);
byte[] fileByte = getFileBytes(in);// 获取图片字节数组
String base64UrlencodedImg = base64Urlencode(fileByte);// 编码
String post = getHttpRequestComponent().execute("POST_FOR_URL", STANDARD_URL, base64UrlencodedImg);
JSONObject resultsData = JSONObject.parseObject(post);
Integer errorCode = resultsData.getInteger("error_code");
//免费的用完了哦!赶紧去充钱。。。
if(errorCode!=null){
log.error("errorCode===>"+errorCode);
if(errorCode==17||errorCode==19){
Long expire = getRedisTemplate().getExpire(STANDARD, TimeUnit.SECONDS);
if (expire != null && expire >0) {
getRedisTemplate().opsForValue().set(STANDARD,2000,expire,TimeUnit.SECONDS);
}
//想不到吧!免费的不止一个哟。。。
return getStandardSeatText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}else if(errorCode==18){
//想不到吧!免费的不止一个哟。。。
return getStandardSeatText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}else if(errorCode==100||errorCode==110||errorCode==111){
if (index==3) {
return new HashSet<>();
}
index++;
//token失效重新获取token
getTokens();
return getStandardText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}
}
JSONArray wordsResult = resultsData.getJSONArray("words_result");
if (wordsResult==null) {
continue;
}
for (int j = 0; j < wordsResult.size(); j++) {
JSONObject jsonObject = wordsResult.getJSONObject(j);
set.add(jsonObject.getString("words").trim());
}
value++;
iterator.remove();
}
Object size = getRedisTemplate().opsForValue().get(STANDARD);
if (size!=null) {
Integer integer = Integer.valueOf(size.toString());
Long expire = getRedisTemplate().getExpire(STANDARD, TimeUnit.SECONDS);
if (expire != null && expire > 0) {
getRedisTemplate().opsForValue().set(STANDARD,integer+value,expire,TimeUnit.SECONDS);
}
}else{
getRedisTemplate().opsForValue().set(STANDARD,value,30,TimeUnit.DAYS);
}
return set;
}catch (Exception e){
e.printStackTrace();
return getStandardSeatText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}
}
//标准含位置
public static Set<String> getStandardSeatText(List<String> images,String ENDPOINT,String ACCESS_KEY,String SECRET_KEY,String BUCKET_NAME){
try {
String token = getToken();
STANDARD_SEAT_URL = STANDARD_SEAT_URL + "?access_token="+token;
Integer value = 0;
//for (int i = 0; i < images.size(); i++) {
Iterator<String> iterator = images.iterator();
while (iterator.hasNext()){
String path = iterator.next();//images.get(i);
InputStream in = getInputStream(path,ENDPOINT,ACCESS_KEY,SECRET_KEY,BUCKET_NAME);
byte[] fileByte = getFileBytes(in);// 获取图片字节数组
String base64UrlencodedImg = base64Urlencode(fileByte);// 编码
String post = getHttpRequestComponent().execute("POST_FOR_URL", STANDARD_SEAT_URL, base64UrlencodedImg);
JSONObject resultsData = JSONObject.parseObject(post);
Integer errorCode = resultsData.getInteger("error_code");
//免费的用完了哦!赶紧去充钱。。。
if(errorCode!=null){
log.error("errorCode===>"+errorCode);
if(errorCode==17||errorCode==19){
Long expire = getRedisTemplate().getExpire(STANDARD_SEAT, TimeUnit.SECONDS);
if (expire != null && expire >0) {
getRedisTemplate().opsForValue().set(STANDARD_SEAT,2000,expire,TimeUnit.SECONDS);
}
//想不到吧!免费的不止一个哟。。。
return getFromText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}else if(errorCode==18){
//想不到吧!免费的不止一个哟。。。
return getFromText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}else if(errorCode==100||errorCode==110||errorCode==111){
if (index==3) {
return new HashSet<>();
}
index++;
//token失效重新获取token
getTokens();
return getStandardSeatText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}
}
JSONArray wordsResult = resultsData.getJSONArray("words_result");
if (wordsResult==null) {
continue;
}
for (int j = 0; j < wordsResult.size(); j++) {
JSONObject jsonObject = wordsResult.getJSONObject(j);
set.add(jsonObject.getString("words").trim());
}
value++;
iterator.remove();
}
Object size = getRedisTemplate().opsForValue().get(STANDARD_SEAT);
if (size!=null) {
Integer integer = Integer.valueOf(size.toString());
Long expire = getRedisTemplate().getExpire(STANDARD_SEAT, TimeUnit.SECONDS);
if (expire != null && expire >0) {
getRedisTemplate().opsForValue().set(STANDARD_SEAT,integer+value,expire,TimeUnit.SECONDS);
}
}else{
getRedisTemplate().opsForValue().set(STANDARD_SEAT,value,30,TimeUnit.DAYS);
}
return set;
}catch (Exception e){
e.printStackTrace();
return getFromText(images,ENDPOINT, ACCESS_KEY, SECRET_KEY,BUCKET_NAME);
}
}
//表格
public static Set<String> getFromText(List<String> images,String ENDPOINT,String ACCESS_KEY,String SECRET_KEY,String BUCKET_NAME){
try {
String token = getToken();
FORM_URL = FORM_URL + "?access_token="+token;
Integer value = 0;
for (int i = 0; i < images.size(); i++) {
String path = images.get(i);
InputStream in = getInputStream(path,ENDPOINT,ACCESS_KEY,SECRET_KEY,BUCKET_NAME);
byte[] fileByte = getFileBytes(in);// 获取图片字节数组
String base64UrlencodedImg = base64Urlencode(fileByte);// 编码
String post = getHttpRequestComponent().execute("POST_FOR_URL", FORM_URL, base64UrlencodedImg);
JSONObject resultsData = JSONObject.parseObject(post);
JSONArray formResult = resultsData.getJSONArray("forms_result");
if (formResult==null) {
continue;
}
for (int j = 0; j < formResult.size(); j++) {
JSONObject jsonObject = formResult.getJSONObject(j);
JSONArray body = jsonObject.getJSONArray("body");
for (int y = 0; y < body.size(); y++) {
JSONObject words = body.getJSONObject(y);
set.add(words.getString("words").trim());
}
JSONArray footer = jsonObject.getJSONArray("footer");
for (int x = 0; x < footer.size(); x++) {
JSONObject words = footer.getJSONObject(x);
set.add(words.getString("words").trim());
}
}
value++;
}
Object size = getRedisTemplate().opsForValue().get(FORM);
if (size!=null) {
Integer integer = Integer.valueOf(size.toString());
Long expire = getRedisTemplate().getExpire(FORM, TimeUnit.SECONDS);
if (expire != null && expire > 0) {
getRedisTemplate().opsForValue().set(FORM,integer+value,expire,TimeUnit.SECONDS);
}
}else{
getRedisTemplate().opsForValue().set(FORM,value,1,TimeUnit.DAYS);
}
}catch (Exception e){
e.printStackTrace();
}
return set;
}
/**
* 图片转字节数组
* @param
* @return 图片字节数组
*/
private static byte[] getFileBytes(InputStream in){
byte[] buffer = null;
try {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
byte[] b = new byte[1000];
int n;
while ((n = in.read(b)) != -1) {
bos.write(b, 0, n);
}
in.close();
bos.close();
buffer = bos.toByteArray();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return buffer;
}
/**
* 对字节数组进行base64编码与url编码
* @param b
* @return
*/
private static String base64Urlencode(byte[] b) {
byte[] base64Img = Base64.getEncoder().encode(b);
try {
String base64UrlencodedImg = URLEncoder.encode(new String(base64Img), "utf-8");
return base64UrlencodedImg;
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return null;
}
//注意:这里我用的是MinIO附件服务器里拿附件获取文件流,可根据自己的需求来做更改
private static InputStream getInputStream(String path,String ENDPOINT,String ACCESS_KEY,String SECRET_KEY,String BUCKET_NAME) throws Exception{
MinioClient minioClient = new MinioClient(ENDPOINT, ACCESS_KEY, SECRET_KEY);
String[] arr = path.split("/");
String paths = "";
for (int i = 4; i < arr.length; i++) {
paths = paths+"/"+arr[i];
}
InputStream in = minioClient.getObject(BUCKET_NAME, paths);
return in;
}
}
package com.lanlinker.cloud.labor.component;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.ParseException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.springframework.stereotype.Component;
import java.io.IOException;
@Slf4j
public class HttpRequestComponent {
private static final String POST = "POST";
private static final String POST_FOR_URL = "POST_FOR_URL";
public String execute(String method, String url, String param) throws Exception {
switch (method) {
case POST:
return post(param, url);
case POST_FOR_URL:
return postForUrl(param,url);
default:
return null;
}
}
private static String postForUrl(String param,String url){
String result = "";
CloseableHttpClient httpClient = HttpClients.createDefault();
HttpPost httpPost = new HttpPost(url);
//每个接口的Content-Type可能不同
httpPost.setHeader("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8");
try {
if(param!=null){
HttpEntity entity = new StringEntity("image=" + param);
httpPost.setEntity(entity);
}
HttpResponse response = httpClient.execute(httpPost);
result = EntityUtils.toString(response.getEntity());
return result;
}catch (Exception e){
e.printStackTrace();
}
return null;
}
private String post(String param, String url) throws IOException {
String entityStr;
CloseableHttpClient httpClient = HttpClientBuilder.create().build();
HttpPost httpPost = new HttpPost(url);
CloseableHttpResponse response = null;
try {
httpPost.setHeader("Content-Type", "application/json");
StringEntity stringEntity = new StringEntity(param, "UTF-8");
httpPost.setEntity(stringEntity);
httpPost.setConfig(getConfig());
response = httpClient.execute(httpPost);
int statusCode = response.getStatusLine().getStatusCode();
HttpEntity entity = response.getEntity();
entityStr = EntityUtils.toString(entity);
if (statusCode != HttpStatus.SC_OK) {
log.error("http请求失败, url:{}", url);
}
return entityStr;
} catch (IOException | ParseException e) {
throw new RuntimeException(e);
} finally {
if (httpClient != null) {
httpClient.close();
}
if (response != null) {
response.close();
}
httpPost.abort();
}
}
/**
* 获取请求配置
*
* @return config
*/
private RequestConfig getConfig() {
return RequestConfig.custom()
.setConnectTimeout(60000)
.setConnectionRequestTimeout(60000)
.setSocketTimeout(60000).build();
}
private String get(String param, String url) {
return null;
}
}