官方开发文档
https://cloud.baidu.com/doc/OCR/s/Ek3h7xypm
模板创建
官网有教程
识别多位小数的数据时,我用的是常规字段类型;如果改用小写数字金额类型,识别结果的最后一位会被识别为0,甚至可能识别不到
依赖
<!-- https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp -->
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.12.0</version>
</dependency>
static final String API_KEY = "填你自己的"; static final String SECRET_KEY = "填你自己的"; static final OkHttpClient HTTP_CLIENT = new OkHttpClient().newBuilder().build(); @PostMapping(value = "/testProcess",consumes = MediaType.MULTIPART_FORM_DATA_VALUE) public List<HashMap<String,String>> testProcessImage(@RequestParam("file")MultipartFile file) throws IOException { byte[] imageBytes = file .getBytes(); okhttp3.MediaType mediaType = okhttp3.MediaType.parse("application/x-www-form-urlencoded"); // RequestBody.create 第二个参数根据自己的情况来,查官方文档 // 我这里是根据上传的文件内容(还可以根据文件路径) RequestBody body = RequestBody.create(mediaType, "image="+URLEncoder.encode(Base64Util.encode(imageBytes), "UTF-8") +"&templateSign=ace62e52ce968079a92ec15d2544d1e9"); Request request = new Request.Builder() .url("https://aip.baidubce.com/rest/2.0/solution/v1/iocr/recognise/finance?access_token=" + getAccessToken()) .method("POST", body) .addHeader("Content-Type", "application/x-www-form-urlencoded") .addHeader("Accept", "application/json") .build(); Response response = HTTP_CLIENT.newCall(request).execute(); JSONObject jsonObject = new JSONObject(Objects.requireNonNull(response.body()).string()); // 下面是对返回值的解析 // 这里我只获取识别到的值(word_name,word(具体值)) JSONArray ret = jsonObject.getJSONObject("data").getJSONArray("ret"); HashMap<String,String> data = new HashMap<>(); List<HashMap<String,String>> datas = new ArrayList<>(); System.out.println(ret.length()); for (int i = 0; i < ret.length()-1 ; i++) { JSONObject phoneNumberObj = ret.getJSONObject(i); data.put(phoneNumberObj.getString("word_name"),phoneNumberObj.getString("word")); data = new HashMap<>(); datas.add(data); } return datas; }
下面是返回结果的示例
这是我自己的测试模板实例,可根据具体情况改动
{
"errorCode": 0,
"errorMsg": "",
"logId": null,
"data": {
"ret": [
{
"probability": {
"average": 0.9952911139,
"min": 0.976130724,
"variance": 0.00005427112774
},
"location": {
"top": 3421,
"left": 2115,
"width": 37,
"height": 150
},
"word_name": "valley_price",
"word": "0.220"
},
{
"probability": {
"average": 0.9979573488,
"min": 0.9919402003,
"variance": 0.000006120805665
},
"location": {
"top": 3418,
"left": 2179,
"width": 36,
"height": 150
},
"word_name": "flat_price",
"word": "0.6320"
},
{
"probability": {
"average": 0.9959807396,
"min": 0.9883592129,
"variance": 0.000009529698218
},
"location": {
"top": 1647,
"left": 2443,
"width": 43,
"height": 207
},
"word_name": "account_number",
"word": "0388*12"
},
{
"probability": {
"average": 0.9951527715,
"min": 0.9746937156,
"variance": 0.00006121824845
},
"location": {
"top": 3638,
"left": 2175,
"width": 37,
"height": 147
},
"word_name": "flat_charge",
"word": "918.38"
},
{
"probability": {
"average": 0.9968527555,
"min": 0.9824381471,
"variance": 0.00003009658758
},
"location": {
"top": 3418,
"left": 2239,
"width": 36,
"height": 150
},
"word_name": "peak_price",
"word": "1.030"
},
{
"probability": {
"average": 0.9971604347,
"min": 0.9900932908,
"variance": 0.00001060384693
},
"location": {
"top": 3674,
"left": 2112,
"width": 36,
"height": 111
},
"word_name": "valley_charge",
"word": "43.1"
},
{
"probability": {
"average": 0.9989025593,
"min": 0.9978717566,
"variance": 4.821478683e-7
},
"location": {
"top": 1640,
"left": 2506,
"width": 40,
"height": 117
},
"word_name": "month",
"word": "202401"
},
{
"probability": {
"average": 0.9845759273,
"min": 0.8883020878,
"variance": 0.001324576442
},
"location": {
"top": 3415,
"left": 2299,
"width": 40,
"height": 143
},
"word_name": "tip_price",
"word": "1.2*0"
},
{
"probability": {
"average": 0.9963476062,
"min": 0.9907190204,
"variance": 0.000006976139503
},
"location": {
"top": 3662,
"left": 2299,
"width": 36,
"height": 113
},
"word_name": "tip_charge",
"word": "67.77"
},
{
"probability": {
"average": 0.9945835471,
"min": 0.9784607291,
"variance": 0.00005564975436
},
"location": {
"top": 3631,
"left": 2235,
"width": 37,
"height": 147
},
"word_name": "peak_charge",
"word": "103.24"
},
{
"probability": {
"average": 0.999364078,
"min": 0.9970128536,
"variance": 7.170726235e-7
},
"location": {
"top": 660,
"left": 2443,
"width": 43,
"height": 553
},
"word_name": "username",
"word": "成都****有限责任公司"
}
],
"templateSign": "ace62e52ce968079a92ec15d2544d1e9",
"scores": 1,
"isStructured": true,
"logId": "171038434275039",
"version": 14,
"clockwiseAngle": 269.9
}
}
/**
 * Generates the authentication signature (access token) from the API key / secret key pair.
 *
 * <p>Posts a {@code client_credentials} grant to the OAuth token endpoint and reads the
 * {@code access_token} field from the JSON reply.
 *
 * @return the access token
 * @throws IOException if the call fails or the server answers with a non-2xx HTTP status
 */
private static String getAccessToken() throws IOException {
    okhttp3.MediaType mediaType = okhttp3.MediaType.parse("application/x-www-form-urlencoded");
    RequestBody body = RequestBody.create(mediaType,
            "grant_type=client_credentials&client_id=" + API_KEY + "&client_secret=" + SECRET_KEY);
    Request request = new Request.Builder()
            .url("https://aip.baidubce.com/oauth/2.0/token")
            .method("POST", body)
            .addHeader("Content-Type", "application/x-www-form-urlencoded")
            .build();
    // try-with-resources guarantees the response is closed on every path.
    try (Response response = HTTP_CLIENT.newCall(request).execute()) {
        if (!response.isSuccessful()) {
            // Fail with the status code instead of an opaque JSON parsing error.
            throw new IOException("Access token request failed: HTTP " + response.code());
        }
        String payload = Objects.requireNonNull(response.body()).string();
        return new JSONObject(payload).getString("access_token");
    }
}
/**
 * Reads a file and returns its content as a Base64 string.
 *
 * @param path      path of the file to read
 * @param urlEncode pass {@code true} when the Content-Type is
 *                  application/x-www-form-urlencoded, so the Base64 text is also URL-encoded
 * @return the Base64-encoded content, without any file header prefix
 * @throws IOException if the file cannot be read
 */
static String getFileContentAsBase64(String path, boolean urlEncode) throws IOException {
    final String encoded = Base64.getEncoder().encodeToString(Files.readAllBytes(Paths.get(path)));
    // Base64 output may contain '+', '/' and '=', which must be escaped in form bodies.
    return urlEncode ? URLEncoder.encode(encoded, "utf-8") : encoded;
}