1. 本地上传图片识别:本质是先将上传的图片转换为 Base64 编码,再调用百度 OCR 接口进行解析。
首先导入pom依赖
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.9.0</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>2.0.13</version>
</dependency>
接口实现
/**
 * Runs text recognition on an uploaded image.
 *
 * <p>The upload is converted to a Base64 string (passing {@code true} as the
 * second argument of {@code convertToBase64}) and that string is handed to
 * {@code baiduOcrService.recognizeImage}.
 *
 * @param file the multipart upload bound to the {@code file} request parameter
 * @return the value returned by the OCR service, or a fixed error message when
 *         an {@link IOException} is raised along the way
 */
@PostMapping("/recognize-img")
@Operation(summary = "图片文字识别",description = "图片识别")
public String recognizeImage(@RequestParam("file") MultipartFile file) {
    String recognizedText;
    try {
        // Encode first, then recognise; both steps may raise IOException.
        recognizedText = baiduOcrService.recognizeImage(baiduOcrService.convertToBase64(file, true));
    } catch (IOException e) {
        e.printStackTrace();
        recognizedText = "Error occurred while recognizing image.";
    }
    return recognizedText;
}
这是OCR的实现过程
package com.jt.console.service.impl;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.jt.common.beans.ServiceAssert;
import okhttp3.*;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.Locale;
/**
* 百度OCR识别实现类
*/
@Service("baiduOcrServiceImpl")
public class BaiduOcrServiceImpl {
@Value("${baidu.ocr.apiKey}")
private String API_KEY; // 客户端id
@Value("${baidu.ocr.secretKey}")
private String SECRET_KEY; // 客户端秘钥
// 支持的图片格式列表
private static final List<String> SUPPORTED_FORMATS = Arrays.asList("png", "jpg", "jpeg", "bmp", "gif");
// 构建 OkHttpClient 实例
private static final OkHttpClient HTTP_CLIENT = new OkHttpClient().newBuilder().build();
// 获取 Access Token
private String getAccessToken() throws IOException {
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
RequestBody body = RequestBody.create(mediaType, "grant_type=client_credentials&client_id=" + API_KEY
+ "&client_secret=" + SECRET_KEY);
Request request = new Request.Builder()
.url("https://aip.baidubce.com/oauth/2.0/token")
.method("POST", body)
.addHeader("Content-Type", "application/x-www-form-urlencoded")
.build();
Response response = HTTP_CLIENT.newCall(request).execute();
if (!response.isSuccessful()) {
//throw new IOException("Unexpected code " + response);
// 自定义提示信息
String errorMessage = "OCR request failed. Status code: " + response.code() + ", Message: " + response.message();
ServiceAssert.isTrue(false, errorMessage);
}
String responseBody = response.body().string();
JSONObject jsonObject = JSON.parseObject(responseBody);
return jsonObject.getString("access_token");
}
// 调用 OCR 接口,返回结果
public String recognizeImage(String base64Image) throws IOException {
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
RequestBody body = RequestBody.create(mediaType, "image=" + base64Image + "&detect_direction=false¶graph=false&probability=false");
Request request = new Request.Builder()
.url("https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=" + getAccessToken())
.method("POST", body)
.addHeader("Content-Type", "application/x-www-form-urlencoded")
.addHeader("Accept", "application/json")
.build();
try (Response response = HTTP_CLIENT.newCall(request).execute()) {
if (!response.isSuccessful()) {
//throw new IOException("Unexpected code " + response);
// 自定义提示信息
String errorMessage = "Failed to obtain access token. Status code: " + response.code() + ", Message: " + response.message();
ServiceAssert.isTrue(false, errorMessage);
}
return formatOcrResult(response.body().string());
}
}
// 将 MultipartFile 转换为 Base64 编码
public String convertToBase64(MultipartFile file, boolean urlEncode) throws IOException {
// 检查图片格式
String filename = file.getOriginalFilename();
if (filename == null) {
ServiceAssert.isTrue(false, "文件名为空");
}
String extension = filename.substring(filename.lastIndexOf('.') + 1).toLowerCase();
if (!SUPPORTED_FORMATS.contains(extension)) {
ServiceAssert.isTrue(false, "不支持的图片格式: " + extension);
}
// 从 MultipartFile 中获取字节数组
byte[] bytes = file.getBytes();
// 将字节数组编码为 Base64 字符串
String base64 = Base64.getEncoder().encodeToString(bytes);
// 如果需要 URL 编码
if (urlEncode) {
base64 = URLEncoder.encode(base64, "utf-8");
}
return base64;
}
//组装返回OCR识别的结果
public static String formatOcrResult(String ocrResult) {
StringBuilder resultText = new StringBuilder();
try {
// 解析 OCR 结果
JSONObject jsonObject = JSON.parseObject(ocrResult);
// 检查是否包含 words_result 数组
if (jsonObject.containsKey("words_result")) {
var wordsResult = jsonObject.getJSONArray("words_result");
if (wordsResult != null && !wordsResult.isEmpty()) {
for (int i = 0; i < wordsResult.size(); i++) {
JSONObject wordObject = wordsResult.getJSONObject(i);
String word = wordObject.getString("words");
if (word != null && !word.isEmpty()) {
resultText.append(word).append(" ");
}
}
} else {
// 如果没有识别到文字,直接返回空值
return "";
}
} else {
// OCR 结果中不包含 words_result,也返回空值
return "";
}
} catch (Exception e) {
ServiceAssert.isTrue(false,e.getMessage());
//resultText.append("处理 OCR 结果时出错:").append(e.getMessage());
}
return resultText.toString().trim();
}
}
2. 通过图片链接(URL)进行 OCR 识别:直接把图片地址提交给百度 OCR 接口,无需先转换为 Base64。
导入依赖
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.9.0</version>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20210307</version> <!-- 或者最新版本 -->
</dependency>
接口实现
/**
 * REST controller exposing OCR by image URL under {@code /api}.
 *
 * <p>NOTE(review): the injected {@code ocrService} field is not used by the
 * handler below, which calls the static {@code BaiduOcrService.recognizeImage}
 * instead — confirm whether the field is still needed.
 */
@RequestMapping("/api")
@RestController
@AllArgsConstructor
@CrossOrigin(origins = "*")
public class OcrController {

    private final OcrServiceImpl ocrService;

    /**
     * Recognises the text of the image found at the given URL.
     *
     * @param imageUrl the image address taken from the {@code imageUrl} request parameter
     * @return whatever {@code BaiduOcrService.recognizeImage} returns, or a
     *         fixed error message when an {@link IOException} is raised
     */
    @GetMapping("/ocr")
    public String recognizeImage(@RequestParam String imageUrl) {
        String ocrResponse;
        try {
            ocrResponse = BaiduOcrService.recognizeImage(imageUrl);
        } catch (IOException e) {
            e.printStackTrace();
            ocrResponse = "Error occurred while recognizing image.";
        }
        return ocrResponse;
    }
}
service层实现
package com.example.test.controller.ocr;
import okhttp3.*;
import org.json.JSONObject;
import java.io.IOException;
public class BaiduOcrService {
private static final String API_KEY = "";
private static final String SECRET_KEY = "";
private static final OkHttpClient HTTP_CLIENT = new OkHttpClient().newBuilder().build();
// 获取 Access Token
private static String getAccessToken() throws IOException {
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
RequestBody body = RequestBody.create(mediaType, "grant_type=client_credentials&client_id=" + API_KEY
+ "&client_secret=" + SECRET_KEY);
Request request = new Request.Builder()
.url("https://aip.baidubce.com/oauth/2.0/token")
.method("POST", body)
.addHeader("Content-Type", "application/x-www-form-urlencoded")
.build();
Response response = HTTP_CLIENT.newCall(request).execute();
return new JSONObject(response.body().string()).getString("access_token");
}
// 调用 OCR 接口
public static String recognizeImage(String imageUrl) throws IOException {
MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
RequestBody body = RequestBody.create(mediaType, "url=" + imageUrl + "&detect_direction=false¶graph=false&probability=false");
Request request = new Request.Builder()
.url("https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=" + getAccessToken())
.method("POST", body)
.addHeader("Content-Type", "application/x-www-form-urlencoded")
.addHeader("Accept", "application/json")
.build();
Response response = HTTP_CLIENT.newCall(request).execute();
return response.body().string();
}
}
以上就是 OCR 识别的两种实现方式。