首先,你需要在阿里云购买一个图像识别的 API(前 500 次免费)。购买后它会给你一个 AppCode,请记下这个 AppCode,我们在项目中会用到它。我用的是这个:印刷文字识别-文档小说图片文字识别。
识别的逻辑:
我们通过 Java 的 I/O 操作读取图片文件,将其转化为 Base64 编码,然后把它作为请求体去请求 API 的调用地址;拿到返回的 JSON 后进行解析,把识别结果输出到控制台,并追加写入文件。
很简单对不对!我写了一个 Demo 放在了 GitHub 上,链接是 https://github.com/theazet/ocr 。下面我们看一下实际运行的代码:
/**
 * Utility helpers for the OCR demo: emitting recognized text, parsing the
 * JSON response, and encoding an image file as Base64.
 */
public class Utils {
    /**
     * Prints the given text to standard output and appends it to the result file.
     * I/O failures are reported via {@code printStackTrace()} and not rethrown.
     *
     * @param content text to emit (called once per recognized entry by {@link #prase})
     */
    public static void output(String content) {
        System.out.println(content);
        File file = new File("D:\\result.txt"); // output file location; change as needed
        FileWriter writer = null;
        try {
            writer = new FileWriter(file, true); // true = append to existing content
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the file handle is released even when write() fails.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Parses the JSON response and passes the {@code "word"} value of every
     * element of the {@code "prism_wordsInfo"} array to {@link #output}.
     *
     * <p>NOTE(review): if the response has no {@code "prism_wordsInfo"} key
     * (presumably the case for error responses), this fails with a
     * NullPointerException - confirm and handle at the call site if needed.
     *
     * @param string JSON text taken from the HTTP response
     */
    public static void prase(String string) {
        JSONObject jsonObject = JSONObject.parseObject(string);
        JSONArray jsonArray = jsonObject.getJSONArray("prism_wordsInfo");
        for (int i = 0; i < jsonArray.size(); i++) {
            JSONObject newjsonObject = (JSONObject) jsonArray.get(i);
            output(newjsonObject.getString("word"));
        }
    }

    /**
     * Reads the whole file and returns its content as a single-line Base64 string.
     *
     * <p>Uses {@code java.util.Base64} (Java 8+) instead of the JDK-internal
     * {@code sun.misc.BASE64Encoder}: the internal class is not available on
     * current JDKs and inserts line breaks into the output, which end up as raw
     * newlines inside the JSON string literal the caller builds.
     *
     * @param fileName path of the file to encode
     * @return Base64 encoding of the file content, without line breaks
     * @throws Exception if the file cannot be opened or read completely
     */
    public static String changeToBase64(String fileName) throws Exception {
        File file = new File(fileName);
        FileInputStream inputFile = new FileInputStream(file);
        byte[] buffer = new byte[(int) file.length()];
        try {
            // A single read() call is not guaranteed to fill the buffer, so loop.
            int offset = 0;
            while (offset < buffer.length) {
                int count = inputFile.read(buffer, offset, buffer.length - offset);
                if (count < 0) {
                    throw new IOException("Unexpected end of file while reading " + fileName);
                }
                offset += count;
            }
        } finally {
            // Release the file handle even when reading fails.
            inputFile.close();
        }
        return java.util.Base64.getEncoder().encodeToString(buffer);
    }
}
/**
 * HTTP helper that sends POST requests through Apache HttpClient.
 *
 * <p>WARNING: for {@code https://} hosts this class installs a trust manager
 * that accepts every certificate and disables hostname verification (see
 * {@link #sslClient}). The connection is therefore open to man-in-the-middle
 * attacks, and the Authorization header travels over it. Acceptable for a
 * local demo only; do not reuse as-is in production code.
 */
public class HttpUtils {
    /**
     * Sends a POST request to {@code host + path} with the given headers,
     * query parameters and body.
     *
     * @param host    scheme and host, e.g. {@code https://example.com}
     * @param path    request path appended to {@code host}; may be blank
     * @param method  not used by this implementation (the request is always a POST);
     *                kept so existing callers keep compiling
     * @param headers request headers; may be {@code null}
     * @param querys  query parameters; may be {@code null}
     * @param body    request body, sent as UTF-8 text; skipped when blank
     * @return the response returned by the HTTP client
     * @throws Exception if building or executing the request fails
     */
    public static HttpResponse doPost(String host, String path, String method,
                                      Map<String, String> headers,
                                      Map<String, String> querys,
                                      String body)
            throws Exception {
        HttpClient httpClient = wrapClient(host);
        HttpPost request = new HttpPost(buildUrl(host, path, querys));
        // Guard against null headers, consistent with the null check on querys in buildUrl.
        if (null != headers) {
            for (Map.Entry<String, String> e : headers.entrySet()) {
                request.addHeader(e.getKey(), e.getValue());
            }
        }
        if (StringUtils.isNotBlank(body)) {
            request.setEntity(new StringEntity(body, "utf-8"));
        }
        return httpClient.execute(request);
    }

    /**
     * Builds the request URL as {@code host + path + "?" + query}.
     *
     * <p>For each entry: a non-blank key is appended as {@code key} or
     * {@code key=urlEncodedValue}; an entry with a blank key but a non-blank
     * value contributes the raw value; an entry where both are blank is skipped.
     */
    private static String buildUrl(String host, String path, Map<String, String> querys) throws UnsupportedEncodingException {
        StringBuilder sbUrl = new StringBuilder();
        sbUrl.append(host);
        if (!StringUtils.isBlank(path)) {
            sbUrl.append(path);
        }
        if (null != querys) {
            StringBuilder sbQuery = new StringBuilder();
            for (Map.Entry<String, String> query : querys.entrySet()) {
                boolean hasKey = !StringUtils.isBlank(query.getKey());
                boolean hasValue = !StringUtils.isBlank(query.getValue());
                if (!hasKey && !hasValue) {
                    // Nothing to append; skipping avoids a dangling "&" in the URL.
                    continue;
                }
                if (0 < sbQuery.length()) {
                    sbQuery.append("&");
                }
                if (hasKey) {
                    sbQuery.append(query.getKey());
                    if (hasValue) {
                        sbQuery.append("=");
                        sbQuery.append(URLEncoder.encode(query.getValue(), "utf-8"));
                    }
                } else {
                    sbQuery.append(query.getValue());
                }
            }
            if (0 < sbQuery.length()) {
                sbUrl.append("?").append(sbQuery);
            }
        }
        return sbUrl.toString();
    }

    /** Creates the HTTP client; for https hosts the permissive TLS setup is applied. */
    private static HttpClient wrapClient(String host) {
        HttpClient httpClient = new DefaultHttpClient();
        if (host.startsWith("https://")) {
            sslClient(httpClient);
        }
        return httpClient;
    }

    /**
     * Registers an "https" scheme on port 443 whose socket factory trusts every
     * certificate and accepts every hostname.
     *
     * <p>NOTE(review): this disables TLS server authentication entirely; prefer
     * the client's default SSL configuration unless there is a known reason.
     */
    private static void sslClient(HttpClient httpClient) {
        try {
            SSLContext ctx = SSLContext.getInstance("TLS");
            X509TrustManager tm = new X509TrustManager() {
                public X509Certificate[] getAcceptedIssuers() {
                    // The interface contract asks for a non-null (possibly empty) array.
                    return new X509Certificate[0];
                }
                public void checkClientTrusted(X509Certificate[] xcs, String str) {
                    // Intentionally empty: every client certificate is accepted.
                }
                public void checkServerTrusted(X509Certificate[] xcs, String str) {
                    // Intentionally empty: every server certificate is accepted.
                }
            };
            ctx.init(null, new TrustManager[] { tm }, null);
            SSLSocketFactory ssf = new SSLSocketFactory(ctx);
            ssf.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
            ClientConnectionManager ccm = httpClient.getConnectionManager();
            SchemeRegistry registry = ccm.getSchemeRegistry();
            registry.register(new Scheme("https", 443, ssf));
        } catch (KeyManagementException ex) {
            throw new RuntimeException(ex);
        } catch (NoSuchAlgorithmException ex) {
            throw new RuntimeException(ex);
        }
    }
}
/**
 * Entry point of the demo: encodes a local image as Base64, posts it to the
 * OCR service and hands the JSON response to {@code prase}.
 */
public class StartOcr {
    /**
     * Runs one recognition request. Any failure is reported with
     * {@code printStackTrace()} and ends the run.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        String host = "https://ocrapi-document.taobao.com";
        String path = "/ocrservice/document";
        String method = "POST";
        String appcode = "这里填写你的AppCode";
        Map<String, String> headers = new HashMap<String, String>();
        headers.put("Authorization", "APPCODE " + appcode);
        headers.put("Content-Type", "application/json; charset=UTF-8");
        Map<String, String> querys = new HashMap<String, String>();
        try {
            // Build the body inside the same try as the request: if the image
            // cannot be read there is nothing useful to send, so stop here
            // instead of posting a request without a body.
            String bodys = "{\"img\":\"" + changeToBase64("D:\\ocr.jpg") + "\",\"prob\":false}"; // path of the image to recognize
            HttpResponse response = HttpUtils.doPost(host, path, method, headers, querys, bodys);
            // Fall back to UTF-8 when the response does not declare a charset,
            // so non-ASCII text is not decoded with the library default.
            prase(EntityUtils.toString(response.getEntity(), "UTF-8"));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
这里我是通过Maven构建的项目,贴上pom.xml文件
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- The demo is a console program started through a main class, and the
         build section configures maven-jar-plugin, so the artifact is packaged
         as a jar rather than a war. -->
    <packaging>jar</packaging>
    <groupId>Ocr</groupId>
    <artifactId>Ocr</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!-- JSON parsing (JSONObject / JSONArray) -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.15</version>
        </dependency>
        <!-- HTTP client used for the POST request -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpcore</artifactId>
            <version>4.2.1</version>
        </dependency>
        <!-- StringUtils -->
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.6</version>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-util</artifactId>
            <version>9.3.7.v20160115</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.5</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-jar-plugin</artifactId>
                    <configuration>
                        <archive>
                            <manifest>
                                <!-- NOTE(review): must match the real package of StartOcr;
                                     confirm against the project sources. -->
                                <mainClass>com.example.ocr.StartOcr</mainClass>
                                <addClasspath>true</addClasspath>
                                <!-- Manifest Class-Path entries are prefixed with lib/. -->
                                <classpathPrefix>lib/</classpathPrefix>
                            </manifest>
                        </archive>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
</project>
测试用例
输出结果