1.获取元素区域验证码图片
使用selenium/appium自带getScreenshotAs方法
// Capture a screenshot of just this element's region, returned as a Base64-encoded string.
String imgBase64= element.getScreenshotAs(OutputType.BASE64);
// Hand the Base64 image to BaiDuImgOCR.getText and return the text it recognizes.
return BaiDuImgOCR.getText(imgBase64);
2.请求百度服务接口
百度提供了各语言的请求示例代码,以下为Java实现:
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.jayway.jsonpath.JsonPath;
import okhttp3.*;
import java.io.*;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
/**
* 百度图片文本识别
* 个人免费账户,每月1000次
*/
public class BaiDuImgOCR {
private static final String API_KEY = "**********************";
private static final String SECRET_KEY = "**************************";
static final OkHttpClient HTTP_CLIENT = new OkHttpClient().newBuilder().build();
/**
 * Sends an image to Baidu's {@code accurate_basic} OCR endpoint and returns the first
 * recognized line.
 *
 * <p>The response is expected to contain at least one {@code words_result} entry; no
 * fallback value is applied when it does not.
 *
 * @param imgBase64 the image content as a Base64 string
 * @return the {@code words} value of the first {@code words_result} entry
 * @throws IOException if fetching the access token or calling the OCR endpoint fails
 */
public static String getText(String imgBase64) throws IOException {
    MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
    // Base64 may contain '+', '/' and '=', so it has to be percent-encoded for a form body.
    String encodedImage = URLEncoder.encode(imgBase64, StandardCharsets.UTF_8);
    // "&paragraph" had been mangled into "¶graph" (the HTML entity &para;), which
    // dropped the paragraph parameter and corrupted the detect_direction value.
    String form = "image=" + encodedImage
            + "&detect_direction=false&paragraph=false&probability=false";
    RequestBody body = RequestBody.create(mediaType, form);
    Request request = new Request.Builder()
            .url("https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=" + getAccessToken())
            .method("POST", body)
            .addHeader("Content-Type", "application/x-www-form-urlencoded")
            .addHeader("Accept", "application/json")
            .build();
    // try-with-resources releases the response even if reading or parsing throws.
    try (Response response = HTTP_CLIENT.newCall(request).execute()) {
        return JsonPath.read(response.body().string(), "$.words_result[0].words");
    }
}
/**
 * Sends an image to Baidu's {@code accurate_basic} OCR endpoint and returns every
 * recognized line.
 *
 * @param imgBase64 the image content as a Base64 string
 * @return the {@code words} value of each {@code words_result} entry, in response order
 * @throws IOException if fetching the access token or calling the OCR endpoint fails, or
 *     if the response carries no {@code words_result} array
 */
public static List<String> getTextList(String imgBase64) throws IOException {
    MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
    // Base64 may contain '+', '/' and '=', so it has to be percent-encoded for a form body.
    String encodedImage = URLEncoder.encode(imgBase64, StandardCharsets.UTF_8);
    // "&paragraph" had been mangled into "¶graph" (the HTML entity &para;), which
    // dropped the paragraph parameter and corrupted the detect_direction value.
    String form = "image=" + encodedImage
            + "&detect_direction=false&paragraph=false&probability=false";
    RequestBody body = RequestBody.create(mediaType, form);
    Request request = new Request.Builder()
            .url("https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=" + getAccessToken())
            .method("POST", body)
            .addHeader("Content-Type", "application/x-www-form-urlencoded")
            .addHeader("Accept", "application/json")
            .build();
    // try-with-resources releases the response even if reading or parsing throws.
    try (Response response = HTTP_CLIENT.newCall(request).execute()) {
        String payload = response.body().string();
        JSONObject root = JSON.parseObject(payload);
        JSONArray results = root == null ? null : root.getJSONArray("words_result");
        if (results == null) {
            // Without this guard a missing array surfaced as a bare NullPointerException.
            throw new IOException("Baidu OCR response has no words_result: " + payload);
        }
        List<String> textList = new ArrayList<>(results.size());
        for (int i = 0; i < results.size(); i++) {
            textList.add(results.getJSONObject(i).getString("words"));
        }
        return textList;
    }
}
/**
 * Exchanges the configured API key and secret key for an access token by POSTing a
 * {@code client_credentials} grant to Baidu's OAuth token endpoint.
 *
 * @return the {@code access_token} value from the token response
 * @throws IOException if the HTTP call fails or the response contains no
 *     {@code access_token}
 */
private static String getAccessToken() throws IOException {
    MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
    RequestBody body = RequestBody.create(mediaType, "grant_type=client_credentials&client_id=" + API_KEY
            + "&client_secret=" + SECRET_KEY);
    Request request = new Request.Builder()
            .url("https://aip.baidubce.com/oauth/2.0/token")
            .method("POST", body)
            .addHeader("Content-Type", "application/x-www-form-urlencoded")
            .build();
    // try-with-resources releases the response even if reading or parsing throws.
    try (Response response = HTTP_CLIENT.newCall(request).execute()) {
        JSONObject root = JSON.parseObject(response.body().string());
        String token = root == null ? null : root.getString("access_token");
        if (token == null || token.isEmpty()) {
            // Fail here rather than letting callers build a URL ending in "access_token=null".
            // The response body is deliberately left out of the message.
            throw new IOException(
                    "Baidu OAuth response did not contain access_token (HTTP " + response.code() + ")");
        }
        return token;
    }
}