OCR识别之阿里云接口调用OcrAliyunBO

/**
 * HTTP request helper used to call the Aliyun OCR API (Apache HttpClient 4.x).
 * <p>
 * Every call creates a new {@code DefaultHttpClient}. The returned
 * {@code HttpResponse} is handed to the caller as-is, so the caller is
 * responsible for consuming its entity.
 * <p>
 * The {@code method} parameter of every {@code doXxx} method is not used by
 * the implementation; the HTTP verb is fixed by the method that is called.
 * A {@code null} header map, query map or body is treated as "nothing to send".
 *
 * @author Albert
 * @date 2020-03-15
 * @version 1.0
 */
public class HttpUtils {

	/**
	 * Sends a GET request.
	 *
	 * @param host    scheme and host, e.g. {@code https://example.com}
	 * @param path    request path appended to {@code host}; may be blank
	 * @param method  unused
	 * @param headers request headers; may be {@code null}
	 * @param querys  query parameters; may be {@code null}
	 * @return the raw HTTP response
	 * @throws Exception if the URL cannot be built or the request fails
	 */
	public static HttpResponse doGet(String host, String path, String method,
			Map<String, String> headers,
			Map<String, String> querys)
			throws Exception {
		HttpClient httpClient = wrapClient(host);

		HttpGet request = new HttpGet(buildUrl(host, path, querys));
		if (headers != null) {
			for (Map.Entry<String, String> e : headers.entrySet()) {
				request.addHeader(e.getKey(), e.getValue());
			}
		}

		return httpClient.execute(request);
	}

	/**
	 * Sends a POST request whose body is a URL-encoded form.
	 *
	 * @param host    scheme and host
	 * @param path    request path appended to {@code host}; may be blank
	 * @param method  unused
	 * @param headers request headers; may be {@code null}
	 * @param querys  query parameters; may be {@code null}
	 * @param bodys   form fields; when {@code null} no entity is attached
	 * @return the raw HTTP response
	 * @throws Exception if the URL cannot be built or the request fails
	 */
	public static HttpResponse doPost(String host, String path, String method,
			Map<String, String> headers,
			Map<String, String> querys,
			Map<String, String> bodys)
			throws Exception {
		HttpClient httpClient = wrapClient(host);

		HttpPost request = new HttpPost(buildUrl(host, path, querys));
		if (headers != null) {
			for (Map.Entry<String, String> e : headers.entrySet()) {
				request.addHeader(e.getKey(), e.getValue());
			}
		}

		if (bodys != null) {
			List<NameValuePair> nameValuePairList = new ArrayList<NameValuePair>();
			for (Map.Entry<String, String> field : bodys.entrySet()) {
				nameValuePairList.add(new BasicNameValuePair(field.getKey(), field.getValue()));
			}
			UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(nameValuePairList, "utf-8");
			formEntity.setContentType("application/x-www-form-urlencoded; charset=UTF-8");
			request.setEntity(formEntity);
		}

		return httpClient.execute(request);
	}

	/**
	 * Sends a POST request whose body is a string encoded as UTF-8.
	 *
	 * @param host    scheme and host
	 * @param path    request path appended to {@code host}; may be blank
	 * @param method  unused
	 * @param headers request headers; may be {@code null}
	 * @param querys  query parameters; may be {@code null}
	 * @param body    request body; a blank body attaches no entity
	 * @return the raw HTTP response
	 * @throws Exception if the URL cannot be built or the request fails
	 */
	public static HttpResponse doPost(String host, String path, String method,
			Map<String, String> headers,
			Map<String, String> querys,
			String body)
			throws Exception {
		HttpClient httpClient = wrapClient(host);

		HttpPost request = new HttpPost(buildUrl(host, path, querys));
		if (headers != null) {
			for (Map.Entry<String, String> e : headers.entrySet()) {
				request.addHeader(e.getKey(), e.getValue());
			}
		}

		if (StringUtils.isNotBlank(body)) {
			request.setEntity(new StringEntity(body, "utf-8"));
		}

		return httpClient.execute(request);
	}

	/**
	 * Sends a POST request whose body is a raw byte array.
	 *
	 * @param host    scheme and host
	 * @param path    request path appended to {@code host}; may be blank
	 * @param method  unused
	 * @param headers request headers; may be {@code null}
	 * @param querys  query parameters; may be {@code null}
	 * @param body    request body; when {@code null} no entity is attached
	 * @return the raw HTTP response
	 * @throws Exception if the URL cannot be built or the request fails
	 */
	public static HttpResponse doPost(String host, String path, String method,
			Map<String, String> headers,
			Map<String, String> querys,
			byte[] body)
			throws Exception {
		HttpClient httpClient = wrapClient(host);

		HttpPost request = new HttpPost(buildUrl(host, path, querys));
		if (headers != null) {
			for (Map.Entry<String, String> e : headers.entrySet()) {
				request.addHeader(e.getKey(), e.getValue());
			}
		}

		if (body != null) {
			request.setEntity(new ByteArrayEntity(body));
		}

		return httpClient.execute(request);
	}

	/**
	 * Sends a PUT request whose body is a string encoded as UTF-8.
	 *
	 * @param host    scheme and host
	 * @param path    request path appended to {@code host}; may be blank
	 * @param method  unused
	 * @param headers request headers; may be {@code null}
	 * @param querys  query parameters; may be {@code null}
	 * @param body    request body; a blank body attaches no entity
	 * @return the raw HTTP response
	 * @throws Exception if the URL cannot be built or the request fails
	 */
	public static HttpResponse doPut(String host, String path, String method,
			Map<String, String> headers,
			Map<String, String> querys,
			String body)
			throws Exception {
		HttpClient httpClient = wrapClient(host);

		HttpPut request = new HttpPut(buildUrl(host, path, querys));
		if (headers != null) {
			for (Map.Entry<String, String> e : headers.entrySet()) {
				request.addHeader(e.getKey(), e.getValue());
			}
		}

		if (StringUtils.isNotBlank(body)) {
			request.setEntity(new StringEntity(body, "utf-8"));
		}

		return httpClient.execute(request);
	}

	/**
	 * Sends a PUT request whose body is a raw byte array.
	 *
	 * @param host    scheme and host
	 * @param path    request path appended to {@code host}; may be blank
	 * @param method  unused
	 * @param headers request headers; may be {@code null}
	 * @param querys  query parameters; may be {@code null}
	 * @param body    request body; when {@code null} no entity is attached
	 * @return the raw HTTP response
	 * @throws Exception if the URL cannot be built or the request fails
	 */
	public static HttpResponse doPut(String host, String path, String method,
			Map<String, String> headers,
			Map<String, String> querys,
			byte[] body)
			throws Exception {
		HttpClient httpClient = wrapClient(host);

		HttpPut request = new HttpPut(buildUrl(host, path, querys));
		if (headers != null) {
			for (Map.Entry<String, String> e : headers.entrySet()) {
				request.addHeader(e.getKey(), e.getValue());
			}
		}

		if (body != null) {
			request.setEntity(new ByteArrayEntity(body));
		}

		return httpClient.execute(request);
	}

	/**
	 * Sends a DELETE request.
	 *
	 * @param host    scheme and host
	 * @param path    request path appended to {@code host}; may be blank
	 * @param method  unused
	 * @param headers request headers; may be {@code null}
	 * @param querys  query parameters; may be {@code null}
	 * @return the raw HTTP response
	 * @throws Exception if the URL cannot be built or the request fails
	 */
	public static HttpResponse doDelete(String host, String path, String method,
			Map<String, String> headers,
			Map<String, String> querys)
			throws Exception {
		HttpClient httpClient = wrapClient(host);

		HttpDelete request = new HttpDelete(buildUrl(host, path, querys));
		if (headers != null) {
			for (Map.Entry<String, String> e : headers.entrySet()) {
				request.addHeader(e.getKey(), e.getValue());
			}
		}

		return httpClient.execute(request);
	}

	/**
	 * Concatenates host, path and query string into a request URL.
	 * <p>
	 * For every query entry: a non-blank key is written as {@code key} or
	 * {@code key=value} (value URL-encoded as UTF-8); an entry with a blank key
	 * but a non-blank value contributes the value verbatim; an entry where
	 * both are blank is skipped entirely so that no dangling {@code &} is left
	 * in the URL.
	 *
	 * @param host   scheme and host
	 * @param path   request path; skipped when blank
	 * @param querys query parameters; may be {@code null}
	 * @return the assembled URL
	 * @throws UnsupportedEncodingException if UTF-8 is not supported
	 */
	private static String buildUrl(String host, String path, Map<String, String> querys) throws UnsupportedEncodingException {
		StringBuilder sbUrl = new StringBuilder();
		sbUrl.append(host);
		if (!StringUtils.isBlank(path)) {
			sbUrl.append(path);
		}
		if (null != querys) {
			StringBuilder sbQuery = new StringBuilder();
			for (Map.Entry<String, String> query : querys.entrySet()) {
				boolean hasKey = !StringUtils.isBlank(query.getKey());
				boolean hasValue = !StringUtils.isBlank(query.getValue());
				if (!hasKey && !hasValue) {
					// Nothing to emit: skip before the separator is written.
					continue;
				}
				if (0 < sbQuery.length()) {
					sbQuery.append("&");
				}
				if (hasKey) {
					sbQuery.append(query.getKey());
					if (hasValue) {
						sbQuery.append("=");
						sbQuery.append(URLEncoder.encode(query.getValue(), "utf-8"));
					}
				} else {
					// Key-less entry: the value is appended as given (not encoded).
					sbQuery.append(query.getValue());
				}
			}
			if (0 < sbQuery.length()) {
				sbUrl.append("?").append(sbQuery);
			}
		}

		return sbUrl.toString();
	}

	/**
	 * Creates a new client and, for {@code https://} hosts, installs the
	 * permissive SSL scheme set up by {@link #sslClient(HttpClient)}.
	 *
	 * @param host scheme and host of the target; must not be {@code null}
	 * @return a freshly created client
	 */
	private static HttpClient wrapClient(String host) {
		HttpClient httpClient = new DefaultHttpClient();
		if (host.startsWith("https://")) {
			sslClient(httpClient);
		}

		return httpClient;
	}

	/**
	 * Registers an "https" scheme on port 443 whose socket factory accepts any
	 * certificate and any host name.
	 * <p>
	 * NOTE(security): the trust manager below performs no checks and the host
	 * name verifier allows every host, so TLS connections made through this
	 * client are not authenticated. Kept as-is to preserve behaviour; review
	 * before using against anything other than a trusted endpoint.
	 *
	 * @param httpClient client whose scheme registry is modified
	 */
	private static void sslClient(HttpClient httpClient) {
		try {
			SSLContext ctx = SSLContext.getInstance("TLS");
			X509TrustManager tm = new X509TrustManager() {
				public X509Certificate[] getAcceptedIssuers() {
					// The X509TrustManager contract requires a non-null array.
					return new X509Certificate[0];
				}
				public void checkClientTrusted(X509Certificate[] xcs, String str) {
					// intentionally empty: every client certificate is accepted
				}
				public void checkServerTrusted(X509Certificate[] xcs, String str) {
					// intentionally empty: every server certificate is accepted
				}
			};
			ctx.init(null, new TrustManager[] { tm }, null);
			SSLSocketFactory ssf = new SSLSocketFactory(ctx);
			ssf.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
			ClientConnectionManager ccm = httpClient.getConnectionManager();
			SchemeRegistry registry = ccm.getSchemeRegistry();
			registry.register(new Scheme("https", 443, ssf));
		} catch (KeyManagementException ex) {
			throw new RuntimeException(ex);
		} catch (NoSuchAlgorithmException ex) {
			throw new RuntimeException(ex);
		}
	}
}


/**
 * OCR识别 返回字符串显示
 * 
 * @author Albert
 * @date 2020-03-16
 * @version 1.0
 *
 */
public class OcrAliyunBO implements OcrAliyunIBPO {

	private String OcrPath = PropertiesUtils.readValue("aliyun.ocr.albert.OcrPath");
	// "C:\\temp\\";

	/**
	 * OCR识别 返回字符串显示
	 */
	public List<String[]> AliyunOcr(File file) {

		List<String[]> list = new ArrayList<String[]>();

		// 2、印刷文字识别-表格识别/OCR文字识别
		String host = PropertiesUtils.readValue("aliyun.ocr.albert.host");
		String path = PropertiesUtils.readValue("aliyun.ocr.albert.path");
		// 输入appcode
		String appcode = PropertiesUtils.readValue("aliyun.ocr.albert.appcode");
		String format=  PropertiesUtils.readValue("aliyun.ocr.albert.format");
		String imgFile = file.getAbsolutePath();
		Boolean is_old_format = false;// 如果文档的输入中含有inputs字段,设置为True, 否则设置为False
		// 请根据线上文档修改configure字段
		JSONObject configObj = new JSONObject();
		/*
		 * 参数说明: 1. format 输出格式:html/json/xlsx; 2. finance 是否使用财务报表模型: true/false; 3.
		 * dir_assure 图片方向是否确定是正向的: true(确定)/false(不确定) 4. line_less:是否无线条:
		 * true(无线条,或者只有横线没有竖线)/false(有线条) 5. skip_detection:
		 * 是否跳过检测,如果没有检测到表格,可以设置"skip_detection”:true
		 */
		configObj.put("format", format);
		configObj.put("finance", false);
		configObj.put("dir_assure", false);
		String config_str = configObj.toString();

		String method = "POST";
		Map<String, String> headers = new HashMap<String, String>();
		// 最后在header中的格式(中间是英文空格)为Authorization:APPCODE 83359fd73fe94948385f570e3c139105
		headers.put("Authorization", "APPCODE " + appcode);
		headers.put("Content-Type", "application/json; charset=UTF-8");
		Map<String, String> querys = new HashMap<String, String>();
		// 拼装请求body的json字符串
		JSONObject requestObj = new JSONObject();
		String bodys = null;
		// 对图像进行base64编码
		String imgBase64 = "";

		try {
			imgBase64 = AliyunUtils.changeToBase64(imgFile);
		} catch (Exception e2) {
			// TODO Auto-generated catch block
			e2.printStackTrace();
		}

		try {
			if (is_old_format) {
				JSONObject obj = new JSONObject();
				obj.put("image", getParam(50, imgBase64));
				if (config_str.length() > 0) {
					obj.put("configure", getParam(50, config_str));
				}
				JSONArray inputArray = new JSONArray();
				inputArray.add(obj);
				requestObj.put("inputs", inputArray);
			} else {
				requestObj.put("image", imgBase64);
				if (config_str.length() > 0) {
					requestObj.put("configure", config_str);
				}
			}
		} catch (JSONException e) {
			e.printStackTrace();
		}

		bodys = requestObj.toString();

		try {
			HttpResponse response = HttpUtils.doPost(host, path, method, headers, querys, bodys);
			int stat = response.getStatusLine().getStatusCode();
			if (stat != 200) {
				System.out.println("Http code: " + stat);
				System.out.println("http header error msg: " + response.getFirstHeader("X-Ca-Error-Message"));
				System.out.println("Http body error msg:" + EntityUtils.toString(response.getEntity()));
				if (stat == 413) {
					String message[] = { "erro", "请求资源超过限制:大小不超过5M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式" };
					list.add(message);
				} else {
					String message[] = { "erro", "请求参数输入不正确!" };
					list.add(message);
				}
				return list;
			}

			String res = EntityUtils.toString(response.getEntity());

			JSONObject res_obj = JSON.parseObject(res);
			Long fileName = System.currentTimeMillis();
			if (is_old_format) {
				JSONArray outputArray = res_obj.getJSONArray("outputs");
				String output = outputArray.getJSONObject(0).getJSONObject("outputValue").getString("dataValue");
				JSONObject out = JSON.parseObject(output);
				// System.out.println(out.toJSONString());
			} else {
				// System.out.println(res_obj.toJSONString());
				String tmp_base64path = (OcrPath + fileName).concat(".")+format;
				File tmp_base64file = new File(tmp_base64path);
				if (!tmp_base64file.exists()) {
					tmp_base64file.getParentFile().mkdirs();
				}
				tmp_base64file.createNewFile();
				

					// write
					FileWriter fw = new FileWriter(tmp_base64file, true);
					BufferedWriter bw = new BufferedWriter(fw);
					bw.write(res_obj.getString("tables"));
					bw.flush();
					bw.close();
					fw.close();
				list = readTextData(tmp_base64path, true);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		return list;
	}

	public static JSONObject getParam(int type, String dataValue) {
		JSONObject obj = new JSONObject();
		try {
			obj.put("dataType", type);
			obj.put("dataValue", dataValue);
		} catch (JSONException e) {
			e.printStackTrace();
		}
		return obj;
	}

	

}


/**
	 * 读取识别之后的数据(HTML文件)
	 * 
	 * @param txtFilePath
	 * @param isDelete
	 *            解析完成之后是否删除文件
	 * @return
	 */
	private List<String[]> readTextData(String txtFilePath, boolean isDelete) {
		// isDelete = false;// 测试时使用(保留识别结果原件)
		List<String[]> list = new ArrayList<String[]>();
		try {

			String code = "UTF-8";
			File file = new File(txtFilePath);
			BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), code));
			String lineTxt = null;
			while ((lineTxt = br.readLine()) != null) {
				System.out.println(lineTxt);
				String text1 = lineTxt.replace("<tr><td colspan=\"1\" rowspan=\"1\">", "");
				String text2 = text1.replace("<tr><td colspan=\"2\" rowspan=\"1\">", "");
				String text3 = text2.replace("<tr><td colspan=\"2\" rowspan=\"2\">", "");
				String text4 = text3.replace("</td><td colspan=\"2\" rowspan=\"1\">",
						"</td><td colspan=\"1\" rowspan=\"1\">");
				String[] split = text4.split("</td><td colspan=\"1\" rowspan=\"1\">");
				list.add(split);
				// System.out.println(split.length);
			}
			br.close();
			if (isDelete) {
				if (!file.delete()) {
					System.out.println("文件删除失败!" + txtFilePath);
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return list;
	}

}



/**
 * Utility class for reading the OCR configuration file
 * ({@code ocr-config.properties}) from the classpath.
 * <p>
 * The file is loaded again on every call; nothing is cached.
 *
 * @author Albert
 * @date 2020-03-16
 * @version 1.0
 */
public class PropertiesUtils {

	/** Name of the configuration resource looked up on the classpath. */
	private static final String CONFIG_RESOURCE = "ocr-config.properties";

	/**
	 * Reads the value stored under a key.
	 *
	 * @param key property key; surrounding whitespace is trimmed
	 * @return the configured value, or {@code null} if the key is {@code null}
	 *         or not present in the configuration
	 */
	public static String readValue(String key) {
		if (key == null) {
			return null;
		}
		Properties prop = getOcrProperties();
		return prop.getProperty(key.trim());
	}

	/**
	 * Returns the whole configuration as a map.
	 *
	 * @return key/value pairs of the configuration; empty if it could not be loaded
	 */
	public static Map<String, String> getPropertiesMap() {
		Properties properties = getOcrProperties();
		Map<String, String> map = new HashMap<String, String>();
		if (null == properties) {
			return map;
		}
		Set<Object> keySet = properties.keySet();
		for (Object key : keySet) {
			if (null == key) {
				continue;
			}
			Object object = properties.get(key);
			String value = null == object ? null : object.toString();
			map.put(key.toString(), value);
		}

		return map;
	}

	/**
	 * Loads {@code ocr-config.properties} as UTF-8 from the classpath.
	 * <p>
	 * The resource is looked up through this class's class loader, or through
	 * the system class loader when the former is {@code null}.
	 *
	 * @return the loaded properties; an empty instance if the resource is
	 *         missing or cannot be read (the problem is reported on stderr)
	 */
	public static Properties getOcrProperties() {
		Properties props = new Properties();

		InputStream inputStream;
		ClassLoader cl = PropertiesUtils.class.getClassLoader();
		if (cl != null) {
			inputStream = cl.getResourceAsStream(CONFIG_RESOURCE);
		} else {
			inputStream = ClassLoader.getSystemResourceAsStream(CONFIG_RESOURCE);
		}
		if (inputStream == null) {
			// getResourceAsStream returns null for a missing resource; wrapping
			// that in a reader would fail with a NullPointerException.
			System.err.println(CONFIG_RESOURCE + " not found on the classpath");
			return props;
		}

		try {
			props.load(new InputStreamReader(inputStream, "UTF-8"));
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			try {
				inputStream.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

		return props;
	}

}



#配置信息
#ocr-config.properties
#created by Albert 2020-03-16
#OCR interface production property
aliyun.ocr.albert.host=https\://form.market.alicloudapi.com
aliyun.ocr.albert.path=/api/predict/ocr_table_parse
aliyun.ocr.albert.appcode=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
aliyun.ocr.albert.OcrPath=C\:\\temp\\
aliyun.ocr.albert.format=html

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值