所用的库为 Apache PDFBox
1.使用 Apache 的 PDF 处理库 PDFBox,下载地址:https://pdfbox.apache.org/downloads.html
2.在线开发文档:https://pdfbox.apache.org/docs/2.0.1/javadocs/
3.代码如下
/**
 * Renders every page of a PDF invoice to an image file and returns one DTO per page.
 *
 * <p>Each page is rendered at 96 DPI and written to the file returned by
 * {@code ocrRecognitionService.getPicPath(picName)}; the text after the last '.' in
 * {@code picName} is used as the ImageIO format name. When {@code invoiceTypeCode} equals
 * {@code IConstants.INVOICE_TYPE_SPECIAL} or {@code IConstants.INVOICE_TYPE_PLAIN}, the written
 * image is read back and stored Base64-encoded on the DTO for later invoice recognition.
 *
 * @param picName         image file name; must contain an extension such as {@code "x.png"}
 * @param invoiceFile     the PDF file to convert
 * @param invoiceTypeCode invoice type code compared against the {@code IConstants} values
 * @return one {@code EcdInvoiceOcrDto} per PDF page, in page order
 * @throws IllegalArgumentException if {@code picName} is null or has no extension
 * @throws Exception if the PDF cannot be loaded or rendered, the image cannot be written,
 *                   or the written image cannot be read back for encoding
 */
public List<EcdInvoiceOcrDto> pdfFileToPic(String picName, File invoiceFile,
        String invoiceTypeCode) throws Exception {
    // Take the extension after the LAST dot so names like "a.b.png" yield "png".
    int dot = (picName == null) ? -1 : picName.lastIndexOf('.');
    if (dot < 0 || dot == picName.length() - 1) {
        throw new IllegalArgumentException("picName has no file extension: " + picName);
    }
    String formatName = picName.substring(dot + 1);

    List<EcdInvoiceOcrDto> picPathList = new ArrayList<EcdInvoiceOcrDto>();
    PDDocument doc = PDDocument.load(invoiceFile);
    try {
        PDFRenderer reader = new PDFRenderer(doc);
        int pageNo = doc.getNumberOfPages();
        for (int i = 0; i < pageNo; i++) {
            // 1. Render the PDF page to an image.
            BufferedImage bfi = reader.renderImageWithDPI(i, 96);
            // 2. Get the image storage path.
            // NOTE(review): getPicPath receives the same picName for every page; confirm it
            // returns a distinct file per call, otherwise later pages overwrite earlier ones.
            File file2 = ocrRecognitionService.getPicPath(picName);
            // ImageIO.write returns false (without throwing) when no writer supports the format.
            if (!ImageIO.write(bfi, formatName, file2)) {
                throw new java.io.IOException("No ImageIO writer for format '" + formatName
                        + "'; image not written: " + file2.getAbsolutePath());
            }
            /*
             * 3. Create the invoice DTO and set the image location.
             * For special or plain invoices, also attach the image bytes (Base64-encoded)
             * so that they can be used later during invoice recognition.
             */
            EcdInvoiceOcrDto ecdInvoiceOcrDto = new EcdInvoiceOcrDto();
            try {
                if (invoiceTypeCode.equals(IConstants.INVOICE_TYPE_SPECIAL)
                        || invoiceTypeCode.equals(IConstants.INVOICE_TYPE_PLAIN)) {
                    ecdInvoiceOcrDto.setImageStreamStr(readFileAsBase64(file2));
                }
            } catch (Exception e) {
                logger.error("读取图片成二进制数组失败", e);
                // Keep the message and the cause so callers can diagnose the failure.
                throw new Exception("读取图片成二进制数组失败", e);
            }
            ecdInvoiceOcrDto.setPicPath(file2.getAbsolutePath());
            ecdInvoiceOcrDto.setImageName(file2.getName());
            picPathList.add(ecdInvoiceOcrDto);
        }
    } finally {
        // Always release the PDF document, on success and on every failure path.
        doc.close();
    }
    return picPathList;
}

/** Reads the whole file into memory and returns its content Base64-encoded. */
private String readFileAsBase64(File imageFile) throws java.io.IOException {
    FileInputStream fis = new FileInputStream(imageFile);
    try {
        // ByteArrayOutputStream holds no system resource; closing it has no effect.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        byte[] data = new byte[2048];
        int len;
        while ((len = fis.read(data)) != -1) {
            bos.write(data, 0, len);
        }
        // NOTE(review): BASE64Encoder looks like sun.misc.BASE64Encoder, which is an
        // internal JDK API; kept because its output format is what consumers receive today.
        return new BASE64Encoder().encode(bos.toByteArray());
    } finally {
        fis.close();
    }
}
,String invoiceTypeCode) throws Exception{
List<EcdInvoiceOcrDto> picPathList = new ArrayList<EcdInvoiceOcrDto>();
PDDocument doc = PDDocument.load(invoiceFile);
PDFRenderer reader = new PDFRenderer(doc);
int pageNo = doc.getNumberOfPages();
for (int i = 0; i < pageNo; i++) {
//1.将pdf转为图片
BufferedImage bfi = reader.renderImageWithDPI(i, 96);
//2.获取图片存储路劲
File file2 = ocrRecognitionService.getPicPath(picName);
ImageIO.write(bfi, picName.split(" \\.")[1], file2);
/*
* 3.新建发票实体类并设置发票的存放路劲
* 若发票为专票或是普票则将发票图片变成二进制数组 传回 待发票识别时使用
*/
EcdInvoiceOcrDto ecdInvoiceOcrDto = new EcdInvoiceOcrDto();
ByteArrayOutputStream bos = null;
FileInputStream fis=null;
try {
if(invoiceTypeCode.equals(IConstants.INVOICE_TYPE_SPECIAL)
||invoiceTypeCode.equals(IConstants.INVOICE_TYPE_PLAIN)){
bos = new ByteArrayOutputStream();
fis = new FileInputStream(file2);
byte[] data=new byte[2048];
int len=0;
while((len=fis.read(data))!=-1){
bos.write(data, 0, len);
}
bos.flush();
ecdInvoiceOcrDto.setImageStreamStr(new BASE64Encoder().encode(bos.toByteArray()));
}
} catch (Exception e) {
logger.error("读取图片成二进制数组失败",e);
throw new Exception();
}finally{
if(bos!=null){
bos.close();
}
if(fis!=null){
fis.close();
}
}
ecdInvoiceOcrDto.setPicPath(file2.getAbsolutePath());
ecdInvoiceOcrDto.setImageName(file2.getName());
picPathList.add(ecdInvoiceOcrDto);
}
return picPathList;
}