package com.example.demo.BMPLoader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
 * Reads the text content of an XLS workbook and of a PDF file, prints both,
 * and reports the words flagged by {@code ResultUtils.getError}.
 */
public class test {

    /** Location of the PDF file whose text is extracted. */
    private static final String PDF_PATH = "D:/1.pdf";

    /** Location of the XLS workbook whose text is extracted. */
    private static final String XLS_PATH = "D:/实验文件.xls";

    /**
     * Extracts the text of the workbook and of the PDF, echoes both to
     * standard output, and then prints to standard error every
     * {@code WordBean} returned by {@code ResultUtils.getError} whose
     * {@code getTarIndexs()} is {@code null}.
     *
     * <p>Any failure is reported via a stack trace instead of being
     * propagated, so the process exits normally even if a file is missing.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        try {
            String excelText = readExcelText(XLS_PATH);
            System.out.println(excelText);

            String pdfText = readPdfText(PDF_PATH);
            System.out.println(pdfText);

            List<WordBean> words = ResultUtils.getError(excelText, pdfText);
            for (WordBean word : words) {
                // NOTE(review): a null target-index list presumably means the word
                // from the workbook was not located in the PDF text - confirm
                // against ResultUtils.getError.
                if (word.getTarIndexs() == null) {
                    System.err.println(word.getWord() + "--------------原文位置" + word.getOraIndex());
                }
            }
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and fall through.
            e.printStackTrace();
        }
    }

    /**
     * Returns the text extracted from the XLS workbook at the given path.
     *
     * @param path file-system path of the workbook
     * @return the text produced by {@code ExcelExtractor.getText()}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readExcelText(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            HSSFWorkbook workbook = new HSSFWorkbook(in);
            ExcelExtractor extractor = new ExcelExtractor(workbook);
            return extractor.getText();
        } finally {
            // Previously this stream was never closed.
            closeQuietly(in);
        }
    }

    /**
     * Returns the text extracted from the PDF file at the given path.
     *
     * @param path file-system path of the PDF file
     * @return the text produced by {@code PDFTextStripper.getText}
     * @throws IOException if the file cannot be opened, parsed, or read
     */
    private static String readPdfText(String path) throws IOException {
        FileInputStream in = new FileInputStream(path);
        try {
            // Create a parser over the PDF stream and parse the file.
            PDFParser parser = new PDFParser(in);
            parser.parse();
            // Obtain the document object produced by the parse.
            PDDocument document = parser.getPDDocument();
            try {
                // Strip the plain text out of the parsed document.
                PDFTextStripper stripper = new PDFTextStripper();
                return stripper.getText(document);
            } finally {
                // Previously the parsed document was never closed.
                document.close();
            }
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Closes the given stream, ignoring any error raised while closing.
     *
     * @param stream the stream to close; may be {@code null}
     */
    private static void closeQuietly(FileInputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // The stream was only read from and its data has already been
            // consumed, so a failed close cannot affect the result and must not
            // mask an exception thrown by the extraction itself.
        }
    }
}