/**
 * Extracts the text content of a Word ({@code doc} / {@code docx}) or PDF document.
 *
 * <p>For {@code doc} and {@code docx} input, the extracted text is split on line breaks,
 * each piece is trimmed, and the pieces are concatenated with no separator between them.
 * For {@code pdf} input, the text of all pages (first to last) is returned as produced by
 * the text stripper with position sorting enabled.
 *
 * <p>The extractor, package and document objects created here are closed before the
 * method returns. This method does not itself call {@code close()} on
 * {@code inputStream}.
 *
 * @param inputStream stream supplying the document bytes
 * @param suffix file extension without the dot; compared case-sensitively against
 *     {@code "doc"}, {@code "docx"} and {@code "pdf"}
 * @return the extracted text, or {@code null} when {@code suffix} is not one of the
 *     supported values
 * @throws Exception if the underlying parser fails to read the document (the original
 *     author noted that an empty file makes the read fail)
 */
public static String readWord(InputStream inputStream, String suffix) throws Exception {
    if ("doc".equals(suffix)) {
        try (WordExtractor extractor = new WordExtractor(inputStream)) {
            // Split the extracted text on carriage-return + line-feed.
            return joinTrimmedLines(extractor.getText(), "\r\n");
        }
    }

    if ("docx".equals(suffix)) {
        // Both resources are declared so the package is released even if the
        // extractor constructor throws.
        try (OPCPackage opcPackage = OPCPackage.open(inputStream);
                POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage)) {
            // Split the extracted text on line-feed.
            return joinTrimmedLines(extractor.getText(), "\n");
        }
    }

    if ("pdf".equals(suffix)) {
        RandomAccessBuffer rab = new RandomAccessBuffer(inputStream);
        PDFParser pdfParser = new PDFParser(rab);
        pdfParser.parse();
        try (PDDocument document = pdfParser.getPDDocument()) {
            PDFTextStripper stripper = new PDFTextStripper();
            // Emit text in positional (reading) order and cover every page.
            stripper.setSortByPosition(true);
            stripper.setStartPage(1);
            stripper.setEndPage(document.getNumberOfPages());
            return stripper.getText(document);
        }
    }

    // Unsupported suffix: callers receive null, as before.
    return null;
}

/**
 * Splits {@code text} on {@code lineBreakRegex}, trims every piece and concatenates the
 * pieces without a separator. Returns an empty string for {@code null} or empty text.
 */
private static String joinTrimmedLines(String text, String lineBreakRegex) {
    if (text == null || text.isEmpty()) {
        return "";
    }
    StringBuilder joined = new StringBuilder(text.length());
    for (String line : text.split(lineBreakRegex)) {
        joined.append(line.trim());
    }
    return joined.toString();
}
// Reading Word documents in Java
// (Source page note: most recently recommended article published 2024-07-20 02:56:20)