Java 读取 Word 2010:使用 Java 解析 Word、Excel、PDF

packageorg.gaoyoubo.resolve;importjava.io.BufferedReader;importjava.io.ByteArrayOutputStream;importjava.io.File;importjava.io.FileInputStream;importjava.io.FileReader;importjava.io.LineNumberReader;importjava.io.OutputStreamWriter;importjava.util.ArrayList;importjava.util.List;importorg.apache.poi.hssf.usermodel.HSSFCell;importorg.apache.poi.hssf.usermodel.HSSFRow;importorg.apache.poi.hssf.usermodel.HSSFSheet;importorg.apache.poi.hssf.usermodel.HSSFWorkbook;publicclassResolve {/*** 得到文件后缀名*/publicString getExt(String path) {

String ext="";if(path!=null&&!"".equals(path)) {

File file=newFile(path);if(file.exists()) {

ext=path.substring(path.lastIndexOf(".")+1);

}

}returnext;

}publicString execute(String path) {

String content="";

String ext=getExt(path);if(ext!=null&&!"".equals(ext)) {if(Const.FILE_TYPE_LIST.contains(ext)) {if(ext.equals("txt")) {

content=resolveText(path);

}elseif(ext.equals("doc")) {

content=resolveWord2003(path);

}elseif(ext.equals("docx")) {

content=resolveWord2007(path);

}elseif(ext.equals("pdf")) {

content=resolvePdf(path);

}elseif(ext.equals("xls")) {

content=resolveExcel2003(path);

}elseif(ext.equals("xlsx")) {

content=resolveExcel2007(path);

}}

}else{

System.err.println("无法解析文件:"+path+"!");

}returncontent;

}/*** 解析word2007

*@parampath 文件路径

*@return文件内容*/publicString resolveWord2007(String path){

String content="";try{

OPCPackage opcPackage=POIXMLDocument.openPackage(path);

POIXMLTextExtractor ex=newXWPFWordExtractor(opcPackage);

content=ex.getText();

}catch(Exception e) {

System.err.println("解析文件:"+path+"失败!");

}returncontent;

}/*** 解析word2003

*@parampath 文件路径

*@return文件内容*/publicString resolveWord2003(String path){

String content="";try{

File file=newFile(path);

FileInputStream fis=newFileInputStream(file);

WordExtractor ex=newWordExtractor(fis);

content=ex.getText();

fis.close();

}catch(Exception e) {

System.err.println("解析文件:"+path+"失败!");

}returncontent;

}/*** 解析Excel2003

*@parampath

*@return*/publicString resolveExcel2003(String path){

StringBuffer content=newStringBuffer("");try{

File file=newFile(path);

FileInputStream fis=newFileInputStream(file);

HSSFWorkbook wordbook=newHSSFWorkbook(fis);//遍历sheetfor(inti=0; i

HSSFSheet sheet=wordbook.getSheetAt(i);//得到sheet//遍历该sheet中的数据for(intj=0; j

HSSFRow row=sheet.getRow(j);//获取一行//循环遍历cellfor(intk=0; k

HSSFCell cell=row.getCell(k);//获取单元格的值if(cell.getCellType()==HSSFCell.CELL_TYPE_NUMERIC) {

content.append(cell.getNumericCellValue());

}elseif(cell.getCellType()==HSSFCell.CELL_TYPE_BOOLEAN) {

content.append(cell.getBooleanCellValue());

}else{

content.append(cell.getStringCellValue());

}

}

}

}

}

}

fis.close();

}catch(Exception e) {

System.err.println("解析文件:"+path+"失败!");

}returncontent.toString();

}/*** 解析Excel2007

*@parampath

*@return*/publicString resolveExcel2007(String path){

StringBuffer content=newStringBuffer("");try{

XSSFWorkbook wb=newXSSFWorkbook(path);//遍历sheetfor(inti=0; i

XSSFSheet sheet=wb.getSheetAt(i);if(sheet==null) {continue;

}//遍历行for(intj=0; j

XSSFRow row=sheet.getRow(j);if(row==null) {continue;

}//遍历单元格for(intk=0; k

XSSFCell cell=row.getCell(k);if(cell==null) {continue;

}if(cell.getCellType()==XSSFCell.CELL_TYPE_BOOLEAN) {

content.append(cell.getBooleanCellValue());

}elseif(cell.getCellType()==XSSFCell.CELL_TYPE_NUMERIC) {

content.append(cell.getNumericCellValue());

}else{

content.append(cell.getStringCellValue());

}

}

}

}

}catch(Exception e) {

System.err.println("解析文件:"+path+"失败!");

}returncontent.toString();

}/*** 解析pdf

*@parampath 文件路径

*@return文件内容*/publicString resolvePdf(String path) {

String content="";//StringBuffer content = new StringBuffer("");try{/*FileInputStream fis = new FileInputStream(path);

PDFParser p = new PDFParser(fis);

p.parse();

PDFTextStripper ts = new PDFTextStripper();

content.append(ts.getText(p.getPDDocument()));

fis.close();*/File file=newFile(path);

PDDocument doc=PDDocument.load(file);

ByteArrayOutputStream out=newByteArrayOutputStream();

OutputStreamWriter writer=newOutputStreamWriter(out);

PDFTextStripper ts=newPDFTextStripper();

ts.writeText(doc, writer);

doc.close();

out.close();

writer.close();byte[] contents=out.toByteArray();

content=newString(contents);

}catch(Exception e) {

System.err.println("解析文件:"+path+"失败!");

}returncontent.toString();

}/*** 解析普通文本文件

*@parampath

*@return*/publicString resolveText(String path){

StringBuffer content=newStringBuffer("");try{

File file=newFile(path);

FileReader reader=newFileReader(file);

BufferedReader br=newBufferedReader(reader);while(br.read()!=-1) {

content.append(br.readLine());

}

br.close();

reader.close();

}catch(Exception e) {

System.err.println("读取文件:"+path+"失败!");

}returncontent.toString();

}}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值