package org.gaoyoubo.resolve;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;

// NOTE(review): the imports below (everything except the four HSSF ones) were
// missing from the original listing although the types are used. The package
// names assume Apache POI 3.x and PDFBox 1.x, which matches the APIs used here
// (HSSFCell.CELL_TYPE_*, POIXMLDocument.openPackage) -- confirm against the
// versions on the project's classpath.
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

/**
 * Extracts the textual content of a file, choosing the extraction routine from
 * the file's extension (txt, doc, docx, pdf, xls, xlsx).
 *
 * <p>None of the methods throw: on failure a message is written to
 * {@code System.err} and whatever text was collected so far (possibly the
 * empty string) is returned.
 */
public class Resolve {

    /**
     * Returns the extension of an existing file.
     *
     * @param path path of the file
     * @return the text after the last {@code '.'} of the file name, or the
     *         empty string if {@code path} is null/empty, the file does not
     *         exist, or the file name contains no dot
     */
    public String getExt(String path) {
        if (path == null || "".equals(path)) {
            return "";
        }
        File file = new File(path);
        if (!file.exists()) {
            return "";
        }
        // Look only at the file name so that a dot in a parent directory
        // (e.g. "/data.v2/readme") is not mistaken for an extension separator.
        String name = file.getName();
        int dot = name.lastIndexOf('.');
        if (dot < 0) {
            // No dot at all: substring(0) would otherwise return the whole name.
            return "";
        }
        return name.substring(dot + 1);
    }

    /**
     * Extracts the text of the file at {@code path} using the routine that
     * matches its extension.
     *
     * @param path path of the file
     * @return the extracted text, or the empty string if the file has no
     *         extension, the extension is not listed in
     *         {@code Const.FILE_TYPE_LIST}, or extraction failed
     */
    public String execute(String path) {
        String ext = getExt(path);
        // Report both "no extension" and "extension not supported"; the latter
        // used to return an empty string without any diagnostic.
        if (ext == null || "".equals(ext) || !Const.FILE_TYPE_LIST.contains(ext)) {
            System.err.println("无法解析文件:" + path + "!");
            return "";
        }

        if (ext.equals("txt")) {
            return resolveText(path);
        } else if (ext.equals("doc")) {
            return resolveWord2003(path);
        } else if (ext.equals("docx")) {
            return resolveWord2007(path);
        } else if (ext.equals("pdf")) {
            return resolvePdf(path);
        } else if (ext.equals("xls")) {
            return resolveExcel2003(path);
        } else if (ext.equals("xlsx")) {
            return resolveExcel2007(path);
        }
        // Listed as supported but no extractor is wired up for it here.
        return "";
    }

    /**
     * Extracts the text of a Word 2007 (.docx) document.
     *
     * @param path path of the file
     * @return the document text, or the empty string on failure
     */
    public String resolveWord2007(String path) {
        String content = "";
        try {
            // NOTE(review): the package is never closed, as in the original.
            // Closing it safely depends on the POI version in use -- confirm
            // and release it if appropriate.
            OPCPackage opcPackage = POIXMLDocument.openPackage(path);
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
            content = extractor.getText();
        } catch (Exception e) {
            System.err.println("解析文件:" + path + "失败!");
        }
        return content;
    }

    /**
     * Extracts the text of a Word 2003 (.doc) document.
     *
     * @param path path of the file
     * @return the document text, or the empty string on failure
     */
    public String resolveWord2003(String path) {
        String content = "";
        try {
            FileInputStream fis = new FileInputStream(new File(path));
            try {
                WordExtractor extractor = new WordExtractor(fis);
                content = extractor.getText();
            } finally {
                // Close the stream even when extraction throws.
                fis.close();
            }
        } catch (Exception e) {
            System.err.println("解析文件:" + path + "失败!");
        }
        return content;
    }

    /**
     * Extracts the cell values of an Excel 2003 (.xls) workbook.
     *
     * <p>Cells are visited sheet by sheet, row by row, and their values are
     * concatenated without separators. Numeric and boolean cells are appended
     * via their typed getters; every other cell is read as a string.
     *
     * @param path path of the file
     * @return the concatenated cell values; on failure, whatever was collected
     *         before the error
     */
    public String resolveExcel2003(String path) {
        StringBuilder content = new StringBuilder();
        try {
            FileInputStream fis = new FileInputStream(new File(path));
            try {
                HSSFWorkbook workbook = new HSSFWorkbook(fis);
                // Iterate over the sheets.
                for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
                    HSSFSheet sheet = workbook.getSheetAt(i);
                    if (sheet == null) {
                        continue;
                    }
                    // Iterate over the rows; getLastRowNum() is an inclusive
                    // 0-based index and rows in between may be missing.
                    for (int j = 0; j <= sheet.getLastRowNum(); j++) {
                        HSSFRow row = sheet.getRow(j);
                        if (row == null) {
                            continue;
                        }
                        // Iterate over the cells; getLastCellNum() is the last
                        // index plus one (-1 for a row without cells).
                        for (int k = 0; k < row.getLastCellNum(); k++) {
                            HSSFCell cell = row.getCell(k);
                            if (cell == null) {
                                continue;
                            }
                            if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) {
                                content.append(cell.getNumericCellValue());
                            } else if (cell.getCellType() == HSSFCell.CELL_TYPE_BOOLEAN) {
                                content.append(cell.getBooleanCellValue());
                            } else {
                                content.append(cell.getStringCellValue());
                            }
                        }
                    }
                }
            } finally {
                // Close the stream even when parsing throws.
                fis.close();
            }
        } catch (Exception e) {
            System.err.println("解析文件:" + path + "失败!");
        }
        return content.toString();
    }

    /**
     * Extracts the cell values of an Excel 2007 (.xlsx) workbook.
     *
     * <p>Cells are visited sheet by sheet, row by row, and their values are
     * concatenated without separators. Boolean and numeric cells are appended
     * via their typed getters; every other cell is read as a string.
     *
     * @param path path of the file
     * @return the concatenated cell values; on failure, whatever was collected
     *         before the error
     */
    public String resolveExcel2007(String path) {
        StringBuilder content = new StringBuilder();
        try {
            // NOTE(review): the workbook is never closed, as in the original;
            // whether XSSFWorkbook exposes close() depends on the POI version.
            XSSFWorkbook workbook = new XSSFWorkbook(path);
            // Iterate over the sheets.
            for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
                XSSFSheet sheet = workbook.getSheetAt(i);
                if (sheet == null) {
                    continue;
                }
                // Iterate over the rows (inclusive upper bound).
                for (int j = 0; j <= sheet.getLastRowNum(); j++) {
                    XSSFRow row = sheet.getRow(j);
                    if (row == null) {
                        continue;
                    }
                    // Iterate over the cells (exclusive upper bound).
                    for (int k = 0; k < row.getLastCellNum(); k++) {
                        XSSFCell cell = row.getCell(k);
                        if (cell == null) {
                            continue;
                        }
                        if (cell.getCellType() == XSSFCell.CELL_TYPE_BOOLEAN) {
                            content.append(cell.getBooleanCellValue());
                        } else if (cell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC) {
                            content.append(cell.getNumericCellValue());
                        } else {
                            content.append(cell.getStringCellValue());
                        }
                    }
                }
            }
        } catch (Exception e) {
            System.err.println("解析文件:" + path + "失败!");
        }
        return content.toString();
    }

    /**
     * Extracts the text of a PDF document.
     *
     * @param path path of the file
     * @return the document text, or the empty string on failure
     */
    public String resolvePdf(String path) {
        String content = "";
        try {
            PDDocument doc = PDDocument.load(new File(path));
            try {
                // Ask the stripper for a String directly instead of writing to
                // a byte stream and decoding it again, which loses characters
                // the platform default charset cannot represent.
                PDFTextStripper stripper = new PDFTextStripper();
                content = stripper.getText(doc);
            } finally {
                // Close the document even when text extraction throws.
                doc.close();
            }
        } catch (Exception e) {
            System.err.println("解析文件:" + path + "失败!");
        }
        return content;
    }

    /**
     * Reads a plain text file.
     *
     * <p>The file is decoded with the platform default charset (that is what
     * {@link FileReader} uses). Each line is terminated with {@code '\n'} in
     * the result, regardless of the line terminator used in the file.
     *
     * @param path path of the file
     * @return the file's text; on failure, whatever was read before the error
     */
    public String resolveText(String path) {
        StringBuilder content = new StringBuilder();
        try {
            BufferedReader br = new BufferedReader(new FileReader(new File(path)));
            try {
                // readLine() alone drives the loop: probing with read() first
                // would swallow the first character of every line and could
                // append the literal "null" at end of file.
                String line;
                while ((line = br.readLine()) != null) {
                    content.append(line).append('\n');
                }
            } finally {
                // Closing the BufferedReader also closes the wrapped FileReader.
                br.close();
            }
        } catch (Exception e) {
            System.err.println("读取文件:" + path + "失败!");
        }
        return content.toString();
    }
}