Java 解析 Office 文件大全

package org.css.resource.businesssoft.searchengine.quwenjiansuo;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;

/**
 * @author lizh
 */

public classCovertFile {/*** 从word 2003文档中提取纯文本

*@paramis

*@return*@throwsIOException*/

public static String extractTextFromDOC(InputStream is) throwsIOException {

WordExtractor ex= new WordExtractor(is); //is是WORD文件的InputStream

returnex.getText();

}/*** 从word 2007文档中提取纯文本

*@paramfileName

*@return

*/

public staticString extractTextFromDOC2007(String fileName) {try{

OPCPackage opcPackage=POIXMLDocument.openPackage(fileName);

POIXMLTextExtractor ex= newXWPFWordExtractor(opcPackage);returnex.getText();

}catch(Exception e) {return "";

}

}/*** 从excel 2003文档中提取纯文本

*@paramis

*@return*@throwsIOException*/

private static String extractTextFromXLS(InputStream is) throwsIOException {

StringBuffer content= newStringBuffer();

HSSFWorkbook workbook= new HSSFWorkbook(is); //创建对Excel工作簿文件的引用

for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) {if (null !=workbook.getSheetAt(numSheets)) {

HSSFSheet aSheet= workbook.getSheetAt(numSheets); //获得一个sheet

for (int rowNumOfSheet = 0; rowNumOfSheet <=aSheet

.getLastRowNum(); rowNumOfSheet++) {if (null !=aSheet.getRow(rowNumOfSheet)) {

HSSFRow aRow= aSheet.getRow(rowNumOfSheet); //获得一行

for (short cellNumOfRow = 0; cellNumOfRow <=aRow

.getLastCellNum(); cellNumOfRow++) {if (null !=aRow.getCell(cellNumOfRow)) {

HSSFCell aCell= aRow.getCell(cellNumOfRow); //获得列值

if (aCell.getCellType() ==HSSFCell.CELL_TYPE_NUMERIC) {

content.append(aCell.getNumericCellValue());

}else if (aCell.getCellType() ==HSSFCell.CELL_TYPE_BOOLEAN) {

content.append(aCell.getBooleanCellValue());

}else{

content.append(aCell.getStringCellValue());

}

}

}

}

}

}

}returncontent.toString();

}/*** 从excel 2007文档中提取纯文本

*@paramfileName

*@return*@throwsException*/

private staticString extractTextFromXLS2007(String fileName)throwsException {

StringBuffer content= newStringBuffer();//构造 XSSFWorkbook 对象,strPath 传入文件路径

XSSFWorkbook xwb = newXSSFWorkbook(fileName);//循环工作表Sheet

for (int numSheet = 0; numSheet < xwb.getNumberOfSheets(); numSheet++) {

XSSFSheet xSheet=xwb.getSheetAt(numSheet);if (xSheet == null) {continue;

}//循环行Row

for (int rowNum = 0; rowNum <= xSheet.getLastRowNum(); rowNum++) {

XSSFRow xRow=xSheet.getRow(rowNum);if (xRow == null) {continue;

}//循环列Cell

for (int cellNum = 0; cellNum <= xRow.getLastCellNum(); cellNum++) {

XSSFCell xCell=xRow.getCell(cellNum);if (xCell == null) {continue;

}if (xCell.getCellType() ==XSSFCell.CELL_TYPE_BOOLEAN) {

content.append(xCell.getBooleanCellValue());

}else if (xCell.getCellType() ==XSSFCell.CELL_TYPE_NUMERIC) {

content.append(xCell.getNumericCellValue());

}else{

content.append(xCell.getStringCellValue());

}

}

}

}returncontent.toString();

}/*** 从excel 2007文档中提取纯文本

*@paramfileName

*@return

*/

public staticString getXLS2007(String fileName){

String doc= "";try{

doc=extractTextFromXLS2007(fileName);returndoc;

}catch(Exception e){return "";

}

}/*** 从ppt 2003、2007文档中提取纯文本

*@paramfileName

*@return

*/

public staticString getPPTX(String fileName){

String doc= "";try{

File inputFile= newFile(fileName);

POITextExtractor extractor=ExtractorFactory.createExtractor(inputFile);

doc=extractor.getText();returndoc;

}catch(Exception e){return "";

}

}public static voidmain(String[] args) {try{//String wordFile = "D:/松山血战.docx";//String wordText2007 = CovertFile.extractTextFromDOC2007(wordFile);//System.out.println("wordText2007=======" + wordText2007);//

//InputStream is = new FileInputStream("D:/XXX研发中心技术岗位职位需求.xls");//String excelText = CovertFile.extractTextFromXLS(is);//System.out.println("text2003==========" + excelText);//String excelFile = "D:/zh.xlsx";//String excelText2007 = CovertFile.extractTextFromXLS2007(excelFile);//System.out.println("excelText2007==========" + excelText2007);

String pptFile= "D:/zz3.ppt";

String pptx=CovertFile.getPPTX(pptFile);

System.out.println("pptx==========" +pptx);

}catch(Exception e) {

e.printStackTrace();

}

}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值