java word2007_Java解析word2007和Excel2007

package com.test;

/**

* 需要的jar包:

* poi-3.0.2-FINAL-20080204.jar

* poi-contrib-3.0.2-FINAL-20080204.jar

* poi-scratchpad-3.0.2-FINAL-20080204.jar

* poi-3.5-beta6-20090622.jar

* geronimo-stax-api_1.0_spec-1.0.jar

* ooxml-schemas-1.0.jar

* openxml4j-bin-beta.jar

* poi-ooxml-3.5-beta6-20090622.jar

* xmlbeans-2.3.0.jar

* dom4j-1.6.1.jar

*/

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStream;

import org.apache.poi.POIXMLDocument;

import org.apache.poi.POIXMLTextExtractor;

import org.apache.poi.hssf.usermodel.HSSFCell;

import org.apache.poi.hssf.usermodel.HSSFRow;

import org.apache.poi.hssf.usermodel.HSSFSheet;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;

import org.apache.poi.hwpf.extractor.WordExtractor;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;

import org.apache.poi.openxml4j.opc.OPCPackage;

import org.apache.poi.xssf.usermodel.XSSFCell;

import org.apache.poi.xssf.usermodel.XSSFRow;

import org.apache.poi.xssf.usermodel.XSSFSheet;

import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

import org.apache.xmlbeans.XmlException;

public class WordAndExcelExtractor {

/**
 * Demo entry point: extracts and prints the text of three hard-coded sample files
 * (a .docx document, an .xls workbook and an .xlsx workbook).
 *
 * <p>Any exception raised along the way is printed to standard error and ends the run.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args){
    try {
        String wordFile = "D:/松山血战.docx";
        String wordText2007 = WordAndExcelExtractor.extractTextFromDOC2007(wordFile);
        System.out.println("wordText2007======="+wordText2007);

        InputStream is = new FileInputStream("D:/XXX研发中心技术岗位职位需求.xls");
        String excelText;
        try {
            excelText = WordAndExcelExtractor.extractTextFromXLS(is);
        } finally {
            // Release the file handle even when extraction fails.
            is.close();
        }
        System.out.println("text2003==========" + excelText);

        String excelFile = "D:/Hello2007.xlsx";
        String excelText2007 = WordAndExcelExtractor.extractTextFromXLS2007(excelFile);
        System.out.println("excelText2007==========" + excelText2007);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Extracts the plain text of a Word 2003 (.doc) document.
 *
 * @param is input stream supplying the contents of the Word file
 * @return the text reported by the POI {@code WordExtractor} for that document
 * @throws IOException if the document cannot be read from the stream
 */
public static String extractTextFromDOC(InputStream is) throws IOException {
    // Build the extractor over the stream and read its text in one expression.
    return new WordExtractor(is).getText();
}

/**
 * Extracts the plain text of a Word 2007 (.docx) document.
 *
 * <p>The OOXML package opened for the file is released before this method returns,
 * whether or not extraction succeeds.
 *
 * @param fileName path of the .docx file to open
 * @return the text reported by the POI {@code XWPFWordExtractor} for that document
 * @throws IOException if the file cannot be opened or read
 * @throws OpenXML4JException if the OOXML package is malformed
 * @throws XmlException if the document XML cannot be parsed
 */
public static String extractTextFromDOC2007(String fileName) throws IOException, OpenXML4JException, XmlException {
    OPCPackage opcPackage = POIXMLDocument.openPackage(fileName);
    try {
        POIXMLTextExtractor ex = new XWPFWordExtractor(opcPackage);
        return ex.getText();
    } finally {
        // revert() releases the package without saving; close() on a package opened
        // from a path would write it back to the file, which a read-only extraction
        // must not do.
        opcPackage.revert();
    }
}

/**
 * Extracts the plain text of an Excel 2003 (.xls) workbook.
 *
 * <p>Visits every sheet, row and cell in index order and appends each cell's value to one
 * string, with no separator between cells. Numeric and boolean values are appended in
 * their default Java string form. Formula cells contribute their cached result rather
 * than the formula text. Blank cells and error cells contribute nothing.
 *
 * @param is input stream supplying the contents of the .xls file; this method does not
 *           close it explicitly
 * @return the concatenated cell values; empty if no cell holds a value
 * @throws IOException if the workbook cannot be read from the stream
 */
@SuppressWarnings("deprecation")
private static String extractTextFromXLS(InputStream is)
        throws IOException {
    StringBuilder content = new StringBuilder();
    HSSFWorkbook workbook = new HSSFWorkbook(is);

    for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
        HSSFSheet sheet = workbook.getSheetAt(sheetIndex);
        if (sheet == null) {
            continue;
        }
        for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
            HSSFRow row = sheet.getRow(rowIndex);
            if (row == null) {
                continue;
            }
            for (short cellIndex = 0; cellIndex <= row.getLastCellNum(); cellIndex++) {
                HSSFCell cell = row.getCell(cellIndex);
                if (cell == null) {
                    // Gaps in the row (and any index past the last cell) have no cell object.
                    continue;
                }

                // For a formula cell, dispatch on the type of its cached result; asking
                // for a string value from a non-string formula result would throw.
                int cellType = cell.getCellType();
                if (cellType == HSSFCell.CELL_TYPE_FORMULA) {
                    cellType = cell.getCachedFormulaResultType();
                }

                switch (cellType) {
                    case HSSFCell.CELL_TYPE_NUMERIC:
                        content.append(cell.getNumericCellValue());
                        break;
                    case HSSFCell.CELL_TYPE_BOOLEAN:
                        content.append(cell.getBooleanCellValue());
                        break;
                    case HSSFCell.CELL_TYPE_STRING:
                        content.append(cell.getStringCellValue());
                        break;
                    default:
                        // Blank and error cells carry no text to extract.
                        break;
                }
            }
        }
    }
    return content.toString();
}

/**
 * Extracts the plain text of an Excel 2007 (.xlsx) workbook.
 *
 * <p>Visits every sheet, row and cell in index order and appends each cell's value to one
 * string, with no separator between cells. Numeric and boolean values are appended in
 * their default Java string form. Formula cells contribute their cached result rather
 * than the formula text. Blank cells and error cells contribute nothing.
 *
 * <p>The OOXML package opened for the file is released before this method returns,
 * whether or not extraction succeeds.
 *
 * @param fileName path of the .xlsx file to open
 * @return the concatenated cell values; empty if no cell holds a value
 * @throws Exception if the workbook cannot be opened or read
 */
private static String extractTextFromXLS2007(String fileName) throws Exception{
    StringBuilder content = new StringBuilder();
    // Open the workbook directly from the file path.
    XSSFWorkbook xwb = new XSSFWorkbook(fileName);
    try {
        // Sheets
        for (int sheetIndex = 0; sheetIndex < xwb.getNumberOfSheets(); sheetIndex++) {
            XSSFSheet sheet = xwb.getSheetAt(sheetIndex);
            if (sheet == null) {
                continue;
            }
            // Rows
            for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
                XSSFRow row = sheet.getRow(rowIndex);
                if (row == null) {
                    continue;
                }
                // Cells
                for (int cellIndex = 0; cellIndex <= row.getLastCellNum(); cellIndex++) {
                    XSSFCell cell = row.getCell(cellIndex);
                    if (cell == null) {
                        // Gaps in the row (and any index past the last cell) have no cell object.
                        continue;
                    }

                    // For a formula cell, dispatch on the type of its cached result; asking
                    // for a string value from a non-string formula result would throw.
                    int cellType = cell.getCellType();
                    if (cellType == XSSFCell.CELL_TYPE_FORMULA) {
                        cellType = cell.getCachedFormulaResultType();
                    }

                    switch (cellType) {
                        case XSSFCell.CELL_TYPE_BOOLEAN:
                            content.append(cell.getBooleanCellValue());
                            break;
                        case XSSFCell.CELL_TYPE_NUMERIC:
                            content.append(cell.getNumericCellValue());
                            break;
                        case XSSFCell.CELL_TYPE_STRING:
                            content.append(cell.getStringCellValue());
                            break;
                        default:
                            // Blank and error cells carry no text to extract.
                            break;
                    }
                }
            }
        }
    } finally {
        // Release the package without writing anything back to the source file.
        xwb.getPackage().revert();
    }
    return content.toString();
}

} 转载自:

http://archive.cnblogs.com/a/1759383/

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值