java 解析docx_java解析xlsx和docx 文件 | 学步园

用 POI 包解析 Office 2007 的 Excel 和 Word 文档,需要的 jar 包有:

poi.jar: http://poi.apache.org/download.html

dom4j.jar: http://www.java2s.com/Code/Jar/d/Downloaddom4jjar.htm

xmlbeans-2.3.0.jar:http://www.jarfinder.com/index.php/jars/versionInfo/14388

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import org.apache.poi.POITextExtractor;

import org.apache.poi.extractor.ExtractorFactory;

import org.apache.poi.hssf.usermodel.HSSFCell;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;

import org.apache.poi.xssf.usermodel.XSSFCell;

import org.apache.poi.xssf.usermodel.XSSFRow;

import org.apache.poi.xssf.usermodel.XSSFSheet;

import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import org.apache.xmlbeans.XmlException;

public class ExcelParser {

private XSSFWorkbook workbook;

public ExcelParser(File file) {

try {

workbook = new XSSFWorkbook(new FileInputStream(file));

} catch (IOException e) {

e.printStackTrace();

}

}

private XSSFSheet getSheet(XSSFWorkbook workBook, int index) {

return workBook.getSheetAt(index);

}

private XSSFSheet getSheet(XSSFWorkbook workBook, String name) {

return workBook.getSheet(name);

}

public List> getDatasInSheet(int sheetIndex) {

List> results = new ArrayList>();

XSSFSheet sheet = workbook.getSheetAt(sheetIndex);

int rowCount = sheet.getLastRowNum();

if (rowCount < 1) {

return results;

} else {

for (int i = 0; i < rowCount; i++) {

XSSFRow row = sheet.getRow(i);

if (row != null) {

List rowData = new ArrayList();

int cellCount = row.getLastCellNum();

for (int cellIndex = 0; cellIndex < cellCount; cellIndex++) {

XSSFCell cell = row.getCell(cellIndex);

Object cellStr = this.getCellData(cell);

String ce = cellStr == null ? "" : cellStr.toString();

rowData.add(ce);

}

results.add(rowData);

}

}

return results;

}

}

private Object getCellData(XSSFCell cell) {

Object result = null;

if (cell != null) {

int cellType = cell.getCellType();

switch (cellType) {

case HSSFCell.CELL_TYPE_STRING:

result = cell.getRichStringCellValue().getString();

break;

case HSSFCell.CELL_TYPE_NUMERIC:

result = cell.getNumericCellValue();

break;

case HSSFCell.CELL_TYPE_FORMULA:

result = cell.getNumericCellValue();

break;

case HSSFCell.CELL_TYPE_BOOLEAN:

result = cell.getBooleanCellValue();

break;

case HSSFCell.CELL_TYPE_BLANK:

result = null;

break;

case HSSFCell.CELL_TYPE_ERROR:

result = null;

break;

default:

result = null;

System.out.println("NO type matched! ");

break;

}

}

return result;

}

public static void main(String[] args) {

System.out.println("hi");

ExcelParser excelParser = new ExcelParser(new File(

"F:\\Test\\ExcelParser\\分析.xlsx"));

List> results = excelParser.getDatasInSheet(0);

int dataSize = results.size();

for (int i = 0; i < dataSize; i++) {

List row = results.get(i);

for (int j = 0; j < row.size(); j++) {

String cell = row.get(j);

System.out.print(cell + "\t");

}

System.out.println();

}

TextExtractor textExtractor = new TextExtractor("F:\\Test\\ExcelParser\\论文阅读摘要.docx");

System.out.println(textExtractor.getContent());

}

}

/**
 * Pulls the plain text out of an Office document via Apache POI's
 * {@link ExtractorFactory}.
 */
class TextExtractor {

    private final File file;

    /**
     * @param path file-system path of the document to read
     */
    public TextExtractor(String path) {
        this.file = new File(path);
    }

    /**
     * Extracts the document's text.
     *
     * @return the extracted text, or {@code null} if extraction failed (the failure's
     *         stack trace is printed to standard error)
     */
    public String getContent() {
        String content = null;
        try {
            POITextExtractor extractor = ExtractorFactory.createExtractor(file);
            content = extractor.getText();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (OpenXML4JException e) {
            // Also covers InvalidFormatException, which is a subclass.
            e.printStackTrace();
        } catch (XmlException e) {
            e.printStackTrace();
        }
        return content;
    }

}

引用:http://my.csdn.net/guyezi123/code/detail/30042

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值