java 解析docx_java解析xlsx和docx 文件 | 学步园

最新推荐文章于 2021-02-13 07:52:20 发布

weixin_39940344

最新推荐文章于 2021-02-13 07:52:20 发布

阅读量193

点赞数

文章标签： java 解析docx

本文链接：https://blog.csdn.net/weixin_39940344/article/details/114116012

版权

用 POI 包解析 Office 2007 的 Excel 和 Word 文档，需要的 jar 包有：

poi.jar: http://poi.apache.org/download.html

dom4j.jar: http://www.java2s.com/Code/Jar/d/Downloaddom4jjar.htm

xmlbeans-2.3.0.jar:http://www.jarfinder.com/index.php/jars/versionInfo/14388

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

import org.apache.poi.POITextExtractor;

import org.apache.poi.extractor.ExtractorFactory;

import org.apache.poi.hssf.usermodel.HSSFCell;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;

import org.apache.poi.xssf.usermodel.XSSFCell;

import org.apache.poi.xssf.usermodel.XSSFRow;

import org.apache.poi.xssf.usermodel.XSSFSheet;

import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import org.apache.xmlbeans.XmlException;

public class ExcelParser {

private XSSFWorkbook workbook;

public ExcelParser(File file) {

try {

workbook = new XSSFWorkbook(new FileInputStream(file));

} catch (IOException e) {

e.printStackTrace();

}

private XSSFSheet getSheet(XSSFWorkbook workBook, int index) {

return workBook.getSheetAt(index);

}

private XSSFSheet getSheet(XSSFWorkbook workBook, String name) {

return workBook.getSheet(name);

}

public List> getDatasInSheet(int sheetIndex) {

List> results = new ArrayList>();

XSSFSheet sheet = workbook.getSheetAt(sheetIndex);

int rowCount = sheet.getLastRowNum();

if (rowCount < 1) {

return results;

} else {

for (int i = 0; i < rowCount; i++) {

XSSFRow row = sheet.getRow(i);

if (row != null) {

List rowData = new ArrayList();

int cellCount = row.getLastCellNum();

for (int cellIndex = 0; cellIndex < cellCount; cellIndex++) {

XSSFCell cell = row.getCell(cellIndex);

Object cellStr = this.getCellData(cell);

String ce = cellStr == null ? "" : cellStr.toString();

rowData.add(ce);

}

results.add(rowData);

}

return results;

}

private Object getCellData(XSSFCell cell) {

Object result = null;

if (cell != null) {

int cellType = cell.getCellType();

switch (cellType) {

case HSSFCell.CELL_TYPE_STRING:

result = cell.getRichStringCellValue().getString();

break;

case HSSFCell.CELL_TYPE_NUMERIC:

result = cell.getNumericCellValue();

break;

case HSSFCell.CELL_TYPE_FORMULA:

result = cell.getNumericCellValue();

break;

case HSSFCell.CELL_TYPE_BOOLEAN:

result = cell.getBooleanCellValue();

break;

case HSSFCell.CELL_TYPE_BLANK:

result = null;

break;

case HSSFCell.CELL_TYPE_ERROR:

result = null;

break;

default:

result = null;

System.out.println("NO type matched! ");

break;

}

return result;

}

public static void main(String[] args) {

System.out.println("hi");

ExcelParser excelParser = new ExcelParser(new File(

"F:\\Test\\ExcelParser\\分析.xlsx"));

List> results = excelParser.getDatasInSheet(0);

int dataSize = results.size();

for (int i = 0; i < dataSize; i++) {

List row = results.get(i);

for (int j = 0; j < row.size(); j++) {

String cell = row.get(j);

System.out.print(cell + "\t");

}

System.out.println();

}

TextExtractor textExtractor = new TextExtractor("F:\\Test\\ExcelParser\\论文阅读摘要.docx");

System.out.println(textExtractor.getContent());

}

class TextExtractor {

private File file;

public TextExtractor(String path) {

file = new File(path);

}

public String getContent() {

try {

POITextExtractor textExtractor = ExtractorFactory.createExtractor(file);

return textExtractor.getText();

} catch (InvalidFormatException e) {

// TODO Auto-generated catch block

e.printStackTrace();

} catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

} catch (OpenXML4JException e) {

// TODO Auto-generated catch block

e.printStackTrace();

} catch (XmlException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

return null;

}

引用：http://my.csdn.net/guyezi123/code/detail/30042

weixin_39940344

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
java 解析docx_java解析xlsx和docx 文件 | 学步园

用 POI 包解析 Office 2007 的 Excel 和 Word 文档，需要的 jar 包有：poi.jar: http://poi.apache.org/download.html ；dom4j.jar: http://www.java2s.com/Code/Jar/d/Downloaddom4jjar.htm ；xmlbeans-2.3.0.jar: http://www.jarfinder.com/index....
复制链接

扫一扫