用 POI 包解析 Office 2007 的 Excel 和 Word 文档，需要的 jar 包有：
poi.jar: http://poi.apache.org/download.html
dom4j.jar: http://www.java2s.com/Code/Jar/d/Downloaddom4jjar.htm
xmlbeans-2.3.0.jar:http://www.jarfinder.com/index.php/jars/versionInfo/14388
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException;
public class ExcelParser {
private XSSFWorkbook workbook;
public ExcelParser(File file) {
try {
workbook = new XSSFWorkbook(new FileInputStream(file));
} catch (IOException e) {
e.printStackTrace();
}
}
private XSSFSheet getSheet(XSSFWorkbook workBook, int index) {
return workBook.getSheetAt(index);
}
private XSSFSheet getSheet(XSSFWorkbook workBook, String name) {
return workBook.getSheet(name);
}
public List> getDatasInSheet(int sheetIndex) {
List> results = new ArrayList>();
XSSFSheet sheet = workbook.getSheetAt(sheetIndex);
int rowCount = sheet.getLastRowNum();
if (rowCount < 1) {
return results;
} else {
for (int i = 0; i < rowCount; i++) {
XSSFRow row = sheet.getRow(i);
if (row != null) {
List rowData = new ArrayList();
int cellCount = row.getLastCellNum();
for (int cellIndex = 0; cellIndex < cellCount; cellIndex++) {
XSSFCell cell = row.getCell(cellIndex);
Object cellStr = this.getCellData(cell);
String ce = cellStr == null ? "" : cellStr.toString();
rowData.add(ce);
}
results.add(rowData);
}
}
return results;
}
}
private Object getCellData(XSSFCell cell) {
Object result = null;
if (cell != null) {
int cellType = cell.getCellType();
switch (cellType) {
case HSSFCell.CELL_TYPE_STRING:
result = cell.getRichStringCellValue().getString();
break;
case HSSFCell.CELL_TYPE_NUMERIC:
result = cell.getNumericCellValue();
break;
case HSSFCell.CELL_TYPE_FORMULA:
result = cell.getNumericCellValue();
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
result = cell.getBooleanCellValue();
break;
case HSSFCell.CELL_TYPE_BLANK:
result = null;
break;
case HSSFCell.CELL_TYPE_ERROR:
result = null;
break;
default:
result = null;
System.out.println("NO type matched! ");
break;
}
}
return result;
}
public static void main(String[] args) {
System.out.println("hi");
ExcelParser excelParser = new ExcelParser(new File(
"F:\\Test\\ExcelParser\\分析.xlsx"));
List> results = excelParser.getDatasInSheet(0);
int dataSize = results.size();
for (int i = 0; i < dataSize; i++) {
List row = results.get(i);
for (int j = 0; j < row.size(); j++) {
String cell = row.get(j);
System.out.print(cell + "\t");
}
System.out.println();
}
TextExtractor textExtractor = new TextExtractor("F:\\Test\\ExcelParser\\论文阅读摘要.docx");
System.out.println(textExtractor.getContent());
}
}
/**
 * Extracts the text of a document file through Apache POI's
 * {@code ExtractorFactory}.
 */
class TextExtractor {
    /** Document to read, built from the path passed to the constructor. */
    private File file;

    /**
     * @param path file-system path of the document to extract text from
     */
    public TextExtractor(String path) {
        this.file = new File(path);
    }

    /**
     * Creates an extractor for the file and returns its text.
     *
     * @return the extracted text, or {@code null} if one of the caught
     *         exceptions occurred (its stack trace is printed)
     */
    public String getContent() {
        String content = null;
        try {
            POITextExtractor extractor = ExtractorFactory.createExtractor(file);
            content = extractor.getText();
        } catch (InvalidFormatException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (OpenXML4JException e) {
            e.printStackTrace();
        } catch (XmlException e) {
            e.printStackTrace();
        }
        return content;
    }
}
引用:http://my.csdn.net/guyezi123/code/detail/30042