package cn.ccb.boup.util.poi;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Utility for extracting table text from legacy binary Word (.doc) files
 * using the Apache POI HWPF API.
 *
 * @author guojigang
 */
public class OperateWord {

    /**
     * Index of the first table row that is read; rows 0 and 1 are skipped.
     * NOTE(review): presumably the skipped rows are header rows - confirm
     * against the layout of the documents this is used with.
     */
    private static final int FIRST_DATA_ROW = 2;

    /** Separator written between the paragraph texts of one table row. */
    private static final String CELL_SEPARATOR = "|";

    /**
     * Reads the table data of a Word document.
     *
     * <p>Only the first table found in the document is processed. Each row
     * from {@link #FIRST_DATA_ROW} onwards becomes one list entry in which
     * the trimmed paragraph texts of all cells are joined with {@code "|"}.
     *
     * <p>This method is best-effort: if the file cannot be opened or parsed,
     * the stack trace is printed and the rows collected so far (possibly
     * none) are returned; no exception is propagated to the caller.
     *
     * @param filePath path of the .doc file to read
     * @return one string per table row; never {@code null}
     */
    public static List<String> readWordCell(String filePath) {
        List<String> lines = new ArrayList<String>();
        FileInputStream in = null;
        try {
            in = new FileInputStream(filePath); // load the document
            HWPFDocument document = new HWPFDocument(new POIFSFileSystem(in));
            // Iterate over the tables within the document's full range.
            TableIterator tables = new TableIterator(document.getRange());
            if (tables.hasNext()) {
                Table table = (Table) tables.next();
                for (int rowIndex = FIRST_DATA_ROW; rowIndex < table.numRows(); rowIndex++) {
                    lines.add(joinRowCells(table.getRow(rowIndex)));
                }
            }
        } catch (Exception e) {
            // Deliberately best-effort: report the problem and fall through
            // to return whatever was collected.
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return lines;
    }

    /**
     * Joins the trimmed, non-empty paragraph texts of every cell in a row
     * with {@code "|"}, without a trailing separator.
     */
    private static String joinRowCells(TableRow row) {
        StringBuilder line = new StringBuilder();
        for (int cellIndex = 0; cellIndex < row.numCells(); cellIndex++) {
            TableCell cell = row.getCell(cellIndex);
            for (int paraIndex = 0; paraIndex < cell.numParagraphs(); paraIndex++) {
                String text = cell.getParagraph(paraIndex).text();
                if (text == null || text.length() == 0) {
                    continue; // nothing to append (avoids emitting the literal "null")
                }
                // Without trim() the extracted text ends with a stray garbage character.
                line.append(text.trim()).append(CELL_SEPARATOR);
            }
        }
        // Every appended piece ends with the separator, so drop the final one.
        if (line.length() > 0) {
            line.setLength(line.length() - CELL_SEPARATOR.length());
        }
        return line.toString();
    }

    /**
     * Demo entry point: prints every extracted row of a sample document to
     * standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        List<String> rows = OperateWord.readWordCell("C:\\身份核查结果.doc");
        for (String row : rows) {
            System.err.println(row);
        }
    }
}
所需类库:
poi-3.8-beta5-20111217.jar
poi-scratchpad-3.8-beta5-20111217.jar
可以从http://poi.apache.org/下载。
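单独测试没有问题,但放到项目里运行时会报下面的错误。NoSuchMethodError 说明运行时加载到的 org.apache.poi.POIDocument 类中没有 poi-scratchpad 3.8 所调用的那个构造方法,很可能是项目的类路径中还存在另一个旧版本的 POI jar 包,造成了类冲突。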
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.poi.POIDocument.<init>(Lorg/apache/poi/poifs/filesystem/DirectoryNode;)V
at org.apache.poi.hwpf.HWPFDocumentCore.<init>(HWPFDocumentCore.java:146)
at org.apache.poi.hwpf.HWPFDocument.<init>(HWPFDocument.java:218)
at org.apache.poi.hwpf.HWPFDocument.<init>(HWPFDocument.java:186)
at cn.ccb.boup.util.poi.OperateWord.readWordCell(OperateWord.java:37)
at cn.ccb.boup.util.poi.OperateWord.main(OperateWord.java:87)
解决办法是:打开项目的 Properties -> Java Build Path -> Order and Export,会发现 POI 的 2 个 jar 包排在最后;把这两个包移到最上面,使它们在类路径中优先于其他 jar 包被加载,问题解决!(更彻底的做法是找出并移除类路径中与之冲突的旧版本 POI jar 包。)