读取 Word 2003(.doc)文本:
/**
 * Extracts the text of a Word 2003 (.doc) document.
 *
 * <p>The extracted text is also echoed to standard output. Per the original
 * author's note, images in the document are not read.
 *
 * @param docFile the .doc file to read
 * @return the text returned by {@code WordExtractor.getText()}, or {@code null}
 *         if an {@link IOException} occurred while opening or reading the file
 */
public String readDoc(File docFile) {
    String text2003 = null;
    // try-with-resources closes the stream even when extraction throws;
    // the previous code only closed it on the success path.
    try (InputStream is = new FileInputStream(docFile)) {
        WordExtractor ex = new WordExtractor(is);
        text2003 = ex.getText();
        System.out.println(text2003);
    } catch (IOException e) {
        // Best-effort: report the failure and fall through to return null.
        e.printStackTrace();
    }
    return text2003;
}
读取 Word 2003(.doc)中的表格内容:
/**
 * Reads the text of every table in a Word 2003 (.doc) document.
 *
 * <p>Tables are visited in iterator order, then row by row, cell by cell and
 * paragraph by paragraph. Each paragraph's trimmed text is echoed to standard
 * output and appended to the result followed by a single {@code '\n'}.
 *
 * @param file the .doc file to read
 * @return the concatenated table text (an empty string when the document
 *         contains no tables), or {@code null} if any exception occurred
 *         while opening or parsing the file
 */
public String readExcelInWord(File file) {
    String text2003 = null;
    // try-with-resources guarantees the input stream is closed; the previous
    // code never closed it.
    try (FileInputStream in = new FileInputStream(file)) { // load the document
        POIFSFileSystem pfs = new POIFSFileSystem(in);
        HWPFDocument hwpf = new HWPFDocument(pfs);
        Range range = hwpf.getRange(); // the readable range of the document
        TableIterator it = new TableIterator(range);
        StringBuilder collected = new StringBuilder();
        // Iterate over the tables in the document.
        while (it.hasNext()) {
            Table tb = (Table) it.next();
            // Iterate over rows, starting at index 0.
            for (int i = 0; i < tb.numRows(); i++) {
                TableRow tr = tb.getRow(i);
                // Iterate over the cells of the row, starting at index 0.
                for (int j = 0; j < tr.numCells(); j++) {
                    TableCell td = tr.getCell(j);
                    // Iterate over the paragraphs inside the cell.
                    for (int k = 0; k < td.numParagraphs(); k++) {
                        Paragraph para = td.getParagraph(k);
                        String s = para.text().trim();
                        System.out.println(s);
                        collected.append(s).append('\n');
                    }
                } // end for cells
            } // end for rows
        } // end while
        // Assign only after a complete pass so a failure still yields null;
        // previously the result was never assigned and null was always returned.
        text2003 = collected.toString();
    } catch (Exception e) {
        // Deliberately broad: the method stays best-effort and reports any
        // parsing failure instead of propagating it.
        e.printStackTrace();
    }
    return text2003;
}
Apache POI 下载地址: https://poi.apache.org/download.html