使用 POI 读取 Word 表格内容(.doc 和 .docx)
jar依赖
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.9</version>
</dependency>
注意:以上依赖的版本必须保持一致,避免包冲突;
版本不一致时运行会报错:java.lang.VerifyError: Bad type on operand stack
读取.docx 表格内容
/**
 * Prints the text of every table cell in a .docx file to standard output:
 * one line per table row, each cell followed by a tab.
 *
 * NOTE(review): the method name is presumably a typo for "readWord2007";
 * it is left unchanged because callers may depend on it.
 *
 * @param file the .docx file to read; it is opened through its absolute path
 */
public void readWork2007(File file) {
// Start timestamp; not used in the visible part of the method.
long start = System.currentTimeMillis ();
try (FileInputStream fin = new FileInputStream (file.getAbsolutePath ())) {
System.out.println ("这是.docx文件,开始解析-----");
// Author's note: for Word 2007 files images are not read, and table data is placed at the end of the string
// Load the Word document
XWPFDocument xwpf = new XWPFDocument (fin);
// Get the tables contained in the document
Iterator<XWPFTable> it = xwpf.getTablesIterator ();
// Author's note: select which table to read; "set" is the index of the table to read and
// "total" is the number of tables in the file. Neither variable appears in the visible code;
// the loop below visits every table.
while (it.hasNext ()) {
XWPFTable table = it.next ();
// Read each row of the table
for (int i = 0; i < table.getNumberOfRows (); i++) {
XWPFTableRow row = table.getRow (i);
// Read each cell (column) of the row
List<XWPFTableCell> cells = row.getTableCells ();
for (XWPFTableCell cell : cells) {
// Print the text of the current cell
System.out.print (cell.getText () + "\t");
}
System.out.println ();
}
// The rest of the method (closing of the while/try blocks and any error handling) is
// omitted in the original post; the next line is the author's elision marker, not code.
…… 省略
}
读取.doc 表格内容
/**
 * Reads the tables of a .doc file, extracts the selected cell texts through
 * {@code analysis}, and prints each extracted string on its own line.
 *
 * @param file the .doc file to read; it is opened through its absolute path
 */
public void readWord2003(File file){
try (FileInputStream fin = new FileInputStream (file.getAbsolutePath ())) {
System.out.println ("这是.doc文件,------开始解析---------");
POIFSFileSystem pfs = new POIFSFileSystem (fin);
HWPFDocument hwpf = new HWPFDocument (pfs);
// Get the range covering the document's content
Range range = hwpf.getRange ();
// Iterate over the tables found in that range
TableIterator tableIterator = new TableIterator (range);
List<String> wordText= analysis (tableIterator);
wordText.forEach (aa -> System.out.println (aa.toString ()));
}
// The rest of the method (presumably the catch block) is omitted in the original post;
// the next line is the author's elision marker, not code.
……省略
}
/**
 * Collects the trimmed text of the 1st, 3rd and 5th cell of every row except
 * the first one, for each table produced by the iterator.
 *
 * <p>Each processed row always contributes exactly three entries, in column
 * order. If a row has fewer cells than a requested column (for example
 * because of merged cells), an empty string is added in its place instead of
 * failing with an index error.
 *
 * @param tablesIterator iterator over the tables of a .doc document
 * @return the extracted cell texts, three per processed row; never {@code null}
 */
private List<String> analysis(TableIterator tablesIterator) {
    // Zero-based indexes of the cells to extract (1st, 3rd and 5th column).
    final int[] wantedColumns = {0, 2, 4};
    List<String> wordTexts = new ArrayList<>();
    while (tablesIterator.hasNext()) {
        Table table = tablesIterator.next();
        // Starts at 1, so row 0 of every table is skipped (presumably a header row).
        for (int i = 1; i < table.numRows(); i++) {
            TableRow row = table.getRow(i);
            int cellCount = row.numCells();
            for (int column : wantedColumns) {
                // trim() drops surrounding whitespace and control characters
                // (everything up to U+0020), which should also remove the
                // cell-end marker HWPF leaves at the end of the cell text.
                wordTexts.add(column < cellCount ? row.getCell(column).getText().trim() : "");
            }
        }
    }
    return wordTexts;
}