// Importing Word tables into a database; built with the Apache POI 3.8 library on JDK 1.6.
package com.word;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
/**
 * Reads the tables of a Word document for import into a database; supports both
 * the .doc and .docx formats.
 *
 * <p>As written, this class only extracts the text of each table cell and prints
 * it to standard output, one line per cell (or per cell paragraph for .doc);
 * no database code is present in this class.</p>
 *
 * @author muyunfei
 * @qq 1147417467
 * @company Haiyi Software
 * <p>Modification History:</p>
 * <p>Date Author Description</p>
 * <p>------------------------------------------------------------------</p>
 * <p>Aug 31, 2015 Mu Yunfei Created</p>
 */
public class ExportDocImpl {

    /**
     * Demo entry point: prints the table cells of a hard-coded sample document.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        ExportDocImpl test = new ExportDocImpl();
        // For the legacy binary format, point this at "E:\\java导入word表格.doc" instead.
        String filePath = "E:\\java导入word表格.docx";
        test.testWord(filePath);
    }

    /**
     * Opens the Word document at {@code filePath} and prints the text of every
     * table cell to standard output.
     *
     * <p>A path whose lower-cased form ends with {@code "docx"} is parsed with
     * XWPF; any other path is parsed with HWPF as a legacy .doc file.</p>
     *
     * <p>Any exception raised while opening or parsing the file is caught and
     * its stack trace printed; nothing is propagated to the caller.</p>
     *
     * @param filePath path of the Word document to read
     */
    public void testWord(String filePath) {
        FileInputStream in = null;
        try {
            in = new FileInputStream(filePath); // load the document
            if (filePath.toLowerCase().endsWith("docx")) {
                printDocxTables(in);
            } else {
                printDocTables(in);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Explicit finally (rather than try-with-resources) keeps the code
            // compilable on JDK 1.6 and releases the file handle on every path.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        }
    }

    /**
     * Prints every table cell of an Office 2007 (.docx) document, one cell per line.
     *
     * <p>Original author's note: for Word 2007 documents images are not read, and
     * table data is placed at the end of the extracted text string.</p>
     *
     * @param in stream positioned at the start of the .docx file; not closed here
     * @throws IOException if the document cannot be read
     */
    private void printDocxTables(FileInputStream in) throws IOException {
        XWPFDocument xwpf = new XWPFDocument(in);
        // Only tables are visited; body paragraphs are not read.
        Iterator<XWPFTable> tables = xwpf.getTablesIterator();
        while (tables.hasNext()) {
            XWPFTable table = tables.next();
            List<XWPFTableRow> rows = table.getRows();
            for (XWPFTableRow row : rows) {
                List<XWPFTableCell> cells = row.getTableCells();
                for (XWPFTableCell cell : cells) {
                    System.out.println(cell.getText());
                }
            }
        }
    }

    /**
     * Prints every table cell paragraph of an Office 2003 (.doc) document, one
     * paragraph per line.
     *
     * @param in stream positioned at the start of the .doc file; not closed here
     * @throws IOException if the document cannot be read
     */
    private void printDocTables(FileInputStream in) throws IOException {
        POIFSFileSystem pfs = new POIFSFileSystem(in);
        HWPFDocument hwpf = new HWPFDocument(pfs);
        Range range = hwpf.getRange(); // readable range of the document
        TableIterator tables = new TableIterator(range);
        while (tables.hasNext()) {
            Table tb = (Table) tables.next();
            // Rows, cells and paragraphs are all indexed from 0.
            for (int i = 0; i < tb.numRows(); i++) {
                TableRow tr = tb.getRow(i);
                for (int j = 0; j < tr.numCells(); j++) {
                    TableCell td = tr.getCell(j);
                    for (int k = 0; k < td.numParagraphs(); k++) {
                        Paragraph para = td.getParagraph(k);
                        System.out.println(stripTrailingMarker(para.text()));
                    }
                }
            }
        }
    }

    /**
     * Drops the final character of a paragraph's text, which the original author
     * identifies as a trailing special symbol.
     *
     * @param s paragraph text, possibly {@code null} or empty
     * @return {@code s} without its last character, or {@code s} unchanged when
     *         it is {@code null} or empty
     */
    private static String stripTrailingMarker(String s) {
        if (s == null || "".equals(s)) {
            return s;
        }
        return s.substring(0, s.length() - 1);
    }
}