撰写本文时(2012年3月)POI 的最新版本为 3.8,发行包中包含以下 jar 文件:
poi-3.8-20120326.jar
poi-examples-3.8-20120326.jar
poi-excelant-3.8-20120326.jar
poi-ooxml-3.8-20120326.jar
poi-ooxml-schemas-3.8-20120326.jar
poi-scratchpad-3.8-20120326.jar
poi-3.8-20120326.jar
poi-examples-3.8-20120326.jar
poi-excelant-3.8-20120326.jar
poi-ooxml-3.8-20120326.jar
poi-ooxml-schemas-3.8-20120326.jar
poi-scratchpad-3.8-20120326.jar
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import javax.servlet.ServletException;
- import javax.servlet.http.HttpServlet;
- import javax.servlet.http.HttpServletRequest;
- import javax.servlet.http.HttpServletResponse;
- import org.apache.poi.hwpf.HWPFDocument;
- import org.apache.poi.hwpf.extractor.WordExtractor;
- import org.apache.poi.hwpf.model.PicturesTable;
- import org.apache.poi.hwpf.usermodel.CharacterRun;
- import org.apache.poi.hwpf.usermodel.Picture;
- import org.apache.poi.hwpf.usermodel.Range;
- public class WordDemo extends HttpServlet {
- private static final long serialVersionUID = 1L;
- public void doGet(HttpServletRequest request, HttpServletResponse response)
- throws ServletException, IOException {
- this.doPost(request, response);
- }
- public void doPost(HttpServletRequest request, HttpServletResponse response)
- throws ServletException, IOException {
- //从硬盘读取一个doc文档
- InputStream in = new FileInputStream("F:\\test.doc");
- //类从word文档中提取文本,非特殊情况下,都将使用getParagraphText()与getText()
- WordExtractor word = new WordExtractor(in);
- //获取段文本
- String [] strArray = word.getParagraphText();
- String str = word.getText();
- for(int i=0 ; i<strArray.length ; i++){
- System.out.println(strArray[i]+"\ti循环:"+i);
- }
- System.out.println(str +"\t --");
- //这个构造函数从InputStream中加载Word文档。
- HWPFDocument doc = new HWPFDocument((InputStream)new FileInputStream("F:\\test.doc"));
- //这个类为HWPF对象模型,对文档范围段操作
- Range range = doc.getRange(); //
- //看看此文档有多少个段落
- int num = range.numParagraphs();
- System.out.println(num+"段");
- //得到word数据流
- byte [] dataStream = doc.getDataStream();
- System.out.println("数据流长度:"+dataStream.length);
- //用于在一段范围内获得段落数
- int numChar = range.numCharacterRuns();
- System.out.println("CharacterRuns 数:"+numChar);
- //负责图像提取 和 确定一些文件某块是否包含嵌入的图像。
- PicturesTable table = new PicturesTable(doc, dataStream, null);
- for(int j=0 ; j<numChar ; j++){
- //这个类表示一个文本运行,有着共同的属性。
- CharacterRun run = range.getCharacterRun(j);
- //是否存在图片
- boolean bool = table.hasPicture(run);
- System.out.println("是否存在图片:"+bool);
- if(bool){
- //返回图片对象绑定到指定的CharacterRun
- Picture pic = table.extractPicture(run, true);
- //图片的内容字节写入到指定的输出流。
- pic.writeImageContent(new FileOutputStream("F:\\"+j+".bmp"));
- System.out.println("成功提取图片"+j+":");
- }
- }
- request.getRequestDispatcher("ok.jsp").forward(request, response);
- }
- }