POI读取Word文件(支持HWPF和XWPF两种方式,分别对应 .doc 和 .docx)

POI读取Word文件(支持HWPF和XWPF两种方式,分别对应 .doc 和 .docx)

参考:HSSF,XSSF,SXSSF三种方式

1. 引入 Maven 依赖(poi、poi-ooxml、poi-scratchpad 三个组件的版本必须一致)

   <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi</artifactId>
      <version>4.1.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>4.1.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-scratchpad</artifactId>
      <version>4.1.1</version>
    </dependency>

2.读取word

/**
 * Utility for extracting plain text from Microsoft Word files with Apache POI.
 *
 * <p>Two formats are handled: OOXML documents (read through {@code XWPFDocument}) and the older
 * OLE2 binary documents (read through {@code HWPFDocument}).
 */
public class POIUtil {

  /**
   * Reads the text content of a Word document.
   *
   * <p>The file is first opened as an OOXML document. If POI rejects it with
   * {@code OLE2NotOfficeXmlFileException} (the file is an older OLE2 Word document), it is
   * re-opened with the HWPF reader instead.
   *
   * @param path file system path of the Word document
   * @return the extracted text, or {@code null} when {@code path} does not denote an existing
   *     regular file
   * @throws Exception if the file cannot be opened, read, or parsed by either reader
   */
  public static String readWord(String path) throws Exception {
    File file = new File(path);
    if (!file.exists() || !file.isFile()) {
      return null;
    }
    try {
      return readDocx(file);
    } catch (OLE2NotOfficeXmlFileException e) {
      // Older Word format: the OOXML stream has already been closed by readDocx,
      // so a fresh stream is opened for the HWPF reader.
      return readDoc(file);
    }
  }

  /** Extracts the text of an OOXML Word document, closing every resource it opens. */
  private static String readDocx(File file) throws IOException {
    // Resources are closed in reverse order: extractor, document, then the stream.
    try (InputStream in = new FileInputStream(file);
        XWPFDocument document = new XWPFDocument(in);
        POIXMLTextExtractor extractor = new XWPFWordExtractor(document)) {
      return extractor.getText();
    }
  }

  /** Extracts the text of an OLE2 binary Word document, closing every resource it opens. */
  private static String readDoc(File file) throws IOException {
    // Resources are closed in reverse order: extractor, document, then the stream.
    try (InputStream in = new FileInputStream(file);
        HWPFDocument document = new HWPFDocument(in);
        WordExtractor extractor = new WordExtractor(document)) {
      return extractor.getText();
    }
  }

  /**
   * Demo entry point: prints the text of a Word file to standard output.
   *
   * @param args optional; {@code args[0]} is the path of the file to read. Without arguments the
   *     built-in sample path is used.
   */
  public static void main(String[] args) {
    String path = args.length > 0 ? args[0] : "/Users/jj/Desktop/胜多负少的范德萨.doc";

    try {
      System.out.println(readWord(path));
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

}
  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值