Java POI解析带图片的word文档

Java POI解析带图片的word文档

添加依赖（注意：下文示例代码还用到了 StringUtils 和 MockMultipartFile，它们通常分别来自 commons-lang3 和 spring-test，若项目中尚未引入需另行添加）

		<!-- word文档解析依赖 -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.11.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>5.2.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>5.2.3</version>
        </dependency>

待解析的 Word 文档

word模板截图

直接上代码

/**
 * Parses a .docx document with Apache POI, grouping its paragraphs by level-1 and
 * level-2 headings, and uploads every embedded picture through {@code FileService}.
 *
 * <p>Headings are recognised by comparing the paragraph style ID with the literal
 * strings {@code "1"} and {@code "2"}. Style IDs are document-specific, so a document
 * produced by another editor or template may need different IDs.
 */
public class WordParseServiceImpl {

    @Autowired
    private FileService fileService;

    /**
     * Parses the Word document and prints its structure to {@code System.err}:
     * each level-1 title, each level-2 title below it, and for every paragraph
     * below a level-2 title either the preview URLs of its uploaded pictures or,
     * when it yields no picture URL, its non-blank text.
     *
     * @return {@code Boolean.TRUE} when the document was read and processed,
     *         {@code Boolean.FALSE} when reading it failed with an {@link IOException}
     */
    public Boolean wordParse() {
        File file = new File("xxxxx/word.docx");
        // Both the stream and the document are closed on exit; XWPFDocument holds
        // the opened package and must be released as well.
        try (InputStream fis = new FileInputStream(file);
             XWPFDocument document = new XWPFDocument(fis)) {
            List<XWPFParagraph> paragraphs = document.getParagraphs();

            // Level-1 titles mapped to the paragraphs that belong to them.
            Map<String, List<XWPFParagraph>> documentStructure = parseHeadDocument(paragraphs, "1");
            for (Map.Entry<String, List<XWPFParagraph>> h1Entry : documentStructure.entrySet()) {
                // Level-1 title text.
                System.err.println(h1Entry.getKey());

                // Split the level-1 section again by level-2 titles.
                Map<String, List<XWPFParagraph>> h2Map = parseHeadDocument(h1Entry.getValue(), "2");
                for (Map.Entry<String, List<XWPFParagraph>> h2Entry : h2Map.entrySet()) {
                    // Level-2 title text.
                    System.err.println(h2Entry.getKey());
                    for (XWPFParagraph paragraph : h2Entry.getValue()) {
                        printParagraph(paragraph);
                    }
                }
            }
            return Boolean.TRUE;
        } catch (IOException e) {
            e.printStackTrace();
            return Boolean.FALSE;
        }
    }

    /**
     * Prints one body paragraph: the URL of every uploaded picture, or the paragraph
     * text when no picture URL was produced and the text is not blank.
     */
    private void printParagraph(XWPFParagraph paragraph) {
        List<String> imageUrls = uploadImages(paragraph);
        if (!imageUrls.isEmpty()) {
            for (String url : imageUrls) {
                System.err.println(url);
            }
            return;
        }
        String text = paragraph.getText();
        if (StringUtils.isNotBlank(text)) {
            System.err.println(text);
        }
    }

    /**
     * Uploads every picture embedded in the paragraph and collects the preview URLs.
     *
     * @param xwpfParagraph paragraph whose runs are scanned for embedded pictures
     * @return the non-blank preview URLs in document order; empty when the paragraph
     *         has no picture or no upload returned a URL
     */
    private List<String> uploadImages(XWPFParagraph xwpfParagraph) {
        List<String> urls = new ArrayList<>();
        for (XWPFRun run : xwpfParagraph.getRuns()) {
            for (XWPFPicture picture : run.getEmbeddedPictures()) {
                XWPFPictureData pictureData = picture.getPictureData();
                if (pictureData == null) {
                    // The picture references data that is not present in the package.
                    continue;
                }
                String fileName = pictureData.getFileName();
                byte[] bytes = pictureData.getData();
                String contentType = pictureData.getPictureTypeEnum() == null
                        ? null
                        : pictureData.getPictureTypeEnum().getContentType();

                // Strip the extension for the multipart field name; keep the whole name
                // when there is no extension (or only a leading dot) so that substring
                // neither throws nor yields an empty name.
                int dot = fileName.lastIndexOf('.');
                String name = dot > 0 ? fileName.substring(0, dot) : fileName;
                MockMultipartFile file = new MockMultipartFile(name, fileName, contentType, bytes);

                FileUploadDTO dto = new FileUploadDTO();
                dto.setFile(file);
                dto.setType(FileEntityType.test);
                FileDTO upload = fileService.upload(dto);
                String url = upload.getPreviewUrl();
                if (StringUtils.isNotBlank(url)) {
                    urls.add(url);
                }
            }
        }
        return urls;
    }

    /**
     * Groups paragraphs under the headings of one level.
     *
     * <p>A paragraph is a heading when its style ID equals {@code heading}; every other
     * paragraph is added to the section of the most recent heading. Paragraphs that
     * appear before the first heading (or after a heading with blank text) are carried
     * into the next non-blank heading's section. Sections whose titles are equal are
     * merged into one entry instead of the later one replacing the earlier one.
     *
     * @param paragraphs paragraphs to group, in document order
     * @param heading    style ID that marks a heading of the wanted level
     * @return heading text mapped to its paragraphs, in order of first appearance;
     *         empty when no heading with non-blank text is found
     */
    private Map<String, List<XWPFParagraph>> parseHeadDocument(List<XWPFParagraph> paragraphs, String heading) {
        Map<String, List<XWPFParagraph>> map = new LinkedHashMap<>();

        String title = "";
        List<XWPFParagraph> pending = new ArrayList<>();
        for (XWPFParagraph paragraph : paragraphs) {
            String style = paragraph.getStyle();
            if (style != null && style.equals(heading)) {
                String text = paragraph.getText();
                // A new, different title closes the current section.
                if (StringUtils.isNotBlank(title) && !title.equals(text)) {
                    map.computeIfAbsent(title, key -> new ArrayList<>()).addAll(pending);
                    pending = new ArrayList<>();
                }
                title = text;
            } else {
                pending.add(paragraph);
            }
        }
        if (StringUtils.isNotBlank(title)) {
            map.computeIfAbsent(title, key -> new ArrayList<>()).addAll(pending);
        }
        return map;
    }
}

至此结束!

  • 3
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值