使用 Apache POI 将 Word(.doc)文件转换为 HTML

/**
 * Converts a binary Word ({@code .doc}) document into an HTML string.
 *
 * <p>Every embedded picture is handed to {@code ossUtil.upload} under the path
 * {@code resource/wordPicture/<random UUID>.<extension>}, and that path is returned to the
 * converter. If an upload fails, the placeholder text {@code "upload picture exception"} is
 * returned instead of the path, so one bad picture does not abort the whole conversion.
 *
 * <p>The supplied stream is not closed by this method's own code.
 *
 * @param input stream providing the content of the {@code .doc} file
 * @return the converted document as indented HTML text, decoded as UTF-8
 * @throws IOException if the document cannot be read or released
 * @throws ParserConfigurationException if no DOM document builder can be created
 * @throws TransformerException if the DOM tree cannot be serialized to HTML
 */
private String doc2Html(InputStream input) throws IOException, ParserConfigurationException, TransformerException {
    // One name for both the serializer and the decoder, so the two can never disagree.
    String encoding = "utf-8";

    // try-with-resources releases the parsed document even when the conversion fails.
    try (HWPFDocument wordDocument = new HWPFDocument(input)) {
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        // Decide where each embedded picture is stored (cloud storage or local disk,
        // depending on what ossUtil does).
        wordToHtmlConverter.setPicturesManager((content, pictureType, suggestedName, widthInches, heightInches) -> {
            String path = "resource/wordPicture/" + UUID.randomUUID() + "." + pictureType.getExtension();
            try (InputStream in = new ByteArrayInputStream(content)) {
                ossUtil.upload(path, in);
            } catch (Exception e) {
                // Best effort: keep converting the rest of the document and mark this picture.
                e.printStackTrace();
                path = "upload picture exception";
            }
            return path;
        });

        // Parse the Word document into an in-memory DOM tree.
        wordToHtmlConverter.processDocument(wordDocument);
        org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();

        // Serialize straight into the in-memory buffer; an extra BufferedOutputStream
        // would add nothing here and would have to be flushed before reading the bytes.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(baos));

        // Decode with the same charset the serializer wrote, not the platform default.
        return baos.toString(encoding);
    }
}

// Usage example: converts the uploaded .doc file and prints the resulting HTML to standard output.
@RequestMapping("/testDoc2Html")
public void test(@RequestParam MultipartFile file,
                 HttpServletResponse response) throws Exception {
    // Close the upload stream once the conversion is done, even if it throws.
    try (InputStream input = file.getInputStream()) {
        String content = doc2Html(input);
        System.out.println(content);
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值