Java:HTML 转 Word、PDF(包含图片)

html转word

maven依赖

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>

核心代码

import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Entities;
import org.jsoup.select.Elements;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;

import org.jsoup.nodes.Document;
import sun.misc.BASE64Encoder;

/**
 * Controller that serves an HTML snippet as a downloadable Word ({@code .doc}) file.
 *
 * <p>The HTML is written verbatim into the {@code WordDocument} entry of an OLE2
 * container created with Apache POI. Remote images referenced by {@code <img>} tags
 * are downloaded and embedded as Base64 data URIs first, so the resulting file does
 * not depend on network access when it is opened.
 */
@RestController
public class WordController {

    /** Maximum time, in milliseconds, to wait for the image server to accept the connection. */
    private static final int CONNECT_TIMEOUT_MS = 5000;

    /** Maximum time, in milliseconds, to wait for image data once connected. */
    private static final int READ_TIMEOUT_MS = 10000;

    /**
     * Writes the generated document to the HTTP response as an attachment named {@code test.doc}.
     *
     * @param response servlet response that receives the headers and the document bytes
     * @return always {@code null}; the body is written directly to the response stream
     * @throws IOException if the document cannot be built or written to the response
     */
    @GetMapping("/")
    public String html2doc(HttpServletResponse response) throws IOException {
        // The HTML declares UTF-8 in its <meta> tag, so encode it as UTF-8 explicitly
        // instead of relying on the platform default charset.
        byte[] htmlBytes = getHtml().getBytes(java.nio.charset.StandardCharsets.UTF_8);

        // URL-encoding leaves only ASCII characters, so the value can go into the header as-is.
        String fileName = java.net.URLEncoder.encode("test", "UTF-8") + ".doc";
        response.reset();
        response.setHeader("Content-Disposition", "attachment;filename=" + fileName);
        response.setContentType("application/msword;charset=utf-8");

        // To write to a local file instead, replace the response stream with a FileOutputStream.
        try (ByteArrayInputStream htmlStream = new ByteArrayInputStream(htmlBytes);
             POIFSFileSystem poifs = new POIFSFileSystem();
             OutputStream ostream = response.getOutputStream()) {
            DirectoryEntry root = poifs.getRoot();
            root.createDocument("WordDocument", htmlStream);
            poifs.writeFilesystem(ostream);
        }
        return null;
    }

    /**
     * Builds the HTML that becomes the document body.
     *
     * <p>Every {@code <img>} source is replaced by a Base64 data URI of the downloaded
     * image, a UTF-8 {@code <meta>} tag is appended to the head, and the output is
     * serialised with XML syntax so that all tags are closed.
     *
     * @return the normalised HTML document as a string
     */
    public String getHtml() {
        String content = "<p><img src=\"https://www.baidu.com/img/PCtm_d9c8750bed0b3c7d089fa7d55720d6cf.png\" alt=\"\" width=\"233\" height=\"233\" /></p>";
        Document doc = Jsoup.parse(content);
        Elements images = doc.select("img");
        // NOTE(review): the MIME type is fixed to image/jpeg regardless of the real image format.
        images.forEach(image ->
                image.attr("src", "data:image/jpeg;base64," + ImageToBase64ByOnline(image.attr("src"))));
        doc.head().append("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"></meta>");
        // Have jsoup emit well-formed markup with closed tags.
        doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
        doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);

        return doc.html();
    }

    /**
     * Downloads an online image and returns its content as a Base64 string.
     *
     * <p>The download is best effort: if the URL is malformed or the transfer fails,
     * the error is reported on standard error and an empty string is returned rather
     * than a truncated image.
     *
     * @param imgURL HTTP(S) URL of the image
     * @return the Base64 encoding of the image bytes without line breaks, or an empty
     *         string if the image could not be downloaded
     */
    public static String ImageToBase64ByOnline(String imgURL) {
        ByteArrayOutputStream data = new ByteArrayOutputStream();
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(imgURL).openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            // Without a read timeout a stalled server would block the request thread forever.
            conn.setReadTimeout(READ_TIMEOUT_MS);
            try (InputStream in = conn.getInputStream()) {
                byte[] buffer = new byte[1024];
                int len;
                while ((len = in.read(buffer)) != -1) {
                    data.write(buffer, 0, len);
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to download image: " + imgURL);
            e.printStackTrace();
            return "";
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        // java.util.Base64 replaces the JDK-internal sun.misc.BASE64Encoder and, unlike it,
        // does not insert line breaks, which would be invalid inside a data URI.
        return java.util.Base64.getEncoder().encodeToString(data.toByteArray());
    }
}

html转pdf

maven依赖

<dependency>
    <groupId>org.xhtmlrenderer</groupId>
    <artifactId>core-renderer</artifactId>
    <version>R8</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>

核心代码

import com.lowagie.text.DocumentException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Entities;
import org.jsoup.select.Elements;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;

import javax.servlet.http.HttpServletResponse;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

import org.jsoup.nodes.Document;
import sun.misc.BASE64Encoder;

/**
 * Controller that renders an HTML snippet to PDF with Flying Saucer ({@link ITextRenderer}).
 *
 * <p>Remote images referenced by {@code <img>} tags are downloaded and embedded as
 * Base64 data URIs; {@code Base64ImgReplacedElementFactory} turns those data URIs
 * into iText images during layout.
 */
@RestController
public class PdfController {

    /** Maximum time, in milliseconds, to wait for the image server to accept the connection. */
    private static final int CONNECT_TIMEOUT_MS = 5000;

    /** Maximum time, in milliseconds, to wait for image data once connected. */
    private static final int READ_TIMEOUT_MS = 10000;

    /**
     * Renders the generated HTML to PDF and streams it to the HTTP response for inline preview.
     *
     * @param response servlet response that receives the headers and the PDF bytes
     * @return always {@code null}; the body is written directly to the response stream
     * @throws DocumentException if the PDF cannot be created
     * @throws IOException if writing to the response fails
     */
    @GetMapping("/")
    public String html2pdf(HttpServletResponse response) throws DocumentException, IOException {
        ITextRenderer renderer = new ITextRenderer();
        // Base64 image support: converts data-URI images into iText image objects.
        renderer.getSharedContext().setReplacedElementFactory(new Base64ImgReplacedElementFactory());
        renderer.getSharedContext().getTextRenderer().setSmoothingThreshold(0);

        renderer.setDocumentFromString(getHtml());
        renderer.layout();

        // URL-encoding leaves only ASCII characters, so the value can go into the header as-is.
        String fileName = java.net.URLEncoder.encode("test.pdf", "UTF-8");
        response.reset();
        response.setCharacterEncoding("UTF-8");
        response.setContentType("application/pdf");
        // "inline" asks the browser to preview the file instead of downloading it; the header
        // value must start with a disposition type to be well-formed.
        response.setHeader("Content-Disposition", "inline;filename=" + fileName);
        try (OutputStream ostream = response.getOutputStream()) {
            renderer.createPDF(ostream);
        }
        return null;
    }

    /**
     * Builds the XHTML that is rendered to PDF.
     *
     * <p>Every {@code <img>} source is replaced by a Base64 data URI of the downloaded
     * image, a UTF-8 {@code <meta>} tag is appended to the head, and the output is
     * serialised with XML syntax so that all tags are closed.
     *
     * @return the normalised HTML document as a string
     */
    public String getHtml() {
        String content = "<p><img src=\"https://www.baidu.com/img/PCtm_d9c8750bed0b3c7d089fa7d55720d6cf.png\" alt=\"\" width=\"233\" height=\"233\" /></p>";
        Document doc = Jsoup.parse(content);
        Elements images = doc.select("img");
        // NOTE(review): the MIME type is fixed to image/jpeg regardless of the real image format.
        images.forEach(image ->
                image.attr("src", "data:image/jpeg;base64," + ImageToBase64ByOnline(image.attr("src"))));
        doc.head().append("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"></meta>");
        // Have jsoup emit well-formed markup with closed tags.
        doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
        doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);

        return doc.html();
    }

    /**
     * Downloads an online image and returns its content as a Base64 string.
     *
     * <p>The download is best effort: if the URL is malformed or the transfer fails,
     * the error is reported on standard error and an empty string is returned rather
     * than a truncated image.
     *
     * @param imgURL HTTP(S) URL of the image
     * @return the Base64 encoding of the image bytes without line breaks, or an empty
     *         string if the image could not be downloaded
     */
    public static String ImageToBase64ByOnline(String imgURL) {
        ByteArrayOutputStream data = new ByteArrayOutputStream();
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(imgURL).openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            // Without a read timeout a stalled server would block the request thread forever.
            conn.setReadTimeout(READ_TIMEOUT_MS);
            try (InputStream in = conn.getInputStream()) {
                byte[] buffer = new byte[1024];
                int len;
                while ((len = in.read(buffer)) != -1) {
                    data.write(buffer, 0, len);
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to download image: " + imgURL);
            e.printStackTrace();
            return "";
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        // java.util.Base64 replaces the JDK-internal sun.misc.BASE64Encoder and, unlike it,
        // does not insert line breaks, which would be invalid inside a data URI.
        return java.util.Base64.getEncoder().encodeToString(data.toByteArray());
    }
}

Base64ImgReplacedElementFactory类

import java.io.IOException;


import org.w3c.dom.Element;
import org.xhtmlrenderer.extend.FSImage;
import org.xhtmlrenderer.extend.ReplacedElement;
import org.xhtmlrenderer.extend.ReplacedElementFactory;
import org.xhtmlrenderer.extend.UserAgentCallback;
import org.xhtmlrenderer.layout.LayoutContext;
import org.xhtmlrenderer.pdf.ITextFSImage;
import org.xhtmlrenderer.pdf.ITextImageElement;
import org.xhtmlrenderer.render.BlockBox;
import org.xhtmlrenderer.simple.extend.FormSubmissionListener;


import com.lowagie.text.BadElementException;
import com.lowagie.text.Image;
import com.lowagie.text.pdf.codec.Base64;
/**
 * Replaced-element factory that adds support for Base64 data-URI images by
 * converting {@code <img>} elements into iText image objects.
 */
public class Base64ImgReplacedElementFactory implements ReplacedElementFactory {

    /** Prefix identifying an image data URI; matched case-insensitively. */
    private static final String DATA_IMAGE_PREFIX = "data:image/";

    /** Suffix of the data-URI header that marks the payload as Base64 encoded. */
    private static final String BASE64_MARKER = ";base64";

    /**
     * Replaces {@code <img>} elements with an iText-backed image element.
     *
     * @param c layout context
     * @param box box being laid out
     * @param uac user-agent callback used to load images that are not data URIs
     * @param cssWidth CSS width, or {@code -1} if not specified
     * @param cssHeight CSS height, or {@code -1} if not specified
     * @return the replaced element, or {@code null} if the box is not an image or the
     *         image could not be loaded
     */
    @Override
    public ReplacedElement createReplacedElement(LayoutContext c, BlockBox box, UserAgentCallback uac,
                                                 int cssWidth, int cssHeight) {
        Element e = box.getElement();
        if (e == null || !"img".equals(e.getNodeName())) {
            return null;
        }

        FSImage fsImage;
        try {
            // Build the iText image from the src attribute.
            fsImage = buildImage(e.getAttribute("src"), uac);
        } catch (BadElementException | IOException ex) {
            // An unreadable image is skipped instead of failing the whole render.
            return null;
        }
        if (fsImage == null) {
            return null;
        }

        // Scale only when at least one CSS dimension was specified.
        if (cssWidth != -1 || cssHeight != -1) {
            fsImage.scale(cssWidth, cssHeight);
        }
        return new ITextImageElement(fsImage);
    }

    /**
     * Builds an image from the {@code src} attribute, decoding Base64 data URIs itself
     * and delegating every other source to the user-agent callback.
     *
     * @param srcAttr value of the {@code src} attribute
     * @param uac user-agent callback used for sources that are not data URIs
     * @return the loaded image
     * @throws IOException if a data URI is not Base64 encoded, its payload cannot be
     *         decoded, or the image bytes cannot be read
     * @throws BadElementException if iText rejects the image data
     */
    protected FSImage buildImage(String srcAttr, UserAgentCallback uac) throws IOException,
            BadElementException {
        // regionMatches(ignoreCase) is locale-independent, unlike String.toLowerCase().
        boolean isDataImage = srcAttr.regionMatches(true, 0, DATA_IMAGE_PREFIX, 0,
                DATA_IMAGE_PREFIX.length());
        if (!isDataImage) {
            return uac.getImageResource(srcAttr).getImage();
        }

        // A data URI has the form "data:<mediatype>[;base64],<payload>": the first comma
        // separates the header from the payload.
        int comma = srcAttr.indexOf(',');
        if (comma < 0
                || !srcAttr.substring(0, comma).toLowerCase(java.util.Locale.ROOT).endsWith(BASE64_MARKER)) {
            throw new IOException("Unsupported image data URI: expected a Base64-encoded payload");
        }

        byte[] decodedBytes = Base64.decode(srcAttr.substring(comma + 1));
        if (decodedBytes == null) {
            // Guard so an undecodable payload surfaces as an IOException, not a later NPE.
            throw new IOException("Image data URI contains invalid Base64 data");
        }
        return new ITextFSImage(Image.getInstance(decodedBytes));
    }

    /** No state to reset. */
    @Override
    public void reset() {}

    /** No per-element state to remove. */
    @Override
    public void remove(Element arg0) {}

    /** Form submission is not supported; the listener is ignored. */
    @Override
    public void setFormSubmissionListener(FormSubmissionListener arg0) {}

}

  • 0
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
可以使用 Apache POI 来实现 Java 中的 HTML 转 Word。具体步骤如下:

1. 引入 Apache POI 的依赖,例如 Maven 项目中可在 pom.xml 文件中添加以下依赖:

```xml
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
```

2. 创建一个空的 Word 文档对象:

```java
XWPFDocument doc = new XWPFDocument();
```

3. 将 HTML 转换为 Word 文档中的内容:

```java
// 假设 htmlContent 是包含 HTML 内容的字符串
String htmlContent = "<html><body><h1>Hello, World!</h1></body></html>";
// 创建一个输入流,将 HTML 内容写入其中
InputStream input = new ByteArrayInputStream(htmlContent.getBytes());
// 创建一个输出流,将 Word 内容写入其中
XWPFParagraph paragraph = doc.createParagraph();
XWPFRun run = paragraph.createRun();
run.setText("Converted from HTML:");
run.addBreak();
XWPFHtmlConverter.getInstance().convert(input, run);
```

4. 将 Word 文档保存到本地文件系统:

```java
FileOutputStream out = new FileOutputStream("output.docx");
doc.write(out);
out.close();
doc.close();
```

完整的代码示例:

```java
import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.core.IURIResolver;

public class HtmlToWord {
    public static void main(String[] args) throws Exception {
        // 创建一个空的 Word 文档对象
        XWPFDocument doc = new XWPFDocument();
        // 假设 htmlContent 是包含 HTML 内容的字符串
        String htmlContent = "<html><body><h1>Hello, World!</h1></body></html>";
        // 创建一个输入流,将 HTML 内容写入其中
        InputStream input = new ByteArrayInputStream(htmlContent.getBytes());
        // 创建一个输出流,将 Word 内容写入其中
        XWPFParagraph paragraph = doc.createParagraph();
        XWPFRun run = paragraph.createRun();
        run.setText("Converted from HTML:");
        run.addBreak();
        XHTMLConverter.getInstance().convert(input, run, new FileURIResolver());
        // 将 Word 文档保存到本地文件系统
        FileOutputStream out = new FileOutputStream("output.docx");
        doc.write(out);
        out.close();
        doc.close();
    }
}
```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值