md转pdf

md转pdf

markdown转pdf,先转为html,再转换为pdf

中文字体文件

simhei.ttf,需要和css里面的字体类型保持一致
在C:\Windows\Fonts 目录下,查找【黑体 常规】,粘贴出来即可
在这里插入图片描述
在MdToPdfUtils.initConverterProperties方法中修改字体文件路径
在这里插入图片描述
在MdToPdfUtils.CssHtml方法中设置字体类型,并可配置css样式展示

<body style="font-size:12.0pt; font-family:simhei">

maven依赖

<dependency>
            <groupId>org.commonmark</groupId>
            <artifactId>commonmark</artifactId>
            <version>0.21.0</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>layout</artifactId>
            <version>8.0.4</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>html2pdf</artifactId>
            <version>5.0.4</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>kernel</artifactId>
            <version>8.0.4</version>
        </dependency>

代码片段

入参实体

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.io.Serializable;

/**
 * A single markdown section: a title string plus the markdown body.
 *
 * @Author: 
 * @Date: 2024/8/8 18:54
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class MarkdownDTO implements Serializable {

    /** Markdown body of the section, in md format. */
    private String content;

    /** Section title, as a plain string. */
    private String title;
}

工具类


import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.pdf.*;
import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
import com.itextpdf.kernel.pdf.navigation.PdfDestination;
import com.itextpdf.kernel.pdf.navigation.PdfExplicitDestination;
import com.itextpdf.kernel.pdf.navigation.PdfExplicitRemoteGoToDestination;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.styledxmlparser.css.media.MediaDeviceDescription;
import com.itextpdf.styledxmlparser.css.media.MediaType;
import lombok.extern.slf4j.Slf4j;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Entities;

import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Collectors;

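/**
 * Utility that renders a list of markdown sections into one bookmarked PDF
 * (markdown -> HTML -> PDF) and streams it to an HTTP response.
 *
 * @Author: 
 * @Date: 2024/8/7 11:23
 */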
@Slf4j
public class MdToPdfUtils {

    /**
     * Renders the given markdown sections into a single PDF and writes it to the
     * response as a file download.
     *
     * <p>Each section contributes an {@code <h1>} title followed by its rendered
     * markdown body; the titles are also passed on so they can become PDF bookmarks.
     *
     * @param dtos     sections to render, in output order
     * @param fileName download file name without the {@code .pdf} extension
     * @param resp     servlet response that receives the PDF bytes
     * @throws IOException if writing to the response stream fails
     */
    public static void convertPDF(List<MarkdownDTO> dtos, String fileName, HttpServletResponse resp) throws IOException {

        // Parse every section to HTML (title heading + body) and join them.
        String html = dtos.stream()
                .map(dto -> parserTile(dto.getTitle()) + parserMd(dto.getContent()))
                .collect(Collectors.joining("\n\n"));

        // Wrap the fragment in the full HTML document that carries the CSS.
        html = CssHtml(html);

        // Normalise unclosed tags, which would otherwise break the conversion.
        html = formatHtml(html);

        // Work around garbled output for &#xa0; (plain literal replace, no regex needed).
        html = html.replace("&#xa0;", " ");

        // Titles double as bookmark labels.
        List<String> titles = dtos.stream().map(MarkdownDTO::getTitle).collect(Collectors.toList());

        // Render the whole PDF BEFORE touching the response, so that an exception
        // raised while rendering cannot leave download headers on an error response.
        byte[] pdf = htmlToPdf(html, fileName, titles);

        // URLEncoder performs form encoding (space -> '+', '*' left as-is); convert the
        // result to plain percent-encoding so it is valid in both filename forms below.
        String encodedName = URLEncoder.encode(fileName + ".pdf", "UTF-8")
                .replace("+", "%20")
                .replace("*", "%2A");

        resp.setCharacterEncoding("UTF-8");
        resp.setContentType("application/pdf");
        // Keep the original filename= form and add the RFC 5987 filename* form
        // for clients that only decode non-ASCII names from the latter.
        resp.setHeader("Content-Disposition",
                "attachment;filename=" + encodedName + ";filename*=UTF-8''" + encodedName);
        resp.setContentLength(pdf.length);

        try (ServletOutputStream outputStream = resp.getOutputStream()) {
            outputStream.write(pdf);
            outputStream.flush();
        }
    }

    /**
     * Renders a markdown string to an HTML fragment using commonmark.
     *
     * @param markdown markdown source; {@code null} is treated as an empty body so that
     *                 a section without content does not abort the whole export
     * @return the rendered HTML fragment, or an empty string for {@code null} input
     */
    private static String parserMd(String markdown) {
        if (markdown == null) {
            return "";
        }
        Parser parser = Parser.builder().build();
        HtmlRenderer renderer = HtmlRenderer.builder().build();
        return renderer.render(parser.parse(markdown));
    }

    /**
     * Builds the {@code <h1>} heading for a section title; the heading text is what
     * the bookmark step later searches for in the PDF.
     *
     * @param title plain-text section title; {@code null} yields an empty heading
     * @return the heading markup followed by a newline
     */
    private static String parserTile(String title) {
        if (title == null) {
            return "<h1></h1>\n";
        }
        // The title is plain text, not markup: escape it so characters such as
        // '<' or '&' are shown literally instead of being parsed as HTML.
        // '&' must be replaced first to avoid double-escaping the other entities.
        String escaped = title
                .replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;");
        return "<h1>" + escaped + "</h1>\n";
    }

    /**
     * Round-trips the HTML through jsoup so that every tag is closed, emitting
     * XML syntax with the xhtml escape mode.
     *
     * @param html HTML text to normalise
     * @return the re-serialised document
     */
    private static String formatHtml(String html) {
        org.jsoup.nodes.Document parsed = Jsoup.parse(html);

        // Same two output settings as before, applied through the fluent setters.
        parsed.outputSettings()
                .syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml)
                .escapeMode(Entities.EscapeMode.xhtml);

        return parsed.html();
    }

    /**
     * Converts the HTML document to PDF bytes and then adds bookmarks for the titles.
     *
     * @param content  complete HTML document
     * @param fileName not used by this method
     * @param titles   section titles to turn into bookmarks
     * @return the PDF bytes after the bookmark pass
     */
    private static byte[] htmlToPdf(String content, String fileName, List<String> titles) {
        // Step 1: HTML -> PDF bytes; step 2: bookmark pass over those bytes.
        return addPdfTitles(titles, getPdfContentByte(content));
    }

    /**
     * Converts a complete HTML document into PDF bytes on A4 pages.
     *
     * <p>Failures are propagated instead of being swallowed: the previous
     * {@code return} inside {@code finally} discarded every exception and handed
     * back whatever bytes happened to be in the buffer.
     *
     * @param content complete HTML document
     * @return the generated PDF
     * @throws UncheckedIOException if the conversion reports an I/O failure
     */
    private static byte[] getPdfContentByte(String content) {
        ByteArrayOutputStream pdfBytes = new ByteArrayOutputStream();
        // try-with-resources releases the document if the conversion throws;
        // NOTE(review): relies on a second close() after a successful conversion
        // being harmless - confirm against the iText version in use.
        try (PdfDocument pdfDoc = new PdfDocument(new PdfWriter(pdfBytes))) {
            pdfDoc.setDefaultPageSize(new PageSize(PageSize.A4));
            // Registers the Chinese font; without it Chinese text is not rendered.
            ConverterProperties converterProperties = initConverterProperties();
            HtmlConverter.convertToPdf(
                    new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)),
                    pdfDoc,
                    converterProperties);
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to convert HTML to PDF", e);
        }
        return pdfBytes.toByteArray();
    }

    /**
     * Adds one top-level bookmark per title to an existing PDF.
     *
     * <p>Pages are scanned in order; a title is bookmarked on the first page whose
     * extracted text contains it, and each title is bookmarked at most once.
     * Titles that are {@code null}, empty, or never found get no bookmark.
     *
     * @param titles  bookmark labels, expected to appear as text in the PDF
     * @param content bytes of the PDF to annotate
     * @return bytes of the PDF with bookmarks added
     * @throws UncheckedIOException if the input PDF cannot be read
     */
    private static byte[] addPdfTitles(List<String> titles, byte[] content) {
        ByteArrayOutputStream stampedPdf = new ByteArrayOutputStream();
        // Closing the document at the end of the try block flushes it into stampedPdf.
        try (PdfDocument pdfDoc = new PdfDocument(
                new PdfReader(new ByteArrayInputStream(content)), new PdfWriter(stampedPdf))) {
            pdfDoc.setDefaultPageSize(new PageSize(PageSize.A4));
            // Root of the bookmark tree.
            PdfOutline root = pdfDoc.getOutlines(true);

            // Tracks which titles already have a bookmark, so a title repeated in
            // later page text cannot produce a duplicate and an out-of-order match
            // cannot cause an earlier title to be skipped.
            boolean[] bookmarked = new boolean[titles.size()];
            for (int pageNum = 1; pageNum <= pdfDoc.getNumberOfPages(); pageNum++) {
                PdfPage page = pdfDoc.getPage(pageNum);
                String pageContent = PdfTextExtractor.getTextFromPage(page);
                for (int i = 0; i < bookmarked.length; i++) {
                    String title = titles.get(i);
                    if (bookmarked[i] || title == null || title.isEmpty()
                            || !pageContent.contains(title)) {
                        continue;
                    }
                    log.info("{} : Text found on page {}", title, pageNum);
                    // In-document bookmark: the destination references the page object itself.
                    PdfOutline pdfOutline = root.addOutline(title);
                    pdfOutline.addDestination(PdfExplicitDestination.createFitV(page, 2));
                    bookmarked[i] = true;
                }
            }
        } catch (IOException exception) {
            throw new UncheckedIOException("Failed to add bookmarks to the PDF", exception);
        }
        return stampedPdf.toByteArray();
    }


    /**
     * Builds the converter settings: print media type plus a font provider that
     * carries the Chinese font (iText does not render Chinese by default).
     *
     * @return converter properties ready to pass to the HTML-to-PDF conversion
     */
    private static ConverterProperties initConverterProperties(){
        ConverterProperties converterProperties = new ConverterProperties();

        // Resolve CSS as for print media.
        converterProperties.setMediaDeviceDescription(new MediaDeviceDescription(MediaType.PRINT));

        FontProvider fontProvider = new FontProvider();
        // Path of the font file; it has to exist on the machine running this code.
        String fontPath = "/home/app/pdf/simhei.ttf";
        log.info("字体路径:{}", fontPath);
        // addFont reports failure through its return value; surface it instead of
        // silently producing a PDF with missing Chinese text.
        if (!fontProvider.addFont(fontPath)) {
            log.warn("Font file could not be loaded: {} - Chinese text may not be rendered", fontPath);
        }
        converterProperties.setFontProvider(fontProvider);

        return converterProperties;
    }

    /**
     * Wraps an HTML fragment in a complete document: doctype, head with the embedded
     * stylesheet, and a body whose inline style selects the {@code simhei} font.
     *
     * <p>NOTE(review): in the stylesheet the line {@code p} is followed by
     * {@code blockquote,} without a comma, so that rule starts with the descendant
     * selector {@code p blockquote} rather than {@code p, blockquote} - confirm
     * whether this is intended before changing it.
     *
     * @param html HTML fragment to place inside the body
     * @return the complete HTML document
     */
    private static String CssHtml(String html) {
        final String documentHead = "<!DOCTYPE html>\n"
                + "<html lang=\"zh\">\n"
                + " <head> \n"
                + "  <meta charset=\"utf-8\" /> \n"
                + "  <style type=\"text/css\">\n"
                + "    body{\n"
                + "        width: 100%;\n"
                + "        margin: 0;\n"
                + "        padding: 0;\n"
                + "    }\n"
                + "    \n"
                + "    body > *:first-child {\n"
                + "            margin-top: 0 !important;\n"
                + "        }\n"
                + "        \n"
                + "        body > *:last-child {\n"
                + "            margin-bottom: 0 !important;\n"
                + "        }\n"
                + "        \n"
                + "        a {\n"
                + "            color: #0052d9;\n"
                + "            /*font-weight: 600;*/\n"
                + "            padding: 0 2px;\n"
                + "            text-decoration: none;\n"
                + "        }\n"
                + "        \n"
                + "        h1,\n"
                + "        h2,\n"
                + "        h3,\n"
                + "        h4,\n"
                + "        h5,\n"
                + "        h6 {\n"
                + "            position: relative;\n"
                + "            margin-top: 15px;\n"
                + "            margin-bottom: 10px;\n"
                + "            font-weight: bold;\n"
                + "            line-height: 1.4;\n"
                + "            cursor: text;\n"
                + "        }\n"
                + "        \n"
                + "        h1:hover a.anchor,\n"
                + "        h2:hover a.anchor,\n"
                + "        h3:hover a.anchor,\n"
                + "        h4:hover a.anchor,\n"
                + "        h5:hover a.anchor,\n"
                + "        h6:hover a.anchor {\n"
                + "            text-decoration: none;\n"
                + "        }\n"
                + "        \n"
                + "        h1 tt,\n"
                + "        h1 code {\n"
                + "            font-size: inherit !important;\n"
                + "        }\n"
                + "        \n"
                + "        h2 tt,\n"
                + "        h2 code {\n"
                + "            font-size: inherit !important;\n"
                + "        }\n"
                + "        \n"
                + "        h3 tt,\n"
                + "        h3 code {\n"
                + "            font-size: inherit !important;\n"
                + "        }\n"
                + "        \n"
                + "        h4 tt,\n"
                + "        h4 code {\n"
                + "            font-size: inherit !important;\n"
                + "        }\n"
                + "        \n"
                + "        h5 tt,\n"
                + "        h5 code {\n"
                + "            font-size: inherit !important;\n"
                + "        }\n"
                + "        \n"
                + "        h6 tt,\n"
                + "        h6 code {\n"
                + "            font-size: inherit !important;\n"
                + "        }\n"
                + "        \n"
                + "        h2 a,\n"
                + "        h3 a {\n"
                + "            color: #34495e;\n"
                + "        }\n"
                + "        p\n"
                + "        blockquote,\n"
                + "        ul,\n"
                + "        li,\n"
                + "        ol,\n"
                + "        dl,\n"
                + "        table {\n"
                + "            margin: 10px 0px;\n"
                + "            border: 1px solid #f6f6f6;\n"
                + "            border-collapse: separate;\n"
                + "            border-spacing: 0;\n"
                + "        }\n"
                + "        h1, h2, h3, h4, h5, h6 {\n"
                + "            font-weight: bold;\n"
                + "            color: #2c354d;\n"
                + "            margin-bottom: 16px;\n"
                + "            margin-top: 16px;\n"
                + "        }\n"
                + "        h1 {\n"
                + "            font-size: 30px;\n"
                + "        }\n"
                + "        h2, h3 {\n"
                + "            font-size: 24px;\n"
                + "            border: none;\n"
                + "            padding-bottom: 0;\n"
                + "            margin-top: 50px;\n"
                + "        }\n"
                + "        h4, h5, h6 {\n"
                + "            font-size: 14px;\n"
                + "        }\n"
                + "        p {\n"
                + "            font-size: 14px;\n"
                + "            color: #2c354d;\n"
                + "            margin: 16px 0;\n"
                + "        }\n"
                + "        .quote{\n"
                + "            border-left: 4px solid #42b983;\n"
                + "            color: #888;\n"
                + "            background-color: rgba(66, 185, 131, .2);\n"
                + "            margin-top: 15px;\n"
                + "            padding: 10px;\n"
                + "        }\n"
                + "        \n"
                + "        .on-focus-mode blockquote {\n"
                + "            border-left-color: rgba(85, 85, 85, 0.12);\n"
                + "        }\n"
                + "        table,\n"
                + "        td,\n"
                + "        tr,\n"
                + "        img,\n"
                + "        th {\n"
                + "        width: 100%;\n"
                + "        }\n"
                + "        table thead tr {\n"
                + "            height: 50px;\n"
                + "        }\n"
                + "        table thead tr th {\n"
                + "            background: #f2f3f7;\n"
                + "            font-size: 16px;\n"
                + "            padding: 0 10px;\n"
                + "            text-align: left;\n"
                + "            color: #2c354d;\n"
                + "            border: 1px solid #dcdee0;\n"
                + "            font-weight: 400;\n"
                + "            word-break: keep-all;\n"
                + "\n"
                + "        }\n"
                + "        table tbody tr {\n"
                + "            height: 50px;\n"
                + "        }\n"
                + "        table tbody tr td {\n"
                + "            padding: 0 10px;\n"
                + "            color: #2c354d;\n"
                + "            border: 1px solid #efefef;\n"
                + "            border-color: #dcdee0;\n"
                + "        }\n"
                + "        pre {\n"
                + "            font-size: 14px;\n"
                + "            border-radius: 0;\n"
                + "            overflow-x: auto;\n"
                + "            background: #f2f3f7;\n"
                + "            padding: 20px 30px;\n"
                + "            line-height: 28px;\n"
                + "            word-break: break-word;\n"
                + "            display: block;\n"
                + "        }\n"
                + "        code {\n"
                + "            color: #2c354d;\n"
                + "            padding: 0;\n"
                + "            margin: 0;\n"
                + "            word-break: normal;\n"
                + "            background-color: #f5f5f5;\n"
                + "            font-size: 14px;\n"
                + "            border-radius: 0;\n"
                + "            overflow-x: auto;\n"
                + "        }\n"
                + "    </style> \n"
                + " </head> \n"
                + " <body style=\"font-size:12.0pt; font-family:simhei\">";

        // Closing tags that follow the caller's fragment.
        final String documentTail = "</body>\n"
                + "                    </html>";

        return documentHead + html + documentTail;
    }
}

测试代码

 @ApiOperation("文档下载")
    @GetMapping("/download/test")
    public void download(HttpServletResponse response) {
        // One sample section: an h2 heading followed by a two-item bullet list.
        MarkdownDTO sample = MarkdownDTO.builder()
                .title("测试下载")
                .content("## 测试文档01\n" +
                        "* 测试内容\n" +
                        "* 测试类型")
                .build();

        List<MarkdownDTO> dtos = new ArrayList<>();
        dtos.add(sample);

        try {
            MdToPdfUtils.convertPDF(dtos, "测试下载", response);
        } catch (Exception e) {
            // Covers IOException too: every failure is rethrown unchecked with its cause.
            throw new RuntimeException(e);
        }
    }

效果如下:
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值