md转pdf
markdown转pdf,先转为html,再转换为pdf
中文字体文件
simhei.ttf,需要和css里面的字体类型(font-family)保持一致
在C:\Windows\Fonts 目录下,查找【黑体 常规】,粘贴出来即可
在MdToPdfUtils.initConverterProperties方法中修改字体文件路径(代码中默认为绝对路径 /home/app/pdf/simhei.ttf)
在MdToPdfUtils.CssHtml方法中设置字体类型,并可配置css样式展示
<body style="font-size:12.0pt; font-family:simhei">
maven依赖
<dependency>
<groupId>org.commonmark</groupId>
<artifactId>commonmark</artifactId>
<version>0.21.0</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>layout</artifactId>
<version>8.0.4</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>html2pdf</artifactId>
<version>5.0.4</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>kernel</artifactId>
<version>8.0.4</version>
</dependency>
代码片段
入参实体
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.io.Serializable;
/**
 * Input item describing one Markdown document: its Markdown body and its title.
 *
 * @Author:
 * @Date: 2024/8/8 18:54
 */
@Builder
@NoArgsConstructor
@AllArgsConstructor
@Data
public class MarkdownDTO implements Serializable {
/**
 * Markdown content, in md format.
 */
private String content;
/**
 * Markdown title, as a plain string.
 */
private String title;
}
工具类
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.pdf.*;
import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
import com.itextpdf.kernel.pdf.navigation.PdfDestination;
import com.itextpdf.kernel.pdf.navigation.PdfExplicitRemoteGoToDestination;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.styledxmlparser.css.media.MediaDeviceDescription;
import com.itextpdf.styledxmlparser.css.media.MediaType;
import lombok.extern.slf4j.Slf4j;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Entities;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
import java.io.*;
import java.net.URL;
import java.net.URLEncoder;
import java.util.List;
import java.util.stream.Collectors;
/**
* Markdown-to-PDF utility: renders Markdown to HTML, converts the HTML to PDF and adds title bookmarks.
*
* @Author:
* @Date: 2024/8/7 11:23
*/
@Slf4j
public class MdToPdfUtils {
/**
 * Renders the given Markdown documents into one PDF and streams it to the client as a
 * file download.
 *
 * <p>Each entry contributes a heading built from its title followed by its rendered
 * Markdown body; the titles are also passed on to be used as PDF bookmarks.
 *
 * <p>The PDF bytes are produced completely before any response header is written, so a
 * conversion failure does not leave a half-configured download response behind.
 *
 * @param dtos     Markdown documents to render, in output order; must not be {@code null}
 * @param fileName download file name without the ".pdf" extension
 * @param resp     servlet response the PDF is written to
 * @throws IOException              if encoding the file name or writing the response fails
 * @throws IllegalArgumentException if {@code dtos} is {@code null}
 */
public static void convertPDF(List<MarkdownDTO> dtos, String fileName, HttpServletResponse resp) throws IOException {
    if (dtos == null) {
        throw new IllegalArgumentException("dtos must not be null");
    }
    // Render every document as "<h1>title</h1>" + body and join them into one HTML fragment.
    String html = dtos.stream()
            .map(dto -> parserTile(dto.getTitle()) + parserMd(dto.getContent()))
            .collect(Collectors.joining("\n\n"));
    // Wrap the fragment in the HTML skeleton that carries the CSS.
    html = CssHtml(html);
    // Normalise the markup so unclosed tags do not break the PDF conversion.
    html = formatHtml(html);
    // Replace non-breaking spaces by plain spaces. The target is spelled out explicitly
    // (entity and U+00A0 character) instead of relying on an invisible character in the source.
    html = html.replace("&nbsp;", " ").replace('\u00A0', ' ');
    // The titles double as bookmark labels.
    List<String> titles = dtos.stream().map(MarkdownDTO::getTitle).collect(Collectors.toList());

    // Build the PDF first; only touch the response once we actually have something to send.
    byte[] pdf = htmlToPdf(html, fileName, titles);

    // URLEncoder produces form encoding ("+" for a space); headers need percent encoding.
    String encodedName = URLEncoder.encode(fileName + ".pdf", "UTF-8").replace("+", "%20");
    resp.setCharacterEncoding("UTF-8");
    resp.setContentType("application/pdf");
    resp.setContentLength(pdf.length);
    // Keep the legacy "filename" parameter and add the RFC 6266 "filename*" form for non-ASCII names.
    resp.setHeader("Content-Disposition",
            "attachment;filename=" + encodedName + ";filename*=UTF-8''" + encodedName);
    try (ServletOutputStream outputStream = resp.getOutputStream()) {
        outputStream.write(pdf);
        outputStream.flush();
    }
}
/**
 * Renders Markdown source to an HTML fragment with commonmark.
 *
 * @param markdown Markdown text; {@code null} is treated as an empty document
 * @return the rendered HTML fragment
 */
private static String parserMd(String markdown) {
    // A DTO built without content carries null here; render it as an empty document
    // instead of handing null to the parser.
    String source = markdown == null ? "" : markdown;
    Parser parser = Parser.builder().build();
    HtmlRenderer renderer = HtmlRenderer.builder().build();
    return renderer.render(parser.parse(source));
}
/**
 * Builds the {@code <h1>} heading placed in front of a document body.
 *
 * <p>The title is HTML-escaped so that characters such as {@code <} or {@code &} are shown
 * literally instead of being interpreted as markup.
 *
 * @param title heading text; {@code null} yields an empty heading
 * @return the heading markup followed by a newline
 */
private static String parserTile(String title) {
    String text = title == null ? "" : title;
    // '&' must be replaced first so the entities produced below are not escaped again.
    String escaped = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    return "<h1>" + escaped + "</h1>\n";
}
/**
 * Normalises HTML through jsoup so that the serialised result uses XML syntax
 * with XHTML entity escaping (i.e. every tag is emitted in closed form).
 *
 * @param html HTML text to normalise
 * @return the document re-serialised by jsoup
 */
private static String formatHtml(String html) {
    org.jsoup.nodes.Document parsed = Jsoup.parse(html);
    // Configure the serialiser: XML syntax + XHTML escape mode.
    parsed.outputSettings()
            .syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml)
            .escapeMode(Entities.EscapeMode.xhtml);
    return parsed.html();
}
/**
 * Converts HTML to PDF bytes and then adds the bookmarks for the given titles.
 *
 * @param content  complete HTML document
 * @param fileName not used by this method
 * @param titles   bookmark labels
 * @return the bookmarked PDF as a byte array
 */
private static byte[] htmlToPdf(String content, String fileName, List<String> titles) {
    // Step 1: HTML -> PDF bytes; step 2: post-process those bytes to add bookmarks.
    return addPdfTitles(titles, getPdfContentByte(content));
}
/**
 * Converts an HTML document to PDF and returns the PDF bytes.
 *
 * <p>Failures are propagated instead of being swallowed: the previous
 * {@code return} inside {@code finally} discarded every exception and handed an
 * incomplete byte array to the caller.
 *
 * @param content complete HTML document
 * @return the generated PDF as a byte array
 * @throws UncheckedIOException if the conversion fails with an I/O error
 */
private static byte[] getPdfContentByte(String content) {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    // try-with-resources guarantees the document is closed even when conversion fails.
    // NOTE(review): on success the converter presumably closes the document itself, in which
    // case this extra close is expected to be a no-op - confirm for the iText version in use.
    try (PdfDocument pdfDoc = new PdfDocument(new PdfWriter(buffer))) {
        pdfDoc.setDefaultPageSize(new PageSize(PageSize.A4));
        // Font setup so that Chinese text is rendered.
        ConverterProperties converterProperties = initConverterProperties();
        HtmlConverter.convertToPdf(
                new ByteArrayInputStream(content.getBytes("UTF-8")), pdfDoc, converterProperties);
    } catch (IOException e) {
        throw new UncheckedIOException("Failed to convert HTML to PDF", e);
    }
    return buffer.toByteArray();
}
/**
 * Adds one bookmark per title to an existing PDF.
 *
 * <p>A title is located by searching the extracted text of each page, front to back; the
 * bookmark points at the first page whose text contains the title. This is a text-match
 * heuristic: a title that also occurs in body text on an earlier page is bookmarked there,
 * and a title that cannot be found in any page's text gets no bookmark.
 *
 * <p>Each title is bookmarked at most once, and a title that is not found on one page is
 * still looked for on the following pages.
 *
 * @param titles  bookmark labels; {@code null} or empty entries are skipped
 * @param content PDF bytes to post-process
 * @return the PDF with bookmarks as a byte array
 * @throws UncheckedIOException if the PDF cannot be read
 */
private static byte[] addPdfTitles(List<String> titles, byte[] content) {
    ByteArrayOutputStream output = new ByteArrayOutputStream();
    // Closing the document is what writes it out, so it must happen on every path.
    try (PdfDocument pdfDoc = new PdfDocument(
            new PdfReader(new ByteArrayInputStream(content)), new PdfWriter(output))) {
        pdfDoc.setDefaultPageSize(new PageSize(PageSize.A4));
        // Root of the bookmark tree.
        PdfOutline root = pdfDoc.getOutlines(true);
        int titleCount = titles == null ? 0 : titles.size();
        // Tracks which titles already have a bookmark so none is added twice.
        boolean[] bookmarked = new boolean[titleCount];
        int pageCount = pdfDoc.getNumberOfPages();
        for (int page = 1; page <= pageCount; page++) {
            String pageContent = PdfTextExtractor.getTextFromPage(pdfDoc.getPage(page));
            for (int i = 0; i < titleCount; i++) {
                String title = titles.get(i);
                if (bookmarked[i] || title == null || title.isEmpty() || !pageContent.contains(title)) {
                    continue;
                }
                log.info("{} : Text found on page {}", title, page);
                // Add the bookmark and make it jump to the page.
                // NOTE(review): a *remote* go-to destination is used for a bookmark inside the
                // same document; kept as in the original - confirm it behaves as intended.
                PdfOutline pdfOutline = root.addOutline(title);
                PdfDestination destination = PdfExplicitRemoteGoToDestination.createFitV(page, 2);
                pdfOutline.addDestination(destination);
                bookmarked[i] = true;
            }
        }
    } catch (IOException exception) {
        throw new UncheckedIOException("Failed to add bookmarks to PDF", exception);
    }
    return output.toByteArray();
}
/**
 * Creates the converter settings: print media type plus a font provider that carries the
 * font used for Chinese text.
 *
 * <p>The font file location defaults to {@code /home/app/pdf/simhei.ttf} and can be
 * overridden with the {@code md2pdf.font.path} system property, so the same build can run on
 * machines that keep the font elsewhere.
 *
 * @return converter properties for the HTML to PDF conversion
 */
private static ConverterProperties initConverterProperties(){
    ConverterProperties converterProperties = new ConverterProperties();
    // Apply CSS rules as for print media.
    MediaDeviceDescription mediaDeviceDescription = new MediaDeviceDescription(MediaType.PRINT);
    converterProperties.setMediaDeviceDescription(mediaDeviceDescription);
    // Font setup: the font file must be present on the machine running the conversion.
    FontProvider fontProvider = new FontProvider();
    String fontPath = System.getProperty("md2pdf.font.path", "/home/app/pdf/simhei.ttf");
    log.info("字体路径:{}", fontPath);
    // addFont reports failure through its return value; surface it instead of silently
    // producing a PDF in which the Chinese text is missing.
    if (!fontProvider.addFont(fontPath)) {
        log.warn("Font could not be loaded from {}; Chinese text may not be rendered", fontPath);
    }
    converterProperties.setFontProvider(fontProvider);
    return converterProperties;
}
/**
 * Wraps an HTML fragment in a complete HTML document: doctype, head with the embedded
 * stylesheet, and a body that selects the {@code simhei} font at 12pt.
 *
 * @param html body fragment
 * @return the complete HTML document
 */
private static String CssHtml(String html) {
    // NOTE(review): in the margin/border rule below, "p" is followed by "blockquote" without
    // a comma, so CSS reads it as the descendant selector "p blockquote". Left untouched
    // because changing it would alter the rendered output - confirm the intent.
    String documentHead = "<!DOCTYPE html>\n" +
            "<html lang=\"zh\">\n" +
            " <head> \n" +
            " <meta charset=\"utf-8\" /> \n" +
            " <style type=\"text/css\">\n" +
            " body{\n" +
            " width: 100%;\n" +
            " margin: 0;\n" +
            " padding: 0;\n" +
            " }\n" +
            " \n" +
            " body > *:first-child {\n" +
            " margin-top: 0 !important;\n" +
            " }\n" +
            " \n" +
            " body > *:last-child {\n" +
            " margin-bottom: 0 !important;\n" +
            " }\n" +
            " \n" +
            " a {\n" +
            " color: #0052d9;\n" +
            " /*font-weight: 600;*/\n" +
            " padding: 0 2px;\n" +
            " text-decoration: none;\n" +
            " }\n" +
            " \n" +
            " h1,\n" +
            " h2,\n" +
            " h3,\n" +
            " h4,\n" +
            " h5,\n" +
            " h6 {\n" +
            " position: relative;\n" +
            " margin-top: 15px;\n" +
            " margin-bottom: 10px;\n" +
            " font-weight: bold;\n" +
            " line-height: 1.4;\n" +
            " cursor: text;\n" +
            " }\n" +
            " \n" +
            " h1:hover a.anchor,\n" +
            " h2:hover a.anchor,\n" +
            " h3:hover a.anchor,\n" +
            " h4:hover a.anchor,\n" +
            " h5:hover a.anchor,\n" +
            " h6:hover a.anchor {\n" +
            " text-decoration: none;\n" +
            " }\n" +
            " \n" +
            " h1 tt,\n" +
            " h1 code {\n" +
            " font-size: inherit !important;\n" +
            " }\n" +
            " \n" +
            " h2 tt,\n" +
            " h2 code {\n" +
            " font-size: inherit !important;\n" +
            " }\n" +
            " \n" +
            " h3 tt,\n" +
            " h3 code {\n" +
            " font-size: inherit !important;\n" +
            " }\n" +
            " \n" +
            " h4 tt,\n" +
            " h4 code {\n" +
            " font-size: inherit !important;\n" +
            " }\n" +
            " \n" +
            " h5 tt,\n" +
            " h5 code {\n" +
            " font-size: inherit !important;\n" +
            " }\n" +
            " \n" +
            " h6 tt,\n" +
            " h6 code {\n" +
            " font-size: inherit !important;\n" +
            " }\n" +
            " \n" +
            " h2 a,\n" +
            " h3 a {\n" +
            " color: #34495e;\n" +
            " }\n" +
            " p\n" +
            " blockquote,\n" +
            " ul,\n" +
            " li,\n" +
            " ol,\n" +
            " dl,\n" +
            " table {\n" +
            " margin: 10px 0px;\n" +
            " border: 1px solid #f6f6f6;\n" +
            " border-collapse: separate;\n" +
            " border-spacing: 0;\n" +
            " }\n" +
            " h1, h2, h3, h4, h5, h6 {\n" +
            " font-weight: bold;\n" +
            " color: #2c354d;\n" +
            " margin-bottom: 16px;\n" +
            " margin-top: 16px;\n" +
            " }\n" +
            " h1 {\n" +
            " font-size: 30px;\n" +
            " }\n" +
            " h2, h3 {\n" +
            " font-size: 24px;\n" +
            " border: none;\n" +
            " padding-bottom: 0;\n" +
            " margin-top: 50px;\n" +
            " }\n" +
            " h4, h5, h6 {\n" +
            " font-size: 14px;\n" +
            " }\n" +
            " p {\n" +
            " font-size: 14px;\n" +
            " color: #2c354d;\n" +
            " margin: 16px 0;\n" +
            " }\n" +
            " .quote{\n" +
            " border-left: 4px solid #42b983;\n" +
            " color: #888;\n" +
            " background-color: rgba(66, 185, 131, .2);\n" +
            " margin-top: 15px;\n" +
            " padding: 10px;\n" +
            " }\n" +
            " \n" +
            " .on-focus-mode blockquote {\n" +
            " border-left-color: rgba(85, 85, 85, 0.12);\n" +
            " }\n" +
            " table,\n" +
            " td,\n" +
            " tr,\n" +
            " img,\n" +
            " th {\n" +
            " width: 100%;\n" +
            " }\n" +
            " table thead tr {\n" +
            " height: 50px;\n" +
            " }\n" +
            " table thead tr th {\n" +
            " background: #f2f3f7;\n" +
            " font-size: 16px;\n" +
            " padding: 0 10px;\n" +
            " text-align: left;\n" +
            " color: #2c354d;\n" +
            " border: 1px solid #dcdee0;\n" +
            " font-weight: 400;\n" +
            " word-break: keep-all;\n" +
            "\n" +
            " }\n" +
            " table tbody tr {\n" +
            " height: 50px;\n" +
            " }\n" +
            " table tbody tr td {\n" +
            " padding: 0 10px;\n" +
            " color: #2c354d;\n" +
            " border: 1px solid #efefef;\n" +
            " border-color: #dcdee0;\n" +
            " }\n" +
            " pre {\n" +
            " font-size: 14px;\n" +
            " border-radius: 0;\n" +
            " overflow-x: auto;\n" +
            " background: #f2f3f7;\n" +
            " padding: 20px 30px;\n" +
            " line-height: 28px;\n" +
            " word-break: break-word;\n" +
            " display: block;\n" +
            " }\n" +
            " code {\n" +
            " color: #2c354d;\n" +
            " padding: 0;\n" +
            " margin: 0;\n" +
            " word-break: normal;\n" +
            " background-color: #f5f5f5;\n" +
            " font-size: 14px;\n" +
            " border-radius: 0;\n" +
            " overflow-x: auto;\n" +
            " }\n" +
            " </style> \n" +
            " </head> \n" +
            " <body style=\"font-size:12.0pt; font-family:simhei\">";
    // Closing tags that follow the caller's fragment.
    String documentTail = "</body>\n" +
            " </html>";
    return documentHead + html + documentTail;
}
}
测试代码
/**
 * Demo endpoint: builds one sample Markdown document and streams it back as a PDF download.
 *
 * @param response servlet response the PDF is written to
 */
@ApiOperation("文档下载")
@GetMapping("/download/test")
public void download(HttpServletResponse response) {
    // Sample document: a second-level heading followed by two bullet points.
    MarkdownDTO sample = MarkdownDTO.builder()
            .content("## 测试文档01\n" +
                    "* 测试内容\n" +
                    "* 测试类型")
            .title("测试下载")
            .build();
    List<MarkdownDTO> dtos = new ArrayList<>();
    dtos.add(sample);
    try {
        MdToPdfUtils.convertPDF(dtos, "测试下载", response);
    } catch (Exception e) {
        // I/O failures and every other exception are wrapped the same way.
        throw new RuntimeException(e);
    }
}
效果如下: