基础的转换
转换原理:
1、使用markdown库将md转换为html
2、使用pdfkit将html转换为pdf(需要单独安装wkhtmltopdf)
实现代码
# pip3 install markdown
# pip3 install pdfkit
import codecs
import markdown
import pdfkit
# Step 1: load the Markdown source as UTF-8 text.
with codecs.open("test.md", "r", encoding="utf-8") as md_file:
    md_content = md_file.read()

# Step 2: render the Markdown to an HTML fragment (no extensions enabled).
html_content = markdown.markdown(md_content)

# Step 3: write the HTML file, prefixed with a charset declaration so that
# Chinese text is not garbled.
charset_meta = '<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>'
with codecs.open("test.html", "w", encoding="utf-8") as html_file:
    html_file.write(charset_meta + html_content)

# Step 4: convert the HTML file to PDF (pdfkit drives the wkhtmltopdf binary).
pdfkit.from_file("test.html", "test.pdf")
存在问题
以上代码生成的pdf不支持代码高亮、表格等一些特殊格式的显示,可以通过相关渲染库和扩展插件解决
md特有格式不支持解决
引入如下python库
python-markdown-math
markdown_checklist
pymdown-extensions
from pymdownx import superfences
import markdown
# Third-party extensions (python-markdown-math, markdown_checklist,
# pymdown-extensions).
third_party_extensions = [
    "mdx_math",  # KaTeX math formulas, $E=mc^2$ and $$E=mc^2$$
    "markdown_checklist.extension",  # checklist, - [ ] and - [x]
    "pymdownx.magiclink",  # automatically turn URLs into hyperlinks
    "pymdownx.caret",  # superscript and subscript
    "pymdownx.superfences",  # nestable block features, various diagrams
    "pymdownx.betterem",  # improved emphasis handling (bold and italic)
    "pymdownx.mark",  # highlighted (marked) text
    "pymdownx.highlight",  # code syntax highlighting
    "pymdownx.tasklist",  # task lists
    "pymdownx.tilde",  # strikethrough
]

# Extensions bundled with python-markdown come first, followed by the
# third-party ones.
extensions = [
    "toc",  # table of contents, [toc]
    # abbreviations, attribute lists, definition lists, fenced code blocks,
    # footnotes, Markdown in HTML, tables
    "extra",
] + third_party_extensions

# Per-extension options, keyed by extension name.
extension_configs = {
    "mdx_math": {
        "enable_dollar_delimiter": True,  # allow a single $ as delimiter
    },
    "pymdownx.superfences": {
        "custom_fences": [
            {
                "name": "mermaid",  # enable flowcharts and other diagrams
                "class": "mermaid",
                "format": superfences.fence_div_format,
            },
        ],
    },
    "pymdownx.highlight": {
        "linenums": True,  # show line numbers
        "linenums_style": "pymdownx-inline",  # keep code and line numbers separate
    },
    "pymdownx.tasklist": {
        "clickable_checkbox": True,  # make task-list checkboxes clickable
    },
}

# This call replaces the plain markdown.markdown(md_content) conversion.
html_content = markdown.markdown(
    md_content,
    extensions=extensions,
    extension_configs=extension_configs,
)
css渲染解决
启用上述扩展后,生成的html标签会带有class属性,但转成的html和pdf本身并不包含这些class对应的样式定义,因此需要手动引入CSS才能正确渲染。
需要安装pygments,然后生成相应的样式
pip3 install pygments
# If you need the code-highlighting styles, run the following command
pygmentize -f html -a .highlight -S default > pygments.css
# Write test.html with a <head> that embeds the Pygments stylesheet and the
# charset declaration, followed by the rendered Markdown body.
with codecs.open("test.html", "w", encoding="utf-8") as html_file, \
        open("pygments.css", "r") as css_file:
    charset_meta = '<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>'
    html_file.write('''<head>
<style>
{}
</style>
{}
</head>\n'''.format(css_file.read(), charset_meta))
    html_file.write(html_content)
图片大小导致pdf文字异常
当源文件里面有图片的话,会导致生成的pdf随着图片尺寸变化,特别难看,通过手动修改html的样式参数实现图片自动伸缩
import codecs
from bs4 import BeautifulSoup
# Make every image shrink to the page width so that large pictures do not
# distort the layout of the generated PDF.
with codecs.open("test.html", "r", encoding="utf-8") as f:
    # Parse the file itself rather than the bare html_content fragment;
    # otherwise the <head> holding the CSS and the charset declaration is
    # missing from the output.
    soup = BeautifulSoup(f, features="lxml")

for img in soup.find_all("img"):
    # NOTE: this replaces any inline style the <img> tag already had.
    img["style"] = "max-width:100%; overflow:hidden;"

with codecs.open("test_final.html", "w", encoding="utf-8") as g:
    # Serialize with str() instead of prettify(): prettify() inserts newlines
    # between tags, which changes how inline content is rendered.
    g.write(str(soup))
完整流程
import codecs
import markdown
import pdfkit
from pymdownx import superfences
from bs4 import BeautifulSoup
# Third-party extensions (python-markdown-math, markdown_checklist,
# pymdown-extensions).
third_party_extensions = [
    "mdx_math",  # KaTeX math formulas, $E=mc^2$ and $$E=mc^2$$
    "markdown_checklist.extension",  # checklist, - [ ] and - [x]
    "pymdownx.magiclink",  # automatically turn URLs into hyperlinks
    "pymdownx.caret",  # superscript and subscript
    "pymdownx.superfences",  # nestable block features, various diagrams
    "pymdownx.betterem",  # improved emphasis handling (bold and italic)
    "pymdownx.mark",  # highlighted (marked) text
    "pymdownx.highlight",  # code syntax highlighting
    "pymdownx.tasklist",  # task lists
    "pymdownx.tilde",  # strikethrough
]

# Extensions bundled with python-markdown come first, followed by the
# third-party ones.
extensions = [
    "toc",  # table of contents, [toc]
    # abbreviations, attribute lists, definition lists, fenced code blocks,
    # footnotes, Markdown in HTML, tables
    "extra",
] + third_party_extensions

# Per-extension options, keyed by extension name.
extension_configs = {
    "mdx_math": {
        "enable_dollar_delimiter": True,  # allow a single $ as delimiter
    },
    "pymdownx.superfences": {
        "custom_fences": [
            {
                "name": "mermaid",  # enable flowcharts and other diagrams
                "class": "mermaid",
                "format": superfences.fence_div_format,
            },
        ],
    },
    "pymdownx.highlight": {
        "linenums": True,  # show line numbers
        "linenums_style": "pymdownx-inline",  # keep code and line numbers separate
    },
    "pymdownx.tasklist": {
        "clickable_checkbox": True,  # make task-list checkboxes clickable
    },
}
# Step 1: load the Markdown source as UTF-8 text.
with codecs.open("test.md", "r", encoding="utf-8") as f:
    md_content = f.read()

# Step 2: render Markdown to an HTML fragment with the configured extensions.
html_content = markdown.markdown(
    md_content,
    extensions=extensions,
    extension_configs=extension_configs,
)

# Step 3: write test.html with a <head> that embeds the Pygments stylesheet
# and a charset declaration (prevents garbled Chinese text), then the body.
with codecs.open("test.html", "w", encoding="utf-8") as f:
    with open("pygments.css", "r") as g:
        f.write('''<head>
<style>
{}
</style>
{}
</head>\n'''.format(g.read(), '<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>'))
    f.write(html_content)

# Step 4: make every image shrink to the page width so that large pictures
# do not distort the PDF layout.
with codecs.open("test.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, features="lxml")

for img in soup.find_all("img"):
    # NOTE: this replaces any inline style the <img> tag already had.
    img["style"] = "max-width:100%; overflow:hidden;"

with codecs.open("test_final.html", "w", encoding="utf-8") as g:
    # Serialize with str() instead of prettify(): prettify() inserts newlines
    # between tags, which changes how inline content is rendered.
    g.write(str(soup))

# Step 5: convert the final HTML file to PDF (pdfkit drives wkhtmltopdf).
pdfkit.from_file("test_final.html", "test.pdf")