import getopt
import os
import re
import sys
from enum import Enum
from functools import reduce
from subprocess import call

from docopt import docopt

__version__ = '1.0'
#定义三个枚举类#定义表状态
classTABLE(Enum):
Init= 1Format= 2Table= 3
# Ordered-list parsing state.
class ORDERLIST(Enum):
    """States of the ordered-list parser.

    Init -- not inside an ordered list.
    List -- at least one numbered item has been emitted and the list is open.
    """

    Init = 1
    List = 2
# Fenced-block parsing state.
class BLOCK(Enum):
    """States of the fenced-block parser.

    Init      -- not inside a fenced block.
    Block     -- inside a fenced block (plain or code).
    CodeBlock -- declared but not referenced by the code visible in this file.
    """

    Init = 1
    Block = 2
    CodeBlock = 3
# Module-level parser state; every state machine starts in its idle state.
table_state, orderList_state, block_state = (
    TABLE.Init,
    ORDERLIST.Init,
    BLOCK.Init,
)
# is_code: set while the currently open fenced block is a code block.
# is_normal: reset to True for each parsed line and cleared by handlers.
is_code, is_normal = False, True
# Cells of the buffered candidate table-header row.
temp_table_first_line = []
# Raw text of the buffered candidate table-header row.
temp_table_first_line_str = ""
# Set once any line contains inline math ($...$).
need_mathjax = False

# Opening fence of a plain block: three backticks, optional whitespace, newline.
_PLAIN_FENCE_RE = re.compile(r'```(\s)*\n')
# Whitespace inside a fenced block that must survive HTML whitespace collapsing.
_BLOCK_SPACE_RE = re.compile(r'[\n\r\v\f\ ]')
# A line containing at least one '|' with text on both sides.
_TABLE_ROW_RE = re.compile(r'^((.+)\|)+((.+))$')


def test_state(input):
    """Advance the block / ordered-list / table state machines by one line.

    Reads and updates the module-level parser state and returns the text that
    should replace ``input`` in the output (possibly ``""`` while a candidate
    table header is being buffered).

    NOTE(review): the HTML literals in this function were stripped from the
    source this was recovered from; the tags below are the minimal markup
    implied by the surrounding logic and must be confirmed.
    """
    # is_normal must be declared global here: the assignments below are meant
    # to be seen by parse(), which decides whether to append a line break.
    global table_state, orderList_state, block_state, is_code, is_normal
    global temp_table_first_line, temp_table_first_line_str

    result = input

    # ---- fenced blocks ----------------------------------------------------
    if block_state == BLOCK.Init and _PLAIN_FENCE_RE.match(input):
        # Plain block opened by a bare fence.
        result = "<blockquote>"
        block_state = BLOCK.Block
        is_normal = False
    elif (block_state == BLOCK.Init and len(input) > 4
            and input.startswith('```')
            and input.startswith(("python", "c++", "c"), 3)):
        # Code block: the fence is followed by a language tag.
        block_state = BLOCK.Block
        result = "<code><br/>"
        is_code = True
        is_normal = False
    elif block_state == BLOCK.Block and input == '```\n':
        # Closing fence.
        result = "</code>" if is_code else "</blockquote>"
        block_state = BLOCK.Init
        is_code = False
        is_normal = False
    elif block_state == BLOCK.Block:
        # Content line: keep spacing visible and expand tabs to four spaces.
        result = _BLOCK_SPACE_RE.sub("&nbsp;", result)
        result = result.replace("\t", "&nbsp;" * 4)
        result = "<span>" + result + "</span><br/>"
        is_normal = False

    # ---- ordered lists ----------------------------------------------------
    is_item = len(input) > 2 and input[0].isdigit() and input[1] == '.'
    if is_item:
        if orderList_state == ORDERLIST.Init:
            orderList_state = ORDERLIST.List
            result = "<ol><li>" + input[2:] + "</li>"
        else:
            result = "<li>" + input[2:] + "</li>"
        is_normal = False
    elif orderList_state == ORDERLIST.List:
        # First non-item line after a list closes it.
        result = "</ol>" + input
        orderList_state = ORDERLIST.Init

    # ---- tables -----------------------------------------------------------
    if _TABLE_ROW_RE.match(input):
        cells = input.split('|')
        # Drop only the line terminator; a final line without one must not
        # lose its last character.
        cells[-1] = cells[-1].rstrip('\n')
        # Leading / trailing pipes produce empty edge cells; discard them.
        if cells[0] == '':
            cells.pop(0)
        if cells and cells[-1] == '':
            cells.pop(-1)

        if table_state == TABLE.Init:
            # Possible header row: hold it back until the next line is seen.
            table_state = TABLE.Format
            temp_table_first_line = cells
            temp_table_first_line_str = input
            result = ""
        elif table_state == TABLE.Format:
            if all(all_same(cell, '-') for cell in cells):
                # Separator row confirms the table; emit the buffered header.
                table_state = TABLE.Table
                result = "<table><thead><tr>"
                for cell in temp_table_first_line:
                    result += "<th>" + cell + "</th>"
                result += "</tr>"
                result += "</thead><tbody>"
                is_normal = False
            else:
                # Not a table after all: release the buffered line.
                result = temp_table_first_line_str + "<br/>" + input
                table_state = TABLE.Init
        elif table_state == TABLE.Table:
            result = "<tr>"
            for cell in cells:
                result += "<td>" + cell + "</td>"
            result += "</tr>"
    elif table_state == TABLE.Table:
        # First non-row line ends the table.
        table_state = TABLE.Init
        result = "</tbody></table>" + result
    elif table_state == TABLE.Format:
        # The buffered candidate header was followed by a non-table line:
        # emit it now instead of holding it until some later '|' line.
        result = temp_table_first_line_str + "<br/>" + result
        table_state = TABLE.Init

    return result


# Check whether lst consists solely of the character sym.
def all_same(lst, sym):
    """Return True when ``lst`` is empty or equals ``sym`` repeated len(lst) times.

    Args:
        lst: the string to inspect.
        sym: the single character every position of ``lst`` must equal.
    """
    return not lst or sym * len(lst) == lst


# Handle headings.
def handleTitle(s, n):
    """Wrap a heading line in the HTML heading element of level ``n``.

    Args:
        s: the raw Markdown line, starting with ``n`` '#' characters.
        n: heading level; also the number of leading characters to drop.

    Returns:
        ``s`` without its first ``n`` characters, enclosed in ``<hN>...</hN>``.

    NOTE(review): the tag literals were stripped from the recovered source;
    ``<hN>`` is the markup implied by the otherwise unused level ``n``.
    """
    level = str(n)
    return "<h" + level + ">" + s[n:] + "</h" + level + ">"


# Handle unordered lists.
def handleUnorderd(s):
    """Turn an unordered-list line into a one-item HTML list.

    The first character of ``s`` (the list marker) is dropped and the rest is
    wrapped in ``<ul><li>...</li></ul>``.

    NOTE(review): the tag literals were stripped from the recovered source;
    confirm the intended markup.
    """
    return "<ul><li>" + s[1:] + "</li></ul>"


def tokenTemplate(s, match):
    """Return the regex source that captures text enclosed by ``match``.

    Args:
        s: unused; kept so existing callers keep working.
        match: one of ``'*'``, ``'~~'`` or ``'**'``.

    Returns:
        A regex string with one capture group, or ``""`` for any other marker.
    """
    # Raw strings: the original non-raw literals relied on invalid escape
    # sequences such as "\*", which newer Python versions warn about.
    templates = {
        '*': r"\*([^\*]*)\*",
        '~~': r"\~\~([^\~\~]*)\~\~",
        '**': r"\*\*([^\*\*]*)\*\*",
    }
    return templates.get(match, "")


# Handle inline markers such as **, * and ~~.
# (pattern, tag) pairs; '**' comes first so it is consumed before '*'.
_TOKEN_RULES = (
    (re.compile(r"\*\*([^*]*)\*\*"), "b"),
    (re.compile(r"\*([^*]*)\*"), "i"),
    (re.compile(r"~~([^~]*)~~"), "S"),
)
# Inline math delimited by single dollar signs.
_INLINE_MATH_RE = re.compile(r'\$([^\$]*)\$')


def tokenHandler(s):
    """Replace ``**x**``, ``*x*`` and ``~~x~~`` with ``<b>``, ``<i>`` and ``<S>``.

    Also sets the module-level ``need_mathjax`` flag when the line contains
    inline math (``$...$``).

    Args:
        s: one line of text.

    Returns:
        The line with the inline markers converted to HTML tags.

    NOTE(review): the opening/closing tag literals were partly stripped from
    the recovered source; ``<tag>content</tag>`` matches the length offsets
    (3 and 5) the original used.
    """
    global need_mathjax

    for pattern, tag in _TOKEN_RULES:
        # re.sub replaces all non-overlapping matches left to right, which is
        # what the original manual span-offset bookkeeping computed.
        s = pattern.sub(
            lambda m, tag=tag: "<" + tag + ">" + m.group(1) + "</" + tag + ">",
            s,
        )

    if _INLINE_MATH_RE.search(s):
        need_mathjax = True
    return s


# Handle links.
# NOTE(review): the hyperlink pattern requires a literal backslash before '['
# (i.e. "\[text](url)"); kept exactly as found -- confirm this is intended.
_LINK_RE = re.compile(r'\\\[(.*)\]\((.*)\)')
_IMAGE_RE = re.compile(r'!\[(.*)\]\((.*)\)')
_SUPERSCRIPT_RE = re.compile(r'(.)\^\[([^\]]*)\]')
# Matches lines that are empty or whitespace only.
_BLANK_LINE_RE = re.compile(r'^(\s)*$')

# Accepted Markdown file suffixes, as returned by os.path.splitext.
_MARKDOWN_SUFFIXES = (".md", ".markdown", ".mdown", ".mkd")

# Page scaffolding written around the converted body.
# NOTE(review): all markup below was stripped from the recovered source and
# rebuilt from the surviving CSS ids and MathJax config; confirm before use.
_PAGE_HEAD = (
    '<style type="text/css">'
    '#wrapper { width: 100%;height:100%; margin: 0; padding: 0;}'
    '#left { float:left; width: 10%; height: 100%; }'
    '#second { float:left; width: 80%;height: 100%; }'
    '#right {float:left; width: 10%; height: 100%; }'
    '</style>'
    '<div id="wrapper"><div id="left"></div><div id="second">'
)
_PAGE_META = '<meta charset="utf-8"/>'
_PAGE_TAIL = '</div><div id="right"></div></div>'
# Raw string so the JavaScript source really contains '\\(' and '\\)'; the
# original non-raw literal collapsed them to '\(' / '\)'.
_MATHJAX_SNIPPET = (
    '<script type="text/x-mathjax-config">'
    r"MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}});"
    '</script>'
    # NOTE(review): script URL is not present in the recovered source.
    '<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/'
    'libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>'
)


def link_image(s):
    """Convert links, images and ``x^[n]`` superscripts in one line to HTML.

    Args:
        s: one line of text.

    Returns:
        The line with ``\\[text](url)`` turned into an anchor,
        ``![text](url)`` into an image and ``c^[idx]`` into ``c<sup>idx</sup>``.
    """
    # re.sub keeps every replacement aligned; the original reused spans from
    # the unmodified string, which go stale after the first replacement.
    s = _LINK_RE.sub(
        lambda m: '<a href="' + m.group(2) + '">' + m.group(1) + '</a>', s)
    s = _IMAGE_RE.sub(
        lambda m: '<img src="' + m.group(2) + '" alt="' + m.group(1) + '">', s)
    s = _SUPERSCRIPT_RE.sub(
        lambda m: m.group(1) + "<sup>" + m.group(2) + "</sup>", s)
    return s


def parse(input):
    """Convert one Markdown line to HTML, updating the module-level state.

    Args:
        input: one line of the source document, normally newline-terminated.

    Returns:
        The HTML text for this line (may be ``""``).
    """
    global block_state, is_normal
    is_normal = True

    # Let the block / list / table state machines look at the line first.
    result = test_state(input)
    if block_state == BLOCK.Block:
        # Inside a fenced block nothing else is interpreted.
        return result

    # Headings: count the leading '#' characters, at most six.
    title_rank = next(
        (i for i in range(6, 0, -1) if input.startswith('#' * i)), 0)
    if title_rank != 0:
        return handleTitle(input, title_rank)

    # Horizontal rule: a newline-terminated line made only of '-'.
    if len(input) > 2 and input.endswith('\n') and all_same(input[:-1], '-'):
        return "<hr>"

    # Unordered list item.
    if result != "" and result[0] in ('+', '-'):
        result = handleUnorderd(result)
        is_normal = False

    # Block quote: one nesting level per leading '>'.  Counting via lstrip
    # avoids the IndexError the original raised on empty or all-'>' input.
    count = len(input) - len(input.lstrip('>'))
    if count:
        result = ("<blockquote>" * count + " " + input[count:]
                  + "</blockquote>" * count)
        is_normal = False

    # Inline markers (**, *, ~~), then links / images / superscripts.
    result = tokenHandler(result)
    result = link_image(result)

    # Ordinary non-blank lines get an explicit line break.
    if input.endswith("\n") and is_normal and not _BLANK_LINE_RE.match(input):
        result += "<br/>"
    return result


def run(source_file, dest_file, dest_pdf_file, only_pdf):
    """Convert a Markdown file to HTML and optionally to PDF.

    Args:
        source_file: path of the Markdown input; its suffix must be one of
            ``.md``, ``.markdown``, ``.mdown`` or ``.mkd``.
        dest_file: path of the HTML output (ignored when ``only_pdf``).
        dest_pdf_file: path of the PDF output; ``""`` means no PDF unless
            ``only_pdf`` is set.
        only_pdf: when true, write the HTML to a temporary file and delete it
            after the PDF has been produced.

    Exits the process with status 1 when the suffix is not accepted. PDF
    conversion is delegated to the external ``wkhtmltopdf`` program.
    """
    _, suffix = os.path.splitext(source_file)
    if suffix not in _MARKDOWN_SUFFIXES:
        print('Error: the file should be in markdown format')
        sys.exit(1)

    dest_name = ".~temp~.html" if only_pdf else dest_file

    # The page declares UTF-8, so read and write with that encoding; the
    # context manager closes both files even if parsing raises.
    with open(source_file, "r", encoding="utf-8") as src, \
            open(dest_name, "w", encoding="utf-8") as out:
        out.write(_PAGE_HEAD)
        out.write(_PAGE_META)

        # Translate the Markdown document line by line.
        for eachline in src:
            result = parse(eachline)
            if result != "":
                out.write(result)

        out.write(_PAGE_TAIL)
        if need_mathjax:
            out.write(_MATHJAX_SNIPPET)

    # Convert the HTML to PDF with wkhtmltopdf when requested.
    if dest_pdf_file != "" or only_pdf:
        call(["wkhtmltopdf", dest_name, dest_pdf_file])
    # Remove the intermediate HTML file when only the PDF was wanted.
    if only_pdf:
        os.remove(dest_name)


# Entry point.
def main():
    """Parse the command line with docopt and run the conversion.

    The HTML output defaults to ``translation_result.html`` unless
    ``--output`` is given; a PDF is produced only with ``--print`` or
    ``--Print``, and ``--Print`` additionally requests PDF-only output.

    NOTE(review): the three ``args['']`` lookups are kept exactly as found.
    The key names look like stripped ``<...>`` docopt placeholders for the
    source and output file; restore them from the module usage docstring,
    which is not visible here.
    """
    args = docopt(__doc__, version=__version__)

    dest_file = args[''] if args['--output'] else "translation_result.html"
    dest_pdf_file = args[''] if args['--print'] or args['--Print'] else ""

    run(args[''], dest_file, dest_pdf_file, args['--Print'])


if __name__ == "__main__":
    main()