from lxml import etree, html
import re
# Sample HTML fragment: two paragraphs surrounding a single MathML rendering
# of the quadratic formula, x = (-b ± sqrt(b^2 - 4ac)) / (2a).
# NOTE: the <mo> elements that look empty are not empty -- they hold an
# invisible character (presumably U+2062 INVISIBLE TIMES; confirm before
# editing), so do not "clean them up".
mathml = """
<div>
<p>Here is an equation:</p>
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mrow>
<mi>x</mi>
<mo>=</mo>
<mfrac>
<mrow>
<mo>-</mo>
<mi>b</mi>
<mo>±</mo>
<msqrt>
<mrow>
<msup>
<mi>b</mi>
<mn>2</mn>
</msup>
<mo>-</mo>
<mn>4</mn>
<mo>⁢</mo>
<mi>a</mi>
<mo>⁢</mo>
<mi>c</mi>
</mrow>
</msqrt>
</mrow>
<mrow>
<mn>2</mn>
<mo>⁢</mo>
<mi>a</mi>
</mrow>
</mfrac>
</mrow>
</math>
<p>End of equation.</p>
</div>
"""
# Matches one complete <math>...</math> element (non-greedy, across lines).
# The lookahead requires whitespace or '>' right after "math" so that tags
# which merely start with those letters (e.g. <mathematics>) are ignored.
_MATHML_PATTERN = re.compile(r'(<math(?=[\s>]).*?</math>)', re.DOTALL)


def mathml2latex(equation, xslt_path=None):
    """Replace every MathML element in *equation* with its LaTeX form.

    Each ``<math>...</math>`` element found in the text is parsed as XML,
    run through the XSLT stylesheet, and substituted in place by
    ``<span>LATEX</span>``. Text outside the MathML elements is returned
    untouched.

    Args:
        equation: Text (typically an HTML fragment) that may contain MathML.
        xslt_path: Path of the MathML-to-LaTeX stylesheet. Defaults to
            ``xsl_yarosh/mmltex.xsl`` relative to the current working
            directory, built with the platform's path separator.

    Returns:
        The converted text, or ``None`` when the stylesheet or any formula
        could not be processed (a diagnostic is printed in that case).
        Input without any MathML is returned unchanged and does not require
        the stylesheet to exist.
    """
    try:
        # Nothing to convert: skip loading and compiling the stylesheet.
        if _MATHML_PATTERN.search(equation) is None:
            return equation

        if xslt_path is None:
            # Local import keeps this block self-contained.
            import os
            # On Windows this yields the historical '.\xsl_yarosh\mmltex.xsl';
            # elsewhere it uses '/' so the file can actually be found.
            xslt_path = os.path.join('.', 'xsl_yarosh', 'mmltex.xsl')

        # Parse the stylesheet and build the transformer once per call.
        transform = etree.XSLT(etree.parse(xslt_path))

        def _to_latex_span(found):
            # Parse the matched MathML text and apply the stylesheet to it.
            math_dom = etree.fromstring(found.group(1))
            newdom = transform(math_dom)
            # NOTE(review): the LaTeX is inserted without HTML escaping, as
            # before -- confirm whether consumers expect escaped '<' / '&'.
            return f"<span>{str(newdom)}</span>"

        # A callable replacement is inserted literally, so the backslashes
        # in the generated LaTeX are not interpreted as regex escapes.
        return _MATHML_PATTERN.sub(_to_latex_span, equation)
    except etree.XMLSyntaxError as e:
        print(f"XML解析错误: {e}")
    except etree.XSLTParseError as e:
        print(f"XSLT解析错误: {e}")
    except Exception as e:
        print(f"发生错误: {e}")
    # Every failure path reports the problem above and yields None.
    return None
# Demo run: convert the sample fragment and print the resulting markup
# (prints None if the conversion reported an error).
tex = mathml2latex(equation=mathml)
print(tex)
# Dependency files: GitHub - oerpub/mathconverter: Converts from AsciiMath, LaTeX, MathML to LaTeX, MathML