【Python2LaTeX】py文件/目录转LaTeX文档

琛説
已于 2025-03-02 11:24:38 修改
阅读量391
点赞数 5
文章标签： python 开发语言
于 2025-03-02 11:17:04 首次发布
本文链接：https://blog.csdn.net/m0_66817474/article/details/145960299
版权
最终效果
任务要求
实现代码
最终效果

任务要求

希望传入一个文件夹地址，最后输出一个tex文件。程序需要遍历该文件夹下的所有py文件，并整合出所有的“类”和“函数”写入tex文件。此外，对于函数注释需要提到代码块之外，并转为LaTeX文本。
实现代码

import re
import os
import chardet


def detect_encoding(file_path):
    """
    检测文件的编码
    :param file_path: 文件路径
    :return: 文件编码
    """
    with open(file_path, 'rb') as f:
        raw_data = f.read()
        result = chardet.detect(raw_data)
        return result['encoding']


def extract_functions_and_docstrings(file_path):
    classes = []
    functions = []
    try:
        encoding = detect_encoding(file_path)
        with open(file_path, 'r', encoding=encoding) as file:
            content = file.read()
            # 匹配类定义
            class_pattern = re.compile(r'(class\s+(\w+)\s*:\s*(.*?)(?=(^class\s|\Z)))', re.DOTALL | re.MULTILINE)
            for class_match in class_pattern.finditer(content):
                class_name = class_match.group(2)
                class_body = class_match.group(3)
                class_docstring = re.search(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', class_body, re.DOTALL)
                if class_docstring:
                    class_docstring_text = class_docstring.group(0).strip()
                    class_body = re.sub(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', '', class_body, 1, re.DOTALL).strip()
                else:
                    class_docstring_text = ""
                class_functions = []
                # 匹配类中的函数定义
                func_pattern = re.compile(r'def\s+(\w+)\s*\((.*?)\):(.*?)(?=(^def\s|\Z))', re.DOTALL | re.MULTILINE)
                for func_match in func_pattern.finditer(class_body):
                    func_name = func_match.group(1)
                    args = func_match.group(2)
                    body_and_docstring = func_match.group(3).strip()
                    docstring = re.search(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', body_and_docstring, re.DOTALL)
                    if docstring:
                        docstring_text = docstring.group(0).strip()
                        body = re.sub(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', '', body_and_docstring, 1, re.DOTALL).strip()
                    else:
                        docstring_text = ""
                        body = body_and_docstring
                    class_functions.append((func_name, args, docstring_text, body))
                classes.append((class_name, class_docstring_text, class_functions))

            # 匹配文件级别的函数定义
            func_pattern = re.compile(r'def\s+(\w+)\s*\((.*?)\):(.*?)(?=(^def\s|\Z))', re.DOTALL | re.MULTILINE)
            for func_match in func_pattern.finditer(content):
                if not re.search(r'class\s+(\w+)\s*:\s*(.*?)', content[:func_match.start()], re.DOTALL | re.MULTILINE):
                    func_name = func_match.group(1)
                    args = func_match.group(2)
                    body_and_docstring = func_match.group(3).strip()
                    docstring = re.search(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', body_and_docstring, re.DOTALL)
                    if docstring:
                        docstring_text = docstring.group(0).strip()
                        body = re.sub(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', '', body_and_docstring, 1, re.DOTALL).strip()
                    else:
                        docstring_text = ""
                        body = body_and_docstring
                    functions.append((func_name, args, docstring_text, body))
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
    return classes, functions


def wrap_with_texttt(name):
    """
    如果名称中包含下划线，使用 \texttt{} 包裹名称
    :param name: 名称
    :return: 处理后的名称
    """
    if '_' in name:
        if name == '__init__':
            return r'\texttt{\_\_init\_\_}'  # 特殊处理 __init__
        return r'\texttt{' + name.replace('_', '\_') + '}'
    return name


def process_underscores_in_text(text):
    """
    处理文本中出现的下划线
    :param text: 文本
    :return: 处理后的文本
    """
    words = text.split()
    new_words = []
    for word in words:
        if '_' in word:
            new_words.append(f"${word}$")
        else:
            new_words.append(word)
    return ' '.join(new_words)


def escape_special_chars(line):
    """
    转义特殊字符
    :param line: 输入的行
    :return: 转义后的行
    """
    special_chars = {
        '#': r'',
        '%': r'\% ',
        '$': r'',
        '&': r'\& ',
        '\\': r'\verb|\\| ',
        '{': r'\{ ',
        '}': r'\} ',
        '_': r'\_',
        '^': r'\^{}',
        '~': r'\~{}',
        '"""': r''
    }
    for char, escaped in special_chars.items():
        line = line.replace(char, escaped)
    return line


def generate_latex(classes, functions, py_file_name):
    latex_code = f"\\section{{{wrap_with_texttt(py_file_name)}}}\n"
    # 处理文件级别的函数
    for func_name, args, docstring_text, body in functions:
        wrapped_func_name = wrap_with_texttt(func_name)
        latex_code += f"\\subsection{{{wrapped_func_name}}}\n"
        if docstring_text:
            processed_docstring = process_underscores_in_text(docstring_text)
            # 转义 docstring 中的特殊字符
            processed_docstring = escape_special_chars(processed_docstring)
            latex_code += processed_docstring.replace('\n', '\\newline ') + '\\newline \\newline'
        latex_code += "\\begin{lstlisting}[language=Python]\n"
        latex_code += f"def {func_name}({args}):\n"
        for line in body.splitlines():
            if line.strip():
                latex_code += "    " + line + "\n"  # 不需要转义 lstlisting 块内的内容
        latex_code += "\\end{lstlisting}\n\n"

    # 处理类及其内部函数
    for class_name, class_docstring_text, class_functions in classes:
        wrapped_class_name = wrap_with_texttt(class_name)
        latex_code += f"\\subsection{{{wrapped_class_name}}}\n"
        if class_docstring_text:
            processed_docstring = process_underscores_in_text(class_docstring_text)
            # 转义 class_docstring 中的特殊字符
            processed_docstring = escape_special_chars(processed_docstring)
            latex_code += processed_docstring.replace('\n', '\\newline ') + '\\newline \\newline'
        for func_name, args, docstring_text, body in class_functions:
            wrapped_func_name = wrap_with_texttt(func_name)
            latex_code += f"\\subsubsection{{{wrapped_func_name}}}\n"
            if docstring_text:
                processed_docstring = process_underscores_in_text(docstring_text)
                # 转义 docstring 中的特殊字符
                processed_docstring = escape_special_chars(processed_docstring)
                latex_code += processed_docstring.replace('\n', '\\newline ') + '\\newline \\newline'
            latex_code += "\\begin{lstlisting}[language=Python]\n"
            latex_code += f"    def {func_name}({args}):\n"
            for line in body.splitlines():
                if line.strip():
                    latex_code += "        " + line + "\n"  # 不需要转义 lstlisting 块内的内容
            latex_code += "\\end{lstlisting}\n\n"

    return latex_code


def process_folder(folder_path):
    all_latex = ""
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                print(f"Processing file: {file_path}")
                classes, functions = extract_functions_and_docstrings(file_path)
                all_latex += generate_latex(classes, functions, file)
    return all_latex


latex_template = r"""\documentclass[UTF8]{ctexart}
\usepackage{listings}
\usepackage{xcolor}
\usepackage{geometry}
\usepackage{hyperref}

% 设置页面边距
\geometry{margin=2cm}

% 配置 hyperref 宏包
\hypersetup{
    colorlinks=true,
    linkcolor=blue,
    urlcolor=blue,
    citecolor=blue
}

% 设置代码环境样式
\lstset{
    basicstyle=\small\ttfamily,
    columns=flexible,
    numbers=left,
    breaklines=true,
    numberstyle=\footnotesize\color{darkgray},
    frame=none,
    backgroundcolor=\color[RGB]{245,245,244},
    keywordstyle=\color[RGB]{40,40,255},
    commentstyle=\itshape\color[RGB]{0,96,96},
    stringstyle=\rmfamily\slshape\color[RGB]{128,0,0},
    showstringspaces=false,
    language=Python
}

\begin{document}

% 封面页
\begin{titlepage}
    \centering
    \vspace*{5cm}
    \Huge \textbf{文档标题}
    \vfill
\end{titlepage}

% 生成目录
\tableofcontents
\clearpage
"""

if __name__ == "__main__":
    folder_path = "./code"  # 替换为实际的文件夹路径
    all_latex = process_folder(folder_path)
    final_latex = latex_template + all_latex + r"\end{document}"

    # 将生成的 LaTeX 代码写入 txt 文件
    output_file_path = "output_latex.tex"
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.write(final_latex)

    print(f"LaTeX 代码已成功导出到 {output_file_path}")
只需要修改 `__main__` 中的 `folder_path` 和 `output_file_path` 即可。