目录
最终效果
任务要求
希望传入一个文件夹地址,最后输出一个tex文件。程序需要遍历该文件夹下的所有py文件,并整合出所有的“类”和“函数”写入tex文件。此外,对于函数注释需要提到代码块之外,并转为LaTeX文本。
实现代码
import re
import os
import chardet
def detect_encoding(file_path):
"""
检测文件的编码
:param file_path: 文件路径
:return: 文件编码
"""
with open(file_path, 'rb') as f:
raw_data = f.read()
result = chardet.detect(raw_data)
return result['encoding']
def extract_functions_and_docstrings(file_path):
classes = []
functions = []
try:
encoding = detect_encoding(file_path)
with open(file_path, 'r', encoding=encoding) as file:
content = file.read()
# 匹配类定义
class_pattern = re.compile(r'(class\s+(\w+)\s*:\s*(.*?)(?=(^class\s|\Z)))', re.DOTALL | re.MULTILINE)
for class_match in class_pattern.finditer(content):
class_name = class_match.group(2)
class_body = class_match.group(3)
class_docstring = re.search(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', class_body, re.DOTALL)
if class_docstring:
class_docstring_text = class_docstring.group(0).strip()
class_body = re.sub(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', '', class_body, 1, re.DOTALL).strip()
else:
class_docstring_text = ""
class_functions = []
# 匹配类中的函数定义
func_pattern = re.compile(r'def\s+(\w+)\s*\((.*?)\):(.*?)(?=(^def\s|\Z))', re.DOTALL | re.MULTILINE)
for func_match in func_pattern.finditer(class_body):
func_name = func_match.group(1)
args = func_match.group(2)
body_and_docstring = func_match.group(3).strip()
docstring = re.search(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', body_and_docstring, re.DOTALL)
if docstring:
docstring_text = docstring.group(0).strip()
body = re.sub(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', '', body_and_docstring, 1, re.DOTALL).strip()
else:
docstring_text = ""
body = body_and_docstring
class_functions.append((func_name, args, docstring_text, body))
classes.append((class_name, class_docstring_text, class_functions))
# 匹配文件级别的函数定义
func_pattern = re.compile(r'def\s+(\w+)\s*\((.*?)\):(.*?)(?=(^def\s|\Z))', re.DOTALL | re.MULTILINE)
for func_match in func_pattern.finditer(content):
if not re.search(r'class\s+(\w+)\s*:\s*(.*?)', content[:func_match.start()], re.DOTALL | re.MULTILINE):
func_name = func_match.group(1)
args = func_match.group(2)
body_and_docstring = func_match.group(3).strip()
docstring = re.search(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', body_and_docstring, re.DOTALL)
if docstring:
docstring_text = docstring.group(0).strip()
body = re.sub(r'\'\'\'(.*?)\'\'\'|"""(.*?)"""', '', body_and_docstring, 1, re.DOTALL).strip()
else:
docstring_text = ""
body = body_and_docstring
functions.append((func_name, args, docstring_text, body))
except Exception as e:
print(f"Error processing file {file_path}: {e}")
return classes, functions
def wrap_with_texttt(name):
"""
如果名称中包含下划线,使用 \texttt{} 包裹名称
:param name: 名称
:return: 处理后的名称
"""
if '_' in name:
if name == '__init__':
return r'\texttt{\_\_init\_\_}' # 特殊处理 __init__
return r'\texttt{' + name.replace('_', '\_') + '}'
return name
def process_underscores_in_text(text):
"""
处理文本中出现的下划线
:param text: 文本
:return: 处理后的文本
"""
words = text.split()
new_words = []
for word in words:
if '_' in word:
new_words.append(f"${word}$")
else:
new_words.append(word)
return ' '.join(new_words)
def escape_special_chars(line):
"""
转义特殊字符
:param line: 输入的行
:return: 转义后的行
"""
special_chars = {
'#': r'',
'%': r'\% ',
'$': r'',
'&': r'\& ',
'\\': r'\verb|\\| ',
'{': r'\{ ',
'}': r'\} ',
'_': r'\_',
'^': r'\^{}',
'~': r'\~{}',
'"""': r''
}
for char, escaped in special_chars.items():
line = line.replace(char, escaped)
return line
def generate_latex(classes, functions, py_file_name):
latex_code = f"\\section{{{wrap_with_texttt(py_file_name)}}}\n"
# 处理文件级别的函数
for func_name, args, docstring_text, body in functions:
wrapped_func_name = wrap_with_texttt(func_name)
latex_code += f"\\subsection{{{wrapped_func_name}}}\n"
if docstring_text:
processed_docstring = process_underscores_in_text(docstring_text)
# 转义 docstring 中的特殊字符
processed_docstring = escape_special_chars(processed_docstring)
latex_code += processed_docstring.replace('\n', '\\newline ') + '\\newline \\newline'
latex_code += "\\begin{lstlisting}[language=Python]\n"
latex_code += f"def {func_name}({args}):\n"
for line in body.splitlines():
if line.strip():
latex_code += " " + line + "\n" # 不需要转义 lstlisting 块内的内容
latex_code += "\\end{lstlisting}\n\n"
# 处理类及其内部函数
for class_name, class_docstring_text, class_functions in classes:
wrapped_class_name = wrap_with_texttt(class_name)
latex_code += f"\\subsection{{{wrapped_class_name}}}\n"
if class_docstring_text:
processed_docstring = process_underscores_in_text(class_docstring_text)
# 转义 class_docstring 中的特殊字符
processed_docstring = escape_special_chars(processed_docstring)
latex_code += processed_docstring.replace('\n', '\\newline ') + '\\newline \\newline'
for func_name, args, docstring_text, body in class_functions:
wrapped_func_name = wrap_with_texttt(func_name)
latex_code += f"\\subsubsection{{{wrapped_func_name}}}\n"
if docstring_text:
processed_docstring = process_underscores_in_text(docstring_text)
# 转义 docstring 中的特殊字符
processed_docstring = escape_special_chars(processed_docstring)
latex_code += processed_docstring.replace('\n', '\\newline ') + '\\newline \\newline'
latex_code += "\\begin{lstlisting}[language=Python]\n"
latex_code += f" def {func_name}({args}):\n"
for line in body.splitlines():
if line.strip():
latex_code += " " + line + "\n" # 不需要转义 lstlisting 块内的内容
latex_code += "\\end{lstlisting}\n\n"
return latex_code
def process_folder(folder_path):
all_latex = ""
for root, dirs, files in os.walk(folder_path):
for file in files:
if file.endswith('.py'):
file_path = os.path.join(root, file)
print(f"Processing file: {file_path}")
classes, functions = extract_functions_and_docstrings(file_path)
all_latex += generate_latex(classes, functions, file)
return all_latex
latex_template = r"""\documentclass[UTF8]{ctexart}
\usepackage{listings}
\usepackage{xcolor}
\usepackage{geometry}
\usepackage{hyperref}
% 设置页面边距
\geometry{margin=2cm}
% 配置 hyperref 宏包
\hypersetup{
colorlinks=true,
linkcolor=blue,
urlcolor=blue,
citecolor=blue
}
% 设置代码环境样式
\lstset{
basicstyle=\small\ttfamily,
columns=flexible,
numbers=left,
breaklines=true,
numberstyle=\footnotesize\color{darkgray},
frame=none,
backgroundcolor=\color[RGB]{245,245,244},
keywordstyle=\color[RGB]{40,40,255},
commentstyle=\itshape\color[RGB]{0,96,96},
stringstyle=\rmfamily\slshape\color[RGB]{128,0,0},
showstringspaces=false,
language=Python
}
\begin{document}
% 封面页
\begin{titlepage}
\centering
\vspace*{5cm}
\Huge \textbf{文档标题}
\vfill
\end{titlepage}
% 生成目录
\tableofcontents
\clearpage
"""
if __name__ == "__main__":
folder_path = "./code" # 替换为实际的文件夹路径
all_latex = process_folder(folder_path)
final_latex = latex_template + all_latex + r"\end{document}"
# 将生成的 LaTeX 代码写入 txt 文件
output_file_path = "output_latex.tex"
with open(output_file_path, 'w', encoding='utf-8') as output_file:
output_file.write(final_latex)
print(f"LaTeX 代码已成功导出到 {output_file_path}")
只需要修改 `__main__` 中的 `folder_path` 和 `output_file_path` 即可。