python实现给中文词语加拼音默写文档编辑-CSDN博客

本文链接：https://blog.csdn.net/jerry201108/article/details/139916450

一、初始调试

二、input2.txt【数据源txt文件】

三、拼音在文字右侧【脚本1】

四、纯拼音4列显示【脚本2】

五、拼音在上词语在下【脚本3】

场景：可根据需要将初中其他年级的词语整理到txt文件中，通过脚本自动添加拼音，并生成可打印的Word文档（含纯拼音默写版）。

一、初始调试

统编版八年级上册语文电子课本（最新高清版）

from pypinyin import pinyin, Style

# Input word list (one entry per line) and the plain-text file to generate.
input_file_path = 'input2.txt'
output_file_path = 'output.txt'

# Number of "word pinyin" entries written on each output line.
WORDS_PER_LINE = 4


def annotate(word):
    """Return ``word`` followed by a space and its tone-marked pinyin.

    Syllables are separated by single spaces.
    """
    syllables = pinyin(word, style=Style.TONE)
    pinyin_str = ' '.join(''.join(s) for s in syllables)
    return f"{word} {pinyin_str}"


def group_entries(entries, size=WORDS_PER_LINE):
    """Split ``entries`` into consecutive lists of at most ``size`` items.

    The last list holds the remainder when the total is not a multiple of
    ``size``; an empty input yields an empty list.
    """
    entries = list(entries)
    return [entries[k:k + size] for k in range(0, len(entries), size)]


def main():
    """Read the word list, annotate each entry and write grouped lines."""
    with open(input_file_path, 'r', encoding='utf-8') as input_file:
        stripped = [raw.strip() for raw in input_file]

    # Blank lines carry no word; skipping them keeps stray " " entries out
    # of the output and out of the per-line count.
    entries = [annotate(text) for text in stripped if text]

    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        for group in group_entries(entries):
            output_file.write(' '.join(group) + '\n')

    print("转换完成，结果已写入", output_file_path)


if __name__ == "__main__":
    main()

二、input2.txt【数据源txt文件】

第一章
第1课 《消息二则》
溃退
泄气
督战
要塞
业已
摧枯拉朽
锐不可当
第2课 《首届诺贝尔奖颁发》
颁发
遗嘱
建树
仲裁
巨额
第3课 《"飞天"凌空》
凌空
翘首
酷似
潇洒
轻盈
悄然
由衷
新秀
屏息敛声
眼花缭乱
如梦初醒
第4课 《一着惊海天》
浩瀚
娴熟
咆哮
镌刻
一丝不苟
白手起家
殚精竭虑
第5课 《国行公祭，为佑世界和平》
初衷
杀戮
篡改
抵赖
妄图
辱没
呓语
遁形
铭记
彰显
惨绝人寰
振聋发聩
第二章
第6课 《藤野先生》
挟
樱花
绯红
宛如
掌故
落第
畸形
不逊
匿名
诘责
呜呼
凄然
教诲
油光可鉴
杳无消息
抑扬顿挫
正人君子
深恶痛疾
第7课 《回忆我的母亲》
溺
佃农
劳碌
私塾
周济
宽厚
仁慈
连夜
慰勉
不辍
任劳任怨
为富不仁
第8课 《列夫·托尔斯泰》
颊
黝黑
粗糙
崎岖
平庸
滞留
愚钝
器宇
蒙昧
酒肆
缰绳
轩昂
胆怯
藏污纳垢
鹤立鸡群
正襟危坐
诚惶诚恐
入木三分
第9课 《美丽的颜色》
微妙
燥热
沥青
骤雨
窒息
吹嘘
荧光
筋疲力尽
和颜悦色

三、拼音在文字右侧【脚本1】

import os
from pypinyin import pinyin, Style
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Pt

# Input word list and the Word document to generate.
input_file_path = 'input2.txt'
output_file_path = 'output.docx'

# Lines starting with this character (e.g. "第一章", "第1课 ...") are headings.
HEADING_PREFIX = '第'


def split_sections(lines):
    """Group raw input lines into ``(heading, words)`` pairs.

    A line whose stripped text starts with ``HEADING_PREFIX`` opens a new
    section; every following non-blank line up to the next heading is one of
    its words. Blank lines are skipped, and lines that appear before the
    first heading are ignored.
    """
    sections = []
    current_words = None
    for raw in lines:
        text = raw.strip()
        if not text:
            continue
        if text.startswith(HEADING_PREFIX):
            current_words = []
            sections.append((text, current_words))
        elif current_words is not None:
            current_words.append(text)
    return sections


def row_count(word_total):
    """Return how many table rows are needed at two words per row."""
    return (word_total + 1) // 2


def to_pinyin(word):
    """Return the tone-marked pinyin of ``word``, syllables joined by spaces."""
    return ' '.join(''.join(s) for s in pinyin(word, style=Style.TONE))


def add_heading_paragraph(doc, text):
    """Append ``text`` to ``doc`` as a bold, 12 pt, centred paragraph."""
    paragraph = doc.add_paragraph()
    run = paragraph.add_run(text)
    run.bold = True
    run.font.size = Pt(12)
    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER


def add_word_table(doc, words):
    """Append a 4-column table laid out as word | pinyin | word | pinyin.

    Words fill the table left to right, two per row; with an odd number of
    words the last two cells stay empty.
    """
    table = doc.add_table(rows=row_count(len(words)), cols=4)
    for idx, word in enumerate(words):
        row, pair = divmod(idx, 2)
        table.cell(row, pair * 2).text = word
        table.cell(row, pair * 2 + 1).text = to_pinyin(word)


def main():
    """Build the Word document from the input word list."""
    if not os.path.exists(input_file_path):
        print(f"输入文件 {input_file_path} 不存在。")
        # SystemExit works even where the site-provided exit() is missing.
        raise SystemExit(1)

    with open(input_file_path, 'r', encoding='utf-8') as input_file:
        lines = input_file.readlines()

    # Always start from an empty document so that re-running the script does
    # not append a second copy of every section to an existing output file.
    doc = Document()

    for heading, words in split_sections(lines):
        add_heading_paragraph(doc, heading)
        print('分成2列的总行数：', row_count(len(words)))
        # A heading without words (a chapter line directly followed by a
        # lesson line) gets no table instead of an empty zero-row one.
        if words:
            add_word_table(doc, words)

    doc.save(output_file_path)
    print("数据已处理并写入Word文档", output_file_path)


if __name__ == "__main__":
    main()

四、纯拼音4列显示【脚本2】

# -*- coding: utf-8 -*-
# @Author: GraceJiang
# @Date: 2024/6/24
# @Description:
# 实现仅仅拼音显示，表格4列显示，方便自己默写用


import os
from pypinyin import pinyin, Style
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Pt
import math

# Input word list and the Word document to generate.
input_file_path = 'input2.txt'
output_file_path = 'output.docx'

# Lines starting with this character (e.g. "第一章", "第1课 ...") are headings.
HEADING_PREFIX = '第'
# Number of pinyin entries placed side by side in one table row.
COLUMNS = 4


def split_sections(lines):
    """Group raw input lines into ``(heading, words)`` pairs.

    A line whose stripped text starts with ``HEADING_PREFIX`` opens a new
    section; every following non-blank line up to the next heading is one of
    its words. Blank lines are skipped, and lines that appear before the
    first heading are ignored.
    """
    sections = []
    current_words = None
    for raw in lines:
        text = raw.strip()
        if not text:
            continue
        if text.startswith(HEADING_PREFIX):
            current_words = []
            sections.append((text, current_words))
        elif current_words is not None:
            current_words.append(text)
    return sections


def pinyin_row_count(word_total, columns=COLUMNS):
    """Return how many pinyin rows ``word_total`` words need."""
    return math.ceil(word_total / columns)


def grid_position(index, columns=COLUMNS):
    """Return the ``(row, column)`` of the pinyin cell for word ``index``.

    Every group of ``columns`` words occupies two table rows: the pinyin row
    returned here and the row directly beneath it.
    """
    group, column = divmod(index, columns)
    return group * 2, column


def to_pinyin(word):
    """Return the tone-marked pinyin of ``word``, syllables joined by spaces."""
    return ' '.join(''.join(s) for s in pinyin(word, style=Style.TONE))


def add_heading_paragraph(doc, text):
    """Append ``text`` to ``doc`` as a bold, 12 pt, centred paragraph."""
    paragraph = doc.add_paragraph()
    run = paragraph.add_run(text)
    run.bold = True
    run.font.size = Pt(12)
    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER


def add_pinyin_table(doc, words, columns=COLUMNS):
    """Append a table holding only the pinyin of ``words``.

    Each pinyin row is followed by a row that is left empty (presumably the
    space for writing the word by hand during dictation).
    """
    total_rows = pinyin_row_count(len(words), columns) * 2
    table = doc.add_table(rows=total_rows, cols=columns)
    for idx, word in enumerate(words):
        row, column = grid_position(idx, columns)
        table.cell(row, column).text = to_pinyin(word)


def main():
    """Build the pinyin-only Word document from the input word list."""
    if not os.path.exists(input_file_path):
        print(f"输入文件 {input_file_path} 不存在。")
        # SystemExit works even where the site-provided exit() is missing.
        raise SystemExit(1)

    with open(input_file_path, 'r', encoding='utf-8') as input_file:
        lines = input_file.readlines()

    # Always start from an empty document so that re-running the script does
    # not append a second copy of every section to an existing output file.
    doc = Document()

    for heading, words in split_sections(lines):
        add_heading_paragraph(doc, heading)
        print(words)
        print('分成的总行数：', pinyin_row_count(len(words)))
        # A heading without words (a chapter line directly followed by a
        # lesson line) gets no table instead of an empty zero-row one.
        if words:
            add_pinyin_table(doc, words)

    doc.save(output_file_path)
    print("数据已处理并写入Word文档", output_file_path)


if __name__ == "__main__":
    main()

五、拼音在上词语在下【脚本3】

# -*- coding: utf-8 -*-
# @Author: GraceJiang
# @Date: 2024/6/24
# @Description:
# Pinyin in the upper row with the word directly beneath it, laid out in a 4-column table.


import os
from pypinyin import pinyin, Style
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Pt
import math

# Input word list and the Word document to generate.
input_file_path = 'input2.txt'
output_file_path = '八上语文读读写写2.docx'

# Lines starting with this character (e.g. "第一章", "第1课 ...") are headings.
HEADING_PREFIX = '第'
# Number of pinyin/word pairs placed side by side in one table row.
COLUMNS = 4


def split_sections(lines):
    """Group raw input lines into ``(heading, words)`` pairs.

    A line whose stripped text starts with ``HEADING_PREFIX`` opens a new
    section; every following non-blank line up to the next heading is one of
    its words. Blank lines are skipped, and lines that appear before the
    first heading are ignored.
    """
    sections = []
    current_words = None
    for raw in lines:
        text = raw.strip()
        if not text:
            continue
        if text.startswith(HEADING_PREFIX):
            current_words = []
            sections.append((text, current_words))
        elif current_words is not None:
            current_words.append(text)
    return sections


def pinyin_row_count(word_total, columns=COLUMNS):
    """Return how many pinyin rows ``word_total`` words need."""
    return math.ceil(word_total / columns)


def grid_position(index, columns=COLUMNS):
    """Return the ``(row, column)`` of the pinyin cell for word ``index``.

    Every group of ``columns`` words occupies two table rows: the pinyin row
    returned here and the word row directly beneath it.
    """
    group, column = divmod(index, columns)
    return group * 2, column


def to_pinyin(word):
    """Return the tone-marked pinyin of ``word``, syllables joined by spaces."""
    return ' '.join(''.join(s) for s in pinyin(word, style=Style.TONE))


def add_heading_paragraph(doc, text):
    """Append ``text`` to ``doc`` as a bold, 12 pt, centred paragraph."""
    paragraph = doc.add_paragraph()
    run = paragraph.add_run(text)
    run.bold = True
    run.font.size = Pt(12)
    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER


def add_pinyin_word_table(doc, words, columns=COLUMNS):
    """Append a table with each word's pinyin above the word itself."""
    total_rows = pinyin_row_count(len(words), columns) * 2
    table = doc.add_table(rows=total_rows, cols=columns)
    for idx, word in enumerate(words):
        row, column = grid_position(idx, columns)
        table.cell(row, column).text = to_pinyin(word)
        table.cell(row + 1, column).text = word


def main():
    """Build the pinyin-above-word Word document from the input word list."""
    if not os.path.exists(input_file_path):
        print(f"输入文件 {input_file_path} 不存在。")
        # SystemExit works even where the site-provided exit() is missing.
        raise SystemExit(1)

    with open(input_file_path, 'r', encoding='utf-8') as input_file:
        lines = input_file.readlines()

    # A new document is created on every run; an existing output file is
    # overwritten when the document is saved.
    doc = Document()

    for heading, words in split_sections(lines):
        add_heading_paragraph(doc, heading)
        print(words)
        print('分成的总行数：', pinyin_row_count(len(words)))
        # A heading without words (a chapter line directly followed by a
        # lesson line) gets no table instead of an empty zero-row one.
        if words:
            add_pinyin_word_table(doc, words)

    doc.save(output_file_path)
    print("数据已处理并写入Word文档", output_file_path)


if __name__ == "__main__":
    main()