百度输入法自定义短语-配置文件更新器-CSDN博客

本文链接：https://blog.csdn.net/qq_37639139/article/details/145186709

总结在前

正则里有两个捕获组时，re.findall 返回的是 (词条头, 词条内容) 二元组的列表，用起来和字典的键值对差不多
先放入全部新词条，再追加旧列表中独有的词条，从而省去逐条替换的操作，这是GPT的创新
然后注意换行符开头结尾、re.M和re.S的区别
\s和strip()范围差不多
注意文本编码兼容性
(?:) 和 (?=) 的区别和使用，后置（(?=) 前瞻）用的比较多，标题分割，多级应用待试

old_phrases = re.findall(r"([^\S\n]*\d+\s*,\s*[\w]+\s*=)(.*?)(?=[^\S\n]*\d+\s*,\s*[\w]+\s*=|\Z)", config_content, re.S)

GPT沟通思路

百度输入法自定义短语配置文件的语法格式是这样开头的
\d+,\w+=  姑且称这个格式为词条头
\w+是要键入的字母，然后\d+是该词条显示的顺序，=后面是具体的词条内容
用python写一个百度输入法自定义短语更新器
通过浏览路径的方式指定txt配置文件路径
然后我在一个长文本框里面放入新词条列表（包括词条头和具体词条内容）
点击更新按钮以后，先读取指定路径配置文件，如果没有提示不存在
读取成功后，先对长文本框内的内容读取为一个列表，每个列表元素都是[词条头，词条内容]，匹配的一个完整词条的关键是文本的开头格式符合词条头格式，文本后是另一个词条头或者文件尾，注意必须使用非贪婪匹配否则多个词条可能被识别为一个词条，然后每个词条被添加到列表中时都要被分割为词条头和词条内容
该词条列表为newwords
将配置文件读取出来的文本内容用同样的方式分割为oldwords
现在我们要进行分析，如果对newwords进行遍历，如果新词条的词条头在oldwords中存在，则将其词条内容替换成newwords里面对应的词条内容，如果词条头不存在，则将该词条直接添加到oldwords中去，然后将处理过的oldwords前后链接还原成整段文本替换原配置文件内容并保存，完成后弹窗提示。

源代码

import tkinter as tk
from tkinter import filedialog, messagebox
import re
import chardet

def detect_encoding(file_path):
    """Return the encoding chardet reports for the file at *file_path*.

    The whole file is read in binary mode and handed to ``chardet.detect``;
    the ``'encoding'`` item of the detection result is returned as-is.
    """
    with open(file_path, 'rb') as stream:
        detection = chardet.detect(stream.read())
    return detection['encoding']

# One entry = header + body. Group 1 is the header ("<order>,<code>=", with
# optional leading horizontal whitespace); group 2 is the body, matched
# non-greedily across lines (re.S) up to the next header or the end of the text.
_PHRASE_PATTERN = re.compile(
    r"([^\S\n]*\d+\s*,\s*[\w]+\s*=)(.*?)(?=[^\S\n]*\d+\s*,\s*[\w]+\s*=|\Z)",
    re.S,
)


def _parse_phrases(text):
    """Split *text* into a list of (header, body) tuples with stripped headers."""
    return [(header.strip(), body) for header, body in _PHRASE_PATTERN.findall(text)]


def _merge_phrases(old_phrases, new_phrases):
    """Return the new entries followed by the old entries they do not override."""
    merged = list(new_phrases)
    # Build the lookup once instead of rebuilding a list for every old entry.
    new_keys = {key for key, _ in new_phrases}
    kept = [(key, value) for key, value in old_phrases if key not in new_keys]
    if merged and kept and not merged[-1][1].endswith("\n"):
        # Without a line break here the first kept old entry would be glued
        # onto the last new entry's line when the pieces are joined.
        last_key, last_value = merged[-1]
        merged[-1] = (last_key, last_value + "\n")
    merged.extend(kept)
    return merged


def update_baidu_phrase(filepath, new_phrases_text):
    """Merge new entries into a Baidu IME custom-phrase configuration file.

    The file at *filepath* is read with the encoding reported by
    ``detect_encoding`` and split into (header, body) entries; *new_phrases_text*
    is split the same way. The result written back consists of every new entry
    followed by the old entries whose header does not appear among the new ones,
    so a new entry replaces an old entry with the same header. The file is
    rewritten as ``utf-8-sig``.

    Outcomes are reported through message boxes; the function returns ``None``
    in every case. If the file cannot be read, or *new_phrases_text* contains no
    recognisable entry, the file is left untouched.

    Args:
        filepath: Path of the configuration file to update.
        new_phrases_text: Text containing the new entries (header plus body).
    """
    # Read the existing configuration with the detected encoding.
    try:
        encoding = detect_encoding(filepath)
        with open(filepath, 'r', encoding=encoding) as f:
            config_content = f.read()
    except FileNotFoundError:
        messagebox.showerror("错误", "配置文件不存在！")
        return
    except Exception as e:
        messagebox.showerror("错误", f"无法读取文件: {str(e)}")
        return

    new_phrases = _parse_phrases(new_phrases_text)
    if not new_phrases:
        # Nothing to merge: do not rewrite the file or claim a successful update.
        messagebox.showwarning("提示", "未识别到任何新词条，配置文件未修改。")
        return

    old_phrases = _parse_phrases(config_content)
    updated_phrases = _merge_phrases(old_phrases, new_phrases)

    # Console trace of the merged result.
    print("更新后的短语列表：")
    for key, value in updated_phrases:
        print(f"{key}{value}")

    # Rebuild the file content by concatenating header + body of every entry.
    updated_content = ''.join(key + value for key, value in updated_phrases)

    try:
        with open(filepath, 'w', encoding='utf-8-sig') as f:
            f.write(updated_content)
    except Exception as e:
        messagebox.showerror("错误", f"保存文件时出错: {str(e)}")
    else:
        messagebox.showinfo("成功", "百度输入法自定义短语更新成功！")

def browse_file():
    """Let the user pick a configuration file and show its path in the entry.

    Opens a file-selection dialog offering text files and all files. When a
    file is chosen, the path entry is replaced with the selected path. When the
    dialog yields no selection, the entry keeps its current content.
    """
    filepath = filedialog.askopenfilename(
        defaultextension=".txt", filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")]
    )
    if not filepath:
        # An empty result means nothing was selected; overwriting the entry
        # here would wipe a path the user had already entered.
        return
    filepath_entry.delete(0, tk.END)
    filepath_entry.insert(0, filepath)

def update_phrases():
    """Click handler of the update button.

    Passes the current content of the path entry and of the new-entries text
    box to ``update_baidu_phrase``.
    """
    update_baidu_phrase(
        filepath_entry.get(),
        new_phrases_textarea.get("1.0", tk.END),
    )

# Build the main window.
root = tk.Tk()
root.title("百度输入法自定义短语更新器")

# Row 0: caption, path entry and browse button for the configuration file.
path_label = tk.Label(root, text="配置文件路径:")
path_label.grid(row=0, column=0, sticky=tk.W)
filepath_entry = tk.Entry(root, width=50)
filepath_entry.grid(row=0, column=1)
browse_button = tk.Button(root, text="浏览", command=browse_file)
browse_button.grid(row=0, column=2)

# Row 1: caption and multi-line text box holding the new entries.
phrases_label = tk.Label(root, text="新词条列表:")
phrases_label.grid(row=1, column=0, sticky=tk.W)
new_phrases_textarea = tk.Text(root, height=10, width=50)
new_phrases_textarea.grid(row=1, column=1, columnspan=2)

# Row 2: button that triggers the update.
update_button = tk.Button(root, text="更新", command=update_phrases)
update_button.grid(row=2, column=1)

# Hand control to the Tk event loop.
root.mainloop()