import tkinter as tk
from tkinter import ttk, filedialog, messagebox, scrolledtext
import pandas as pd
import os
import threading
import tempfile
import zipfile
import shutil
import configparser
from xml.etree import ElementTree as ET
from openpyxl import load_workbook
import re
import datetime
# 辅助函数:将Excel列字母转换为索引(A=0, B=1, ..., AA=26等)
def excel_column_letter_to_index(letter):
"""Convert Excel column letter to zero-based index"""
index = 0
for char in letter:
index = index * 26 + (ord(char.upper()) - ord('A') + 1)
return index - 1
class ExcelComparatorApp:
def __init__(self, root):
self.root = root
self.root.title("Excel 文件比较工具")
self.root.geometry("1000x950")
self.root.minsize(900, 600)
# 加载配置
self.config = configparser.ConfigParser()
self.config_path = os.path.join(os.path.expanduser("~"), "excel_comparator_config.ini")
self.load_config()
# 初始化配置相关的属性
self.initialize_config_based_attributes()
# 配置样式
self.style = ttk.Style()
self.style.configure("TFrame", padding=10)
self.style.configure("TButton", padding=6)
self.style.configure("TLabel", padding=5)
self.style.configure("TCheckbutton", padding=5)
self.style.configure("Accent.TButton", background="#4CAF50", foreground="white", font=("Arial", 10, "bold"))
# 主容器使用网格布局管理器
self.main_frame = ttk.Frame(root)
self.main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
# 创建界面
self.create_interface()
# 初始化变量
self.old_file = ""
self.new_file = ""
# 确保界面元素可见
self.root.update_idletasks()
# 初始化后尝试加载列名
self.root.after(100, self.try_load_columns)
def initialize_config_based_attributes(self):
"""初始化所有配置相关的属性"""
# 文件路径
self.old_file = ""
self.new_file = ""
# 表格设置
self.skiprows = self.config.getint('SETTINGS', 'skiprows', fallback=9)
# 列范围(使用Excel字母格式)
self.signal_start_col = 0
self.signal_end_col = 12
self.data_start_col = 53
self.data_end_col = 55
# 报告类型变量
self.generate_text_report = tk.BooleanVar(
value=self.config.getboolean('SETTINGS', 'generate_text_report', fallback=True))
self.generate_excel_report = tk.BooleanVar(
value=self.config.getboolean('SETTINGS', 'generate_excel_report', fallback=True))
# 列加载基准文件选项
self.col_load_based_on = tk.StringVar(
value=self.config.get('SETTINGS', 'col_load_based_on', fallback='old'))
# 列选择变量
self.signal_col_vars = {}
self.data_col_vars = {}
# 初始化其他变量
self.old_file_entry = None
self.new_file_entry = None
self.old_sheet_var = None
self.new_sheet_var = None
self.output_path_entry = None
self.log_text = None
self.status_var = None
self.compare_btn = None
# 进度条相关
self.progress = None
self.status_label = None
# 初始化 skiprows_var
self.skiprows_var = tk.StringVar(value=str(self.skiprows))
# 列范围变量
self.signal_start_var = tk.StringVar(value=self.config.get('SETTINGS', 'signal_start', fallback='A'))
self.signal_end_var = tk.StringVar(value=self.config.get('SETTINGS', 'signal_end', fallback='M'))
self.data_start_var = tk.StringVar(value=self.config.get('SETTINGS', 'data_start', fallback='BT'))
self.data_end_var = tk.StringVar(value=self.config.get('SETTINGS', 'data_end', fallback='CD'))
# 报告类型变量
self.text_report_var = tk.BooleanVar(value=self.generate_text_report.get())
self.excel_report_var = tk.BooleanVar(value=self.generate_excel_report.get())
def try_load_columns(self):
"""尝试在界面初始化后加载列名(如果文件已存在)"""
if self.old_file_entry.get() or self.new_file_entry.get():
self.load_columns_from_file()
def load_config(self):
"""加载配置文件"""
if os.path.exists(self.config_path):
try:
self.config.read(self.config_path)
except:
self.create_default_config()
else:
self.create_default_config()
def create_default_config(self):
"""创建默认配置"""
self.config['SETTINGS'] = {
'skiprows': '9',
'signal_start': 'A',
'signal_end': 'M',
'data_start': 'BT',
'data_end': 'CD',
'generate_text_report': 'True',
'generate_excel_report': 'True',
'col_load_based_on': 'old',
'output_path': ''
}
self.save_config()
def save_config(self):
"""保存配置到文件"""
with open(self.config_path, 'w') as configfile:
self.config.write(configfile)
def create_interface(self):
"""创建界面元素 - 使用网格布局"""
# 创建行计数器
row = 0
# 文件选择部分
file_frame = ttk.LabelFrame(self.main_frame, text="文件选择")
file_frame.grid(row=row, column=0, sticky="ew", padx=5, pady=5)
row += 1
# 旧文件选择
ttk.Label(file_frame, text="旧Excel文件:").grid(row=0, column=0, sticky="w", padx=5, pady=5)
self.old_file_entry = ttk.Entry(file_frame, width=60)
self.old_file_entry.grid(row=0, column=1, padx=5, pady=5, sticky="ew")
self.old_file_entry.insert(0, self.config.get('SETTINGS', 'old_file', fallback=''))
ttk.Button(
file_frame,
text="浏览...",
command=lambda: self.browse_file(self.old_file_entry),
width=10
).grid(row=0, column=2, padx=5, pady=5)
# 旧文件Sheet配置
ttk.Label(file_frame, text="Sheet名称或索引:").grid(row=0, column=3, sticky="w", padx=(15, 5), pady=5)
self.old_sheet_var = tk.StringVar(value=self.config.get('SETTINGS', 'old_sheet', fallback=''))
self.old_sheet_entry = ttk.Entry(file_frame, textvariable=self.old_sheet_var, width=20)
self.old_sheet_entry.grid(row=0, column=4, padx=5, pady=5, sticky="w")
# 新文件选择
ttk.Label(file_frame, text="新Excel文件:").grid(row=1, column=0, sticky="w", padx=5, pady=5)
self.new_file_entry = ttk.Entry(file_frame, width=60)
self.new_file_entry.grid(row=1, column=1, padx=5, pady=5, sticky="ew")
self.new_file_entry.insert(0, self.config.get('SETTINGS', 'new_file', fallback=''))
ttk.Button(
file_frame,
text="浏览...",
command=lambda: self.browse_file(self.new_file_entry),
width=10
).grid(row=1, column=2, padx=5, pady=5)
# 新文件Sheet配置
ttk.Label(file_frame, text="Sheet名称或索引:").grid(row=1, column=3, sticky="w", padx=(15, 5), pady=5)
self.new_sheet_var = tk.StringVar(value=self.config.get('SETTINGS', 'new_sheet', fallback=''))
self.new_sheet_entry = ttk.Entry(file_frame, textvariable=self.new_sheet_var, width=20)
self.new_sheet_entry.grid(row=1, column=4, padx=5, pady=5, sticky="w")
file_frame.columnconfigure(1, weight=1)
# 设置按钮
settings_btn = ttk.Button(
self.main_frame,
text="高级设置",
command=self.open_settings_dialog,
width=15
)
settings_btn.grid(row=row, column=0, sticky="e", padx=5, pady=5)
row += 1
# 列选择部分
cols_frame = ttk.LabelFrame(self.main_frame, text="选择要比较的列 (文件选择后自动加载)")
cols_frame.grid(row=row, column=0, sticky="nsew", padx=5, pady=5)
# 配置网格权重
self.main_frame.rowconfigure(row, weight=1)
self.main_frame.columnconfigure(0, weight=1)
cols_frame.columnconfigure(0, weight=1)
cols_frame.rowconfigure(0, weight=1)
# 创建两个面板容器
paned_window = ttk.PanedWindow(cols_frame, orient=tk.HORIZONTAL)
paned_window.grid(row=0, column=0, sticky="nsew", padx=5, pady=5)
cols_frame.columnconfigure(0, weight=1)
cols_frame.rowconfigure(0, weight=1)
# 信号级别列
signal_frame = ttk.LabelFrame(paned_window, text="信号级别列")
signal_frame.grid(row=0, column=0, sticky="nsew")
# 信号级别列的滚动区域
signal_scroll = ttk.Scrollbar(signal_frame)
signal_scroll.pack(side=tk.RIGHT, fill=tk.Y)
self.signal_canvas = tk.Canvas(signal_frame, yscrollcommand=signal_scroll.set)
self.signal_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
signal_scroll.config(command=self.signal_canvas.yview)
self.signal_inner_frame = ttk.Frame(self.signal_canvas)
self.signal_canvas.create_window((0, 0), window=self.signal_inner_frame, anchor="nw")
self.signal_inner_frame.bind(
"<Configure>",
lambda e: self.signal_canvas.configure(scrollregion=self.signal_canvas.bbox("all"))
)
paned_window.add(signal_frame, weight=1)
# 数据级别列
data_frame = ttk.LabelFrame(paned_window, text="数据级别列")
data_frame.grid(row=0, column=1, sticky="nsew")
# 数据级别列的滚动区域
data_scroll = ttk.Scrollbar(data_frame)
data_scroll.pack(side=tk.RIGHT, fill=tk.Y)
self.data_canvas = tk.Canvas(data_frame, yscrollcommand=data_scroll.set)
self.data_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
data_scroll.config(command=self.data_canvas.yview)
self.data_inner_frame = ttk.Frame(self.data_canvas)
self.data_canvas.create_window((0, 0), window=self.data_inner_frame, anchor="nw")
self.data_inner_frame.bind(
"<Configure>",
lambda e: self.data_canvas.configure(scrollregion=self.data_canvas.bbox("all"))
)
paned_window.add(data_frame, weight=1)
# 绑定鼠标滚轮事件
self.signal_canvas.bind("<MouseWheel>", lambda e: self.signal_canvas.yview_scroll(int(-1*(e.delta/120)), "units"))
self.data_canvas.bind("<MouseWheel>", lambda e: self.data_canvas.yview_scroll(int(-1*(e.delta/120)), "units"))
row += 1
# 输出路径
output_frame = ttk.LabelFrame(self.main_frame, text="输出设置")
output_frame.grid(row=row, column=0, sticky="ew", padx=5, pady=5)
row += 1
# 输出路径
ttk.Label(output_frame, text="输出路径:").grid(row=0, column=0, sticky="w", padx=5, pady=5)
self.output_path_entry = ttk.Entry(output_frame)
self.output_path_entry.grid(row=0, column=1, padx=5, pady=5, sticky="ew")
self.output_path_entry.insert(0, self.config.get('SETTINGS', 'output_path', fallback=''))
ttk.Button(
output_frame,
text="浏览...",
command=self.browse_output_path,
width=10
).grid(row=0, column=2, padx=5, pady=5)
output_frame.columnconfigure(1, weight=1)
# 按钮区域
button_frame = ttk.Frame(self.main_frame)
button_frame.grid(row=row, column=0, sticky="ew", padx=5, pady=10)
row += 1
# 比较按钮
self.compare_btn = ttk.Button(
button_frame,
text="开始比较",
command=self.start_comparison_thread,
width=30,
style="Accent.TButton"
)
self.compare_btn.pack(pady=10)
# 进度条
self.progress = ttk.Progressbar(
button_frame,
orient=tk.HORIZONTAL,
length=400,
mode='indeterminate'
)
self.progress.pack(pady=5, fill=tk.X, expand=True)
self.progress.pack_forget() # 初始隐藏
# 状态栏
self.status_var = tk.StringVar(value="就绪")
status_bar = ttk.Label(self.main_frame, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W)
status_bar.grid(row=row, column=0, sticky="ew", padx=5, pady=5)
row += 1
# 日志区域
log_frame = ttk.LabelFrame(self.main_frame, text="日志输出")
log_frame.grid(row=row, column=0, sticky="nsew", padx=5, pady=5)
self.main_frame.rowconfigure(row, weight=1) # 日志区域获得额外空间
# 日志文本区域
self.log_text = scrolledtext.ScrolledText(
log_frame,
wrap=tk.WORD,
height=10
)
self.log_text.pack(fill="both", expand=True, padx=5, pady=5)
self.log_text.config(state=tk.DISABLED)
# 添加标签样式
self.log_text.tag_config("error", foreground="red")
self.log_text.tag_config("success", foreground="green")
self.log_text.tag_config("warning", foreground="orange")
# 添加清除日志按钮 ▼▼▼ 新增代码 ▼▼▼
clear_log_frame = ttk.Frame(log_frame)
clear_log_frame.pack(fill=tk.X, padx=5, pady=5)
ttk.Button(
clear_log_frame,
text="清除日志",
command=self.clear_log,
width=10
).pack(side=tk.RIGHT, padx=5, pady=2)
# ▼▼▼ 新增方法:清除日志 ▼▼▼
def clear_log(self):
"""清除日志内容"""
self.log_text.config(state=tk.NORMAL)
self.log_text.delete(1.0, tk.END)
self.log_text.config(state=tk.DISABLED)
self.log("日志已清除")
def open_settings_dialog(self):
"""打开设置对话框"""
dialog = tk.Toplevel(self.root)
dialog.title("高级设置")
dialog.geometry("550x500")
dialog.transient(self.root)
dialog.grab_set()
# 对话框框架
main_frame = ttk.Frame(dialog, padding=10)
main_frame.pack(fill=tk.BOTH, expand=True)
# 列名行配置
row_frame = ttk.LabelFrame(main_frame, text="列名行配置")
row_frame.pack(fill=tk.X, padx=5, pady=5)
ttk.Label(row_frame, text="列名所在行索引 (0表示第一行):").grid(row=0, column=0, sticky="w", padx=5, pady=5)
skiprows_entry = ttk.Entry(row_frame, textvariable=self.skiprows_var, width=10)
skiprows_entry.grid(row=0, column=1, padx=5, pady=5, sticky="w")
# 列范围配置
range_frame = ttk.LabelFrame(main_frame, text="列范围配置 (Excel列字母)")
range_frame.pack(fill=tk.X, padx=5, pady=5)
# 信号级别列范围
ttk.Label(range_frame, text="信号级别列范围:").grid(row=0, column=0, sticky="w", padx=5, pady=5)
signal_start_entry = ttk.Entry(range_frame, textvariable=self.signal_start_var, width=5)
signal_start_entry.grid(row=0, column=1, padx=5, pady=5, sticky="w")
ttk.Label(range_frame, text="至").grid(row=0, column=2, padx=5, pady=5)
signal_end_entry = ttk.Entry(range_frame, textvariable=self.signal_end_var, width=5)
signal_end_entry.grid(row=0, column=3, padx=5, pady=5, sticky="w")
# 数据级别列范围
ttk.Label(range_frame, text="数据级别列范围:").grid(row=1, column=0, sticky="w", padx=5, pady=5)
data_start_entry = ttk.Entry(range_frame, textvariable=self.data_start_var, width=5)
data_start_entry.grid(row=1, column=1, padx=5, pady=5, sticky="w")
ttk.Label(range_frame, text="至").grid(row=1, column=2, padx=5, pady=5)
data_end_entry = ttk.Entry(range_frame, textvariable=self.data_end_var, width=5)
data_end_entry.grid(row=1, column=3, padx=5, pady=5, sticky="w")
# 列加载基准文件设置
col_based_frame = ttk.LabelFrame(main_frame, text="列加载基准文件")
col_based_frame.pack(fill=tk.X, padx=5, pady=5)
ttk.Radiobutton(
col_based_frame,
text="基于旧文件加载列",
variable=self.col_load_based_on,
value="old"
).pack(anchor=tk.W, padx=10, pady=3)
ttk.Radiobutton(
col_based_frame,
text="基于新文件加载列",
variable=self.col_load_based_on,
value="new"
).pack(anchor=tk.W, padx=10, pady=3)
# 输出设置
output_frame = ttk.LabelFrame(main_frame, text="输出设置")
output_frame.pack(fill=tk.X, padx=5, pady=5)
# 报告输出
ttk.Checkbutton(
output_frame,
text="生成文本报告",
variable=self.text_report_var
).pack(anchor=tk.W, padx=10, pady=5)
ttk.Checkbutton(
output_frame,
text="生成Excel报告",
variable=self.excel_report_var
).pack(anchor=tk.W, padx=10, pady=5)
# 应用按钮
btn_frame = ttk.Frame(main_frame)
btn_frame.pack(fill=tk.X, pady=10)
ttk.Button(
btn_frame,
text="应用设置",
command=lambda: self.apply_settings(dialog),
width=15
).pack(side=tk.RIGHT, padx=5)
ttk.Button(
btn_frame,
text="保存设置",
command=self.save_settings,
width=15
).pack(side=tk.RIGHT, padx=5)
ttk.Button(
btn_frame,
text="重置为默认",
command=self.reset_settings,
width=15
).pack(side=tk.LEFT, padx=5)
def reset_settings(self):
"""重置设置为默认值"""
self.skiprows_var.set('9')
self.signal_start_var.set('A')
self.signal_end_var.set('M')
self.data_start_var.set('BT')
self.data_end_var.set('CD')
self.text_report_var.set(True)
self.excel_report_var.set(True)
self.col_load_based_on.set('old')
self.log("设置已重置为默认值")
def save_settings(self):
"""保存当前设置"""
try:
# 更新配置对象
self.config['SETTINGS'] = {
'skiprows': self.skiprows_var.get(),
'signal_start': self.signal_start_var.get(),
'signal_end': self.signal_end_var.get(),
'data_start': self.data_start_var.get(),
'data_end': self.data_end_var.get(),
'generate_text_report': str(self.text_report_var.get()),
'generate_excel_report': str(self.excel_report_var.get()),
'col_load_based_on': self.col_load_based_on.get(),
'old_file': self.old_file_entry.get(),
'new_file': self.new_file_entry.get(),
'old_sheet': self.old_sheet_var.get(),
'new_sheet': self.new_sheet_var.get(),
'output_path': self.output_path_entry.get()
}
# 保存到文件
self.save_config()
self.log("设置已成功保存")
except Exception as e:
self.log(f"保存设置失败: {str(e)}", error=True)
def apply_settings(self, dialog=None):
"""应用设置并关闭对话框"""
try:
# 应用列范围设置
self.apply_column_range()
# 保存设置
self.save_settings()
# 重新加载列
self.load_columns_from_file()
# 关闭对话框
if dialog:
dialog.destroy()
except Exception as e:
self.log(f"设置应用错误: {str(e)}", error=True)
def browse_file(self, entry_widget):
"""浏览文件并设置到输入框"""
file_path = filedialog.askopenfilename(
filetypes=[("Excel文件", "*.xlsx *.xls"), ("所有文件", "*.*")]
)
if file_path:
entry_widget.delete(0, tk.END)
entry_widget.insert(0, file_path)
# 文件选择后自动加载列
self.load_columns_from_file()
def browse_output_path(self):
"""浏览输出路径"""
dir_path = filedialog.askdirectory()
if dir_path:
self.output_path_entry.delete(0, tk.END)
self.output_path_entry.insert(0, dir_path)
def apply_column_range(self):
"""应用列范围设置(使用Excel字母格式)"""
try:
# 将字母转换为数字索引
self.signal_start_col = excel_column_letter_to_index(self.signal_start_var.get())
self.signal_end_col = excel_column_letter_to_index(self.signal_end_var.get())
self.data_start_col = excel_column_letter_to_index(self.data_start_var.get())
self.data_end_col = excel_column_letter_to_index(self.data_end_var.get())
self.log(f"列范围设置已应用: 信号列[{self.signal_start_var.get()} ({self.signal_start_col}) - {self.signal_end_var.get()} ({self.signal_end_col})], 数据列[{self.data_start_var.get()} ({self.data_start_col}) - {self.data_end_var.get()} ({self.data_end_col})]")
except ValueError:
self.log("错误: 请输入有效的列范围索引", error=True)
def load_columns_from_file(self):
"""从文件中加载列名(显示索引位置)并根据范围过滤"""
try:
# 确定基于哪个文件加载列
if self.col_load_based_on.get() == 'old':
file_path = self.old_file_entry.get()
sheet_name = self.old_sheet_var.get().strip()
else:
file_path = self.new_file_entry.get()
sheet_name = self.new_sheet_var.get().strip()
if not file_path:
return
skiprows = int(self.skiprows_var.get()) if self.skiprows_var.get().isdigit() else 9
# 如果sheet_name为空,则使用0(第一个sheet)
if not sheet_name and sheet_name != "0":
sheet_name = 0
else:
try:
sheet_name = int(sheet_name)
except ValueError:
pass # 保持为字符串
df = self.safe_read_excel(file_path, skiprows, sheet_name)
if df is None:
self.log(f"无法加载文件: {file_path}", error=True)
return
# 清空现有列选择
for widget in self.signal_inner_frame.winfo_children():
widget.destroy()
for widget in self.data_inner_frame.winfo_children():
widget.destroy()
# 创建列选择复选框
self.signal_col_vars = {}
self.data_col_vars = {}
# 获取配置的列范围
signal_start = self.signal_start_col
signal_end = self.signal_end_col
data_start = self.data_start_col
data_end = self.data_end_col
# 信号级别列
for i, col in enumerate(df.columns):
if signal_start <= i <= signal_end:
var = tk.BooleanVar(value=True)
cb = ttk.Checkbutton(
self.signal_inner_frame,
text=f"{i}: {col}",
variable=var
)
cb.pack(anchor=tk.W, padx=5, pady=2)
self.signal_col_vars[col] = var
# 数据级别列
for i, col in enumerate(df.columns):
if data_start <= i <= data_end:
var = tk.BooleanVar(value=True)
cb = ttk.Checkbutton(
self.data_inner_frame,
text=f"{i}: {col}",
variable=var
)
cb.pack(anchor=tk.W, padx=5, pady=2)
self.data_col_vars[col] = var
self.log(f"列名已从文件加载 ({self.col_load_based_on.get()}文件)")
except Exception as e:
self.log(f"加载列名失败: {str(e)}", error=True)
def start_comparison_thread(self):
"""启动比较线程"""
# 禁用按钮避免重复点击
self.compare_btn.config(state=tk.DISABLED)
self.status_var.set("正在比较...")
# 显示进度条
self.progress.pack(pady=5, fill=tk.X, expand=True)
self.progress.start(10)
# 获取输入参数
self.old_file = self.old_file_entry.get()
self.new_file = self.new_file_entry.get()
self.skiprows = int(self.skiprows_var.get()) if self.skiprows_var.get().isdigit() else 9
# 验证文件是否存在
if not os.path.exists(self.old_file) or not os.path.exists(self.new_file):
self.log("错误: 文件不存在", error=True)
self.comparison_finished()
return
# 获取选择的列
self.signal_cols = [col for col, var in self.signal_col_vars.items() if var.get()]
self.data_cols = [col for col, var in self.data_col_vars.items() if var.get()]
if not self.signal_cols or not self.data_cols:
self.log("错误: 请选择要比较的列", error=True)
self.comparison_finished()
return
# 获取新旧文件的Sheet名称
old_sheet = self.old_sheet_var.get().strip()
new_sheet = self.new_sheet_var.get().strip()
# 如果为空,则使用第一个Sheet
if not old_sheet and old_sheet != "0":
old_sheet = 0
else:
try:
old_sheet = int(old_sheet)
except:
pass # 保持为字符串
if not new_sheet and new_sheet != "0":
new_sheet = 0
else:
try:
new_sheet = int(new_sheet)
except:
pass
# 启动后台线程
threading.Thread(target=self.compare_excel_wrapper, args=(old_sheet, new_sheet), daemon=True).start()
def compare_excel_wrapper(self, old_sheet, new_sheet):
"""比较Excel文件的包装函数"""
try:
# 执行比较
comparison_result = self.compare_excel(self.old_file, self.new_file, self.skiprows, old_sheet, new_sheet)
if comparison_result is not None and not comparison_result.empty:
# 保存结果
output_dir = self.output_path_entry.get() or os.getcwd()
if not os.path.exists(output_dir):
try:
os.makedirs(output_dir)
self.log(f"创建输出目录: {output_dir}")
except PermissionError:
# 如果没有权限,使用临时目录
temp_dir = tempfile.mkdtemp()
self.log(f"无法创建输出目录,使用临时目录: {temp_dir}", warning=True)
output_dir = temp_dir
# 生成Excel报告 (改进的保存机制)
if self.excel_report_var.get():
# 确保文件名唯一
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
excel_filename = f"comparison_result_{timestamp}.xlsx"
excel_path = os.path.join(output_dir, excel_filename)
try:
sorted_result = comparison_result.sort_values(by='行位置')
# 尝试保存
sorted_result.to_excel(excel_path, index=False)
self.log(f"Excel报告保存到: {excel_path}", success=True)
# 尝试打开文件以确保没有锁定
with open(excel_path, 'rb') as test_file:
test_file.read(1)
except (PermissionError, OSError) as e:
# 如果保存失败,尝试使用临时目录
temp_dir = tempfile.mkdtemp()
temp_excel_path = os.path.join(temp_dir, excel_filename)
sorted_result.to_excel(temp_excel_path, index=False)
self.log(f"原始保存失败,文件已保存到临时目录: {temp_excel_path}", warning=True)
self.log(f"错误详情: {str(e)}", warning=True)
# 生成文本报告 (改进的保存机制)
if self.text_report_var.get():
# 确保文件名唯一
text_filename = f"comparison_report_{timestamp}.txt"
report_path = os.path.join(output_dir, text_filename)
try:
report = self.generate_report(comparison_result)
with open(report_path, 'w', encoding='utf-8') as f:
f.write(report)
self.log(f"文本报告保存到: {report_path}", success=True)
except (PermissionError, OSError) as e:
# 如果保存失败,尝试使用临时目录
temp_dir = tempfile.mkdtemp()
temp_report_path = os.path.join(temp_dir, text_filename)
with open(temp_report_path, 'w', encoding='utf-8') as f:
f.write(report)
self.log(f"原始保存失败,文件已保存到临时目录: {temp_report_path}", warning=True)
self.log(f"错误详情: {str(e)}", warning=True)
self.log(f"比较完成! 发现 {len(comparison_result)} 处变更", success=True)
else:
self.log("比较完成! 未发现变更", success=True)
except Exception as e:
self.log(f"比较过程中发生错误: {str(e)}", error=True)
import traceback
self.log(traceback.format_exc(), error=True)
finally:
self.comparison_finished()
def comparison_finished(self):
"""比较完成后的清理工作"""
self.progress.stop()
self.progress.pack_forget() # 隐藏进度条
self.compare_btn.config(state=tk.NORMAL)
self.status_var.set("完成")
def log(self, message, error=False, success=False):
"""记录日志消息"""
self.log_text.config(state=tk.NORMAL)
if error:
self.log_text.insert(tk.END, "[错误] ")
self.log_text.tag_add("error", "end-1c linestart", "end-1c lineend")
elif success:
self.log_text.insert(tk.END, "[成功] ")
self.log_text.tag_add("success", "end-1c linestart", "end-1c lineend")
else:
self.log_text.insert(tk.END, "[信息] ")
self.log_text.insert(tk.END, message + "\n")
self.log_text.see(tk.END)
self.log_text.config(state=tk.DISABLED)
# ======================== 以下是核心比较功能 ========================
def safe_read_excel(self, file_path, skiprows, sheet_name=None):
"""安全的Excel读取函数,包含自动修复功能和Sheet选择"""
if sheet_name is None:
sheet_name = 0 # 默认为第一个Sheet
# 最后尝试:使用二进制模式读取
self.log("二进制读取")
try:
with open(file_path, 'rb') as f:
# 尝试不同的引擎
for engine in ['openpyxl', 'xlrd', 'odf']:
try:
df = pd.read_excel(
f,
skiprows=skiprows,
dtype=str,
engine=engine,
sheet_name=sheet_name
).fillna('')
self.log(f"成功使用引擎: {engine}", success=True)
return df
except:
continue
except Exception as e:
self.log(f"二进制读取失败: {str(e)}", error=True)
# 终极手段:手动提取数据
self.log("所有方法失败,尝试手动提取数据")
try:
wb = load_workbook(file_path, read_only=True, data_only=True, keep_vba=False)
# 获取指定Sheet
if isinstance(sheet_name, int):
if sheet_name < len(wb.sheetnames):
ws = wb.worksheets[sheet_name]
else:
ws = wb.active
else:
if sheet_name in wb.sheetnames:
ws = wb[sheet_name]
else:
ws = wb.active
data = []
for i, row in enumerate(ws.iter_rows(values_only=True)):
if i < skiprows:
continue
data.append(row)
headers = data[0] if data else []
df = pd.DataFrame(data[1:], columns=headers)
return df.fillna('').astype(str)
except Exception as e:
raise ValueError(f"无法读取文件 {file_path}: {str(e)}")
def find_met_columns(self, df, is_old_file=True):
"""
查找MET列
- 旧文件:查找包含"MET"的列(V列和W列)
- 新文件:查找包含"MET"的列(AE列)
"""
met_cols = []
# 尝试通过列名匹配
for col in df.columns:
if "MET" in col.upper():
met_cols.append(col)
# 如果未找到,尝试按位置查找
if not met_cols:
self.log(f"警告: 通过列名未找到MET列,尝试按位置查找")
if is_old_file:
# 旧文件:V列(22)和W列(23),索引21和22
if len(df.columns) > 22:
met_cols = [df.columns[21], df.columns[22]]
self.log(f"使用位置索引的MET列: {met_cols}")
elif len(df.columns) > 21:
met_cols = [df.columns[21]]
else:
# 新文件:AE列(31),索引30
if len(df.columns) > 30:
met_cols = [df.columns[30]]
self.log(f"使用位置索引的MET列: {met_cols}")
if not met_cols:
self.log(f"严重警告: 未找到任何MET列,创建虚拟列以避免错误")
# 创建虚拟列
df['MET_DUMMY'] = ''
met_cols = ['MET_DUMMY']
return met_cols
def check_met_status(self, row, met_cols):
"""
检查行的MET状态
如果任一MET列包含'T'或'R',则返回True
"""
for col in met_cols:
if col in row:
met_value = str(row[col]).strip().upper()
if met_value in ['T', 'R']:
return True
return False
def build_signal_hierarchy(self, df, signal_col, data_col):
"""构建信号-数据的层级结构(修复行位置键问题)"""
signals = {}
current_signal = None
for idx, row in df.iterrows():
# 检查是否是信号行(信号名列非空)
row_data = row.to_dict() # 转换为字典以便处理
signal_val = str(row_data.get(signal_col, '')).strip()
data_val = str(row_data.get(data_col, '')).strip()
# 明确信号行判断:信号列有值且数据列为空
if signal_val and not data_val:
# 新信号开始
current_signal = signal_val
# 确保初始化所有必需键
signals.setdefault(current_signal, {
'signal_row': row_data,
'data_items': [],
'行位置': idx # 记录行位置
})
# 更新行位置
signals[current_signal]['行位置'] = idx
# 数据行判断:数据列有值且当前有活跃信号
elif current_signal and data_val:
# 确保信号字典存在
if current_signal in signals:
# 确保初始化行位置(如果尚未初始化)
if '行位置' not in signals[current_signal]:
signals[current_signal]['行位置'] = idx # 使用当前行作为默认
self.log(f"警告: 信号 {current_signal} 行位置未初始化,使用默认值 {idx}", warning=True)
# 添加数据项并记录其行位置
signals[current_signal]['data_items'].append({
'row_data': row_data,
'行位置': idx # 记录数据项的行位置
})
# 后处理:确保所有信号都有行位置
for signal_name, signal_data in signals.items():
if '行位置' not in signal_data:
# 尝试从第一个数据项获取行位置
if signal_data['data_items']:
signal_data['行位置'] = signal_data['data_items'][0]['行位置']
self.log(f"警告: 信号 {signal_name} 行位置未设置,使用第一个数据项位置", warning=True)
else:
signal_data['行位置'] = 0 # 默认值
self.log(f"警告: 信号 {signal_name} 无数据项,行位置设置为0", warning=True)
return signals
def normalize_col_name(self, col):
"""标准化列名:移除非单词字符,空格,并转为小写"""
return re.sub(r'[^\w]', '', str(col).replace(' ', '').lower())
def compare_excel(self, old_file, new_file, skiprows, old_sheet, new_sheet):
# 安全读取文件
self.log(f"正在读取旧文件: {old_file} (Sheet: {old_sheet})")
df_old = self.safe_read_excel(old_file, skiprows, old_sheet)
self.log(f"正在读取新文件: {new_file} (Sheet: {new_sheet})")
df_new = self.safe_read_excel(new_file, skiprows, new_sheet)
# === 列名标准化函数 ===
def normalize_col_name(col):
return re.sub(r'[^\w]', '', str(col).replace(' ', '').lower())
# 标准化列名
df_old.columns = [normalize_col_name(col) for col in df_old.columns]
df_new.columns = [normalize_col_name(col) for col in df_new.columns]
# === 关键列检测 ===
# 信号列
SIGNAL_COLS = ['フレーム名', 'フレーム名', 'framename', 'signalname']
signal_col = next((col for col in SIGNAL_COLS if col in df_old.columns), df_old.columns[0])
# 数据列
DATA_COLS = ['データ名', 'データ名', 'dataname', 'itemname']
data_col = next((col for col in DATA_COLS if col in df_old.columns), df_old.columns[1])
self.log(f"使用信号列: '{signal_col}'")
self.log(f"使用数据列: '{data_col}'")
# 查找MET列
old_met_cols = self.find_met_columns(df_old, is_old_file=True)
new_met_cols = self.find_met_columns(df_new, is_old_file=False)
# === 关键修复 1: 标准化用户选择的列 ===
self.signal_cols = [self.normalize_col_name(col) for col in self.signal_cols]
self.data_cols = [self.normalize_col_name(col) for col in self.data_cols]
# === 关键修复 2: 改进MET检查 ===
def check_met_status(row, met_cols):
"""修正MET状态检查"""
for col in met_cols:
try:
# 直接使用列索引访问,避免get()方法问题
met_value = str(row[col]).strip().upper()
if met_value in ['T', 'R', '1', 'Y', 'TRUE']:
return True
except KeyError:
continue
return False
# === 关键修复 3: 重构层级构建方法 ===
def build_signal_hierarchy(df, signal_col, data_col):
signals = {}
current_signal = None
for idx, row in df.iterrows():
row_data = row.to_dict()
signal_val = str(row_data.get(signal_col, '')).strip()
data_val = str(row_data.get(data_col, '')).strip()
# 明确信号行判断:信号列有值且数据列为空
if signal_val and not data_val:
current_signal = signal_val
# 保存行索引
signals[current_signal] = {
'signal_row': row_data,
'data_items': [],
'行位置': idx # 新增行位置信息
}
# 数据行判断:数据列有值
elif data_val and current_signal:
# 保存行索引
signals[current_signal]['data_items'].append({
'row_data': row_data,
'行位置': idx # 新增行位置信息
})
return signals
# 构建层级结构
old_signals = self.build_signal_hierarchy(df_old, signal_col, data_col)
new_signals = self.build_signal_hierarchy(df_new, signal_col, data_col)
# 存储结果
results = []
# 比较信号级别
for signal_name in set(old_signals.keys()) - set(new_signals.keys()):
if check_met_status(old_signals[signal_name]['signal_row'], old_met_cols):
results.append({
'信号名': signal_name,
'变更类型': '信号删除',
'数据名': '',
'变更详情': "整个信号被删除",
'行位置': old_signals[signal_name]['行位置'] # 修复:使用正确的键名
})
for signal_name in set(new_signals.keys()) - set(old_signals.keys()):
if check_met_status(new_signals[signal_name]['signal_row'], new_met_cols):
results.append({
'信号名': signal_name,
'变更类型': '信号新增',
'数据名': '',
'变更详情': "整个信号被新增",
'行位置': new_signals[signal_name]['行位置'] # 修复:使用正确的键名
})
# 比较共有信号
for signal_name in set(old_signals.keys()) & set(new_signals.keys()):
old_signal = old_signals[signal_name]
new_signal = new_signals[signal_name]
# 确保行位置存在
old_row_pos = old_signal.get('行位置', 0)
new_row_pos = new_signal.get('行位置', 0)
# 信号级别变更
signal_changes = []
for col in self.signal_cols:
norm_col = normalize_col_name(col)
old_val = str(old_signal['signal_row'].get(norm_col, '')).strip()
new_val = str(new_signal['signal_row'].get(norm_col, '')).strip()
# 数值类型特殊处理
if old_val.isdigit() and new_val.isdigit():
if int(old_val) != int(new_val):
signal_changes.append(f"{col}: {old_val}→{new_val}")
elif old_val != new_val:
signal_changes.append(f"{col}: {old_val}→{new_val}")
if signal_changes and (check_met_status(old_signal['signal_row'], old_met_cols) or
check_met_status(new_signal['signal_row'], new_met_cols)):
results.append({
'信号名': signal_name,
'变更类型': '信号变更',
'数据名': '',
'变更详情': "; ".join(signal_changes),
'行位置': old_row_pos # 使用安全获取的行位置
})
# 数据级别比较
# 创建数据项映射(数据名 -> (行位置, 行数据))
old_data_map = {}
for item in old_signal['data_items']:
data_name = str(item['row_data'].get(data_col, '')).strip()
if data_name: # 确保数据名不为空
old_data_map[data_name] = (item['行位置'], item['row_data'])
new_data_map = {}
for item in new_signal['data_items']:
data_name = str(item['row_data'].get(data_col, '')).strip()
if data_name: # 确保数据名不为空
new_data_map[data_name] = (item['行位置'], item['row_data'])
# 检查删除的数据项
for data_name in set(old_data_map.keys()) - set(new_data_map.keys()):
row_pos, old_row = old_data_map[data_name]
if check_met_status(old_row, old_met_cols):
results.append({
'信号名': signal_name,
'变更类型': '数据删除',
'数据名': data_name,
'变更详情': f"数据被删除(满足MET条件)",
'行位置': row_pos # 记录行位置
})
# 检查新增的数据项
for data_name in set(new_data_map.keys()) - set(old_data_map.keys()):
row_pos, new_row = new_data_map[data_name]
if check_met_status(new_row, new_met_cols):
results.append({
'信号名': signal_name,
'变更类型': '数据新增',
'数据名': data_name,
'变更详情': f"数据被新增(满足MET条件)",
'行位置': row_pos # 记录行位置
})
# 检查变更的数据项
for data_name in set(old_data_map.keys()) & set(new_data_map.keys()):
old_row_pos, old_row = old_data_map[data_name]
new_row_pos, new_row = new_data_map[data_name]
# 初始化 data_changes 列表
data_changes = []
# 收集所有数据列的变更
for col in self.data_cols:
try:
# 直接比较原始值,不进行strip处理
old_val = str(old_row[col])
new_val = str(new_row[col])
if old_val != new_val:
# 记录原始值(包含空格)
data_changes.append(f"{col}: [{old_val}]→[{new_val}]")
except KeyError:
continue # 如果列不存在,跳过
# 检查MET条件:旧行或新行满足MET条件
if data_changes and (check_met_status(old_row, old_met_cols)
or check_met_status(new_row, new_met_cols)):
results.append({
'信号名': signal_name,
'变更类型': '数据变更',
'数据名': data_name,
'变更详情': "; ".join(data_changes),
'行位置': old_row_pos # 使用旧文件中的行位置
})
# 转为结果DataFrame
result_df = pd.DataFrame(results)
# 如果结果为空,添加一行提示信息
if result_df.empty:
result_df = pd.DataFrame([{
'信号名': '无变更',
'变更类型': '无变更',
'数据名': '',
'变更详情': '未发现满足MET条件的变更',
'行位置': 0
}])
return result_df
def generate_report(self, comparison_result):
"""生成文本格式的详细报告(按照行位置排序)"""
report = []
# 检查是否需要生成报告
if comparison_result.empty or (len(comparison_result) == 1 and comparison_result.iloc[0]['信号名'] == '无变更'):
return "比较完成,未发现满足MET条件的变更"
# 创建行位置字典(信号名 -> 行索引)
signal_line_map = {}
# 首先处理信号级别的变更
signal_changes = comparison_result[comparison_result['数据名'] == '']
# 按行位置排序
for _, row in signal_changes.iterrows():
signal_name = row['信号名']
# 为每个信号存储变更信息和行位置 - 修复:包含'行位置'键
if signal_name not in signal_line_map:
signal_line_map[signal_name] = {
'signal_changes': [],
'data_changes': [],
'行位置': row['行位置'] # 新增行位置存储
}
else:
signal_line_map[signal_name]['行位置'] = row['行位置']
signal_line_map[signal_name]['signal_changes'].append({
'变更类型': row['变更类型'],
'详情': row['变更详情']
})
# 处理数据级别的变更
data_changes = comparison_result[comparison_result['数据名'] != '']
# 创建数据项的位置字典(信号名->数据项->行位置)
for signal_name, group in data_changes.groupby('信号名'):
if signal_name not in signal_line_map:
signal_line_map[signal_name] = {
'signal_changes': [],
'data_changes': [],
'行位置': group.iloc[0]['行位置'] # 新增行位置存储
}
# 对每个信号下的数据变更按行位置排序
sorted_data = group.sort_values(by='行位置')
for _, row in sorted_data.iterrows():
signal_line_map[signal_name]['data_changes'].append({
'数据名': row['数据名'],
'变更类型': row['变更类型'],
'详情': row['变更详情']
})
# 按信号行位置排序 - 添加默认值处理
sorted_signals = sorted(signal_line_map.keys(),
key=lambda x: signal_line_map[x].get('行位置', float('inf')))
# 生成报告内容
for signal_name in sorted_signals:
signal_info = signal_line_map[signal_name]
report.append(f"\n信号: {signal_name}")
# 输出信号级别的变更
for change in signal_info['signal_changes']:
if change['变更类型'] == '信号删除':
report.append(f" - 信号删除: {change['详情']}")
elif change['变更类型'] == '信号新增':
report.append(f" - 信号新增: {change['详情']}")
elif change['变更类型'] == '信号变更':
report.append(f" - 信号变更: {change['详情']}")
# 输出数据级别的变更(已按行位置排序)
for change in signal_info['data_changes']:
if change['变更类型'] == '数据删除':
report.append(f" - 数据删除: {change['数据名']} - {change['详情']}")
elif change['变更类型'] == '数据新增':
report.append(f" - 数据新增: {change['数据名']} - {change['详情']}")
elif change['变更类型'] == '数据变更':
report.append(f" - 数据变更: {change['数据名']} - {change['详情']}")
# 添加总结信息
total_changes = len(comparison_result)
signal_changes_count = len(signal_changes)
data_changes_count = len(data_changes)
summary = f"\n\n总结:\n"
summary += f"总变更数: {total_changes}\n"
summary += f"- 信号级别变更: {signal_changes_count}\n"
summary += f"- 数据级别变更: {data_changes_count}"
report.append(summary)
return "\n".join(report)
# ===== 主程序入口 =====
if __name__ == "__main__":
root = tk.Tk()
app = ExcelComparatorApp(root)
root.mainloop()
这个数据变更的内容是按照循序了,但是我还是想把数据新增与数据删除放在数据变更的上面
最新发布