报错如下: File <tokenize>:446
except ImportError:
^
IndentationError: unindent does not match any outer indentation level
# 尝试12 库版本冲突,NUmpy需要1版本,有时又要2版本,进行分类控制
# 安全的版本检查和兼容性处理
import sys
import warnings
import logging
# 配置日志记录
logging.basicConfig(level=logging.WARNING,
format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def check_numpy_compatibility():
"""检查NumPy版本兼容性并处理问题"""
try:
import numpy as np
numpy_version = tuple(map(int, np.version.version.split('.')[:2]))
# 检查NumPy是否为2.0+
if numpy_version >= (2, 0):
logger.warning("检测到不兼容的NumPy 2.0+版本")
# 提供明确的错误信息而不是尝试自动降级
error_msg = """
️ 严重兼容性问题 ⚠️
检测到NumPy 2.0+版本,但当前环境需要NumPy 1.x版本。
解决方案:
1. 创建一个新的虚拟环境:
python -m venv ocr-env
source ocr-env/bin/activate # Linux/Mac
.\ocr-env\Scripts\activate # Windows
2. 安装兼容版本的NumPy:
pip install "numpy<2"
3. 然后安装其他依赖:
pip install pandas opencv-python paddlepaddle paddleocr cnocr pdf2image pdfplumber PyMuPDF
4. 重新运行程序
注意: Jupyter环境中无法直接降级NumPy,必须使用虚拟环境!
"""
raise ImportError(error_msg)
return np
except ImportError:
# NumPy未安装,尝试安装兼容版本
try:
logger.info("NumPy未安装,尝试安装兼容版本...")
import subprocess
subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy<2"])
import numpy as np
return np
except Exception as e:
logger.error(f"安装NumPy失败: {str(e)}")
raise ImportError("无法安装兼容的NumPy版本,请手动安装: pip install \"numpy<2\"")
# 尝试获取兼容的NumPy版本
try:
np = check_numpy_compatibility()
logger.info(f"使用兼容的NumPy版本: {np.version.version}")
except ImportError as e:
# 在GUI环境中显示错误信息
import tkinter.messagebox as messagebox
messagebox.showerror("环境配置错误", str(e))
sys.exit(1)
import pandas as pd
import pdfplumber # PDF解析
import tkinter as tk
from tkinter import filedialog, ttk, messagebox
import os
import threading
from concurrent.futures import ThreadPoolExecutor
import time
import gc
import psutil
import openpyxl # 用于高效读取大型Excel
import re
import warnings
import pandas as pd
import pdfplumber
import re
import pandas as pd
# 增强的依赖管理和错误处理
import sys
import warnings
import logging
import traceback
import os
# 配置高级日志记录
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] %(message)s',
handlers=[
logging.FileHandler("ocr_system.log", encoding='utf-8'),
logging.StreamHandler()
]
)
logger = logging.getLogger("PDF-OCR-System")
# 1. 解决NumPy兼容性问题
def resolve_numpy_conflict():
"""确保NumPy版本与SciPy兼容"""
try:
import numpy as np
# 检查NumPy版本是否在SciPy要求的范围内 [1.16.5, 1.23.0)
numpy_version = tuple(map(int, np.__version__.split('.')[:3]))
if numpy_version >= (1, 16, 5) and numpy_version < (1, 23, 0):
logger.info(f"NumPy版本兼容: {np.__version__}")
return np
logger.warning(f"检测到不兼容的NumPy版本: {np.__version__}")
# 尝试安装兼容版本
target_version = "1.22.4" # SciPy兼容的稳定版本
logger.info(f"尝试安装NumPy {target_version}...")
import subprocess
subprocess.run(
[sys.executable, "-m", "pip", "install", f"numpy=={target_version}"],
check=True
)
# 重新加载NumPy
import importlib
importlib.reload(np)
logger.info(f"NumPy成功降级到: {np.__version__}")
return np
except ImportError:
logger.error("NumPy未安装,正在安装兼容版本...")
import subprocess
subprocess.run(
[sys.executable, "-m", "pip", "install", "numpy==1.22.4"],
check=True
)
import numpy as np
return np
except Exception as e:
logger.error(f"NumPy版本处理失败: {str(e)}")
# 回退到系统NumPy
import numpy as np
return np
# 安全初始化NumPy
try:
np = resolve_numpy_conflict()
except Exception as e:
logger.critical(f"NumPy初始化失败: {str(e)}")
sys.exit(1)
# 2. 安装缺失的关键依赖
def install_missing_dependencies():
"""安装PDF处理和OCR所需的依赖"""
missing_packages = []
# 检查关键依赖
required_packages = {
"pandas": "2.1.4",
"pdfplumber": "0.10.4",
"paddleocr": "3.2.0",
"cnocr": "2.3.0.2",
"pdf2image": "1.16.3",
"PyMuPDF": "1.24.0", # fitz模块所属包
"opencv-python": "4.9.0.80",
"openpyxl": "3.1.2",
"psutil": "5.9.7",
"scipy": "1.13.0" # 与NumPy 1.22.4兼容的版本
}
import importlib
for pkg, version in required_packages.items():
try:
mod = importlib.import_module(pkg)
installed_version = getattr(mod, '__version__', "未知")
logger.info(f"{pkg}已安装: {installed_version}")
except ImportError:
missing_packages.append(f"{pkg}=={version}")
if missing_packages:
logger.warning(f"缺失依赖: {', '.join(missing_packages)}")
try:
import subprocess
install_cmd = [sys.executable, "-m", "pip", "install"] + missing_packages
subprocess.run(install_cmd, check=True)
logger.info("缺失依赖安装完成")
except Exception as e:
logger.error(f"依赖安装失败: {str(e)}")
return False
return True
# 安装缺失依赖
install_missing_dependencies()
# 先定义 PriceComparator 类
class PriceComparator:
def __init__(self):
self.history_df = None
self.supplier_data = {}
self.pdf_files = [] # 存储处理的PDF文件路径
self.ocr_engine = None # OCR引擎实例
def initialize_ocr(self):
"""初始化OCR引擎(按需初始化)"""
if self.ocr_engine is None:
# 使用组合OCR策略:PaddleOCR + CnOCR
self.ocr_engine = {
'paddle': PaddleOCR(use_angle_cls=True, lang="ch", use_gpu=False),
'cnocr': CnOcr()
}
print("OCR引擎初始化完成")
def load_history_data(self, excel_path):
"""加载历史订单数据(优化版,使用openpyxl只读模式)"""
# ...(保持不变)...
def preprocess_image(self, image):
"""图像预处理:提高OCR识别率"""
# 转换为OpenCV格式
img = np.array(image)
# 1. 灰度转换
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# 2. 自适应直方图均衡化
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# 3. 非局部均值去噪
denoised = cv2.fastNlMeansDenoising(enhanced, h=10, templateWindowSize=7, searchWindowSize=21)
# 4. 锐化处理
kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
sharpened = cv2.filter2D(denoised, -1, kernel)
return Image.fromarray(sharpened)
def ocr_image(self, image):
"""使用组合OCR引擎识别图像文本"""
self.initialize_ocr()
# 预处理图像
processed_img = self.preprocess_image(image)
# 使用PaddleOCR识别
paddle_result = self.ocr_engine['paddle'].ocr(np.array(processed_img), cls=True)
paddle_text = ""
if paddle_result and paddle_result[0]:
for line in paddle_result[0]:
if line and line[1]:
paddle_text += line[1][0] + "\n"
# 使用CnOCR识别
cnocr_result = self.ocr_engine['cnocr'].ocr(np.array(processed_img))
cnocr_text = "\n".join([line['text'] for line in cnocr_result])
# 合并结果(优先使用PaddleOCR,补充CnOCR的结果)
combined_text = paddle_text
for line in cnocr_text.split('\n'):
if line.strip() and line not in paddle_text:
combined_text += line + "\n"
return combined_text
def parse_scanned_pdf(self, pdf_path, progress_callback=None):
"""解析扫描版PDF文件(基于OCR)"""
print(f"开始处理扫描版PDF: {pdf_path}")
start_time = time.time()
# 使用PyMuPDF获取页数
with fitz.open(pdf_path) as doc:
total_pages = doc.page_count
items = []
# 创建临时目录存储转换后的图像
with tempfile.TemporaryDirectory() as temp_dir:
# 将PDF转换为图像列表
images = convert_from_path(
pdf_path,
dpi=300, # 高分辨率提高识别率
output_folder=temp_dir,
fmt='jpeg',
thread_count=4 # 多线程加速
)
for i, image in enumerate(images):
if progress_callback:
progress_callback(i+1, total_pages)
print(f"处理第 {i+1}/{len(images)} 页...")
page_text = self.ocr_image(image)
# 从OCR文本中提取数据
page_items = self.parse_ocr_text(page_text)
items.extend(page_items)
df = pd.DataFrame(items)
print(f"扫描版PDF处理完成: {pdf_path}, 耗时: {time.time()-start_time:.2f}秒")
print(f"提取了 {len(df)} 条记录")
return df
def parse_ocr_text(self, text):
"""解析OCR识别的文本内容"""
# 定义字段别名映射,兼容不同供应商的命名方式
field_mappings = {
'名称': ['名称', '品名', '产品名称', '物料名称', '物料', '化合物名称'],
'规格': ['规格', '包装规格', '规格型号', '型号', '包装', '规格说明'],
'数量': ['数量', '主数量', '订货数量', '订购数量', '采购数量'],
'价格': ['价格', '单价', '报价', '含税单价', '不含税单价'],
'CAS号': ['CAS', 'CAS号', 'CAS NO.', 'CAS No', 'CAS No.'],
'货号': ['货号', '编号', '产品编号', '编码', '物料编码'],
'品牌': ['品牌', '原厂'],
'厂家': ['厂家', '生产厂家', '制造商', '原厂'],
'证书': ['证书', '资质', '认证'],
'货期': ['货期', '交货期', '交付周期', '生产周期'],
'备注': ['备注', '说明', '附加信息']
}
# 创建反向映射字典 {别名: 标准字段名}
reverse_mappings = {}
for standard_field, aliases in field_mappings.items():
for alias in aliases:
reverse_mappings[alias.lower()] = standard_field
items = []
# 分割文本为段落(假设每个段落对应一个产品)
paragraphs = re.split(r'\n{2,}', text)
for para in paragraphs:
# 跳过空段落和页眉页脚
if len(para.strip()) < 20:
continue
# 尝试匹配键值对
item = {}
for alias, std_field in reverse_mappings.items():
# 创建匹配模式
pattern = re.compile(fr"{alias}\s*[::]?\s*([^\n]+)")
match = pattern.search(para)
if match:
item[std_field] = match.group(1).strip()
# 如果提取到了关键字段,添加到结果
if '名称' in item and '价格' in item:
# 创建物料标识
if '货号' in item and '规格' in item:
item['物料标识'] = f"{item['货号']}_{item['规格']}"
elif '名称' in item and '规格' in item:
item['物料标识'] = f"{item['名称']}_{item['规格']}"
# 数值类型转换
try:
if '数量' in item:
item['数量'] = float(re.sub(r'[^\d.]', '', item['数量']))
if '价格' in item:
item['价格'] = float(re.sub(r'[^\d.]', '', item['价格']))
except ValueError:
# 保留原始值如果不能转换
pass
items.append(item)
return items
def parse_pdf(self, pdf_path, progress_callback=None):
"""解析供应商PDF报价单 - 自动检测文本型或扫描型"""
print(f"开始解析PDF: {pdf_path}")
start_time = time.time()
supplier_name = os.path.basename(pdf_path).split('.')[0]
self.pdf_files.append(pdf_path)
# 首先尝试作为文本型PDF解析
try:
with pdfplumber.open(pdf_path) as pdf:
# 检查第一页是否有文本
first_page_text = pdf.pages[0].extract_text()
if first_page_text and len(first_page_text.strip()) > 50:
print("检测为文本型PDF,使用pdfplumber解析")
return self.parse_text_pdf(pdf_path, progress_callback)
except Exception as e:
print(f"pdfplumber解析失败: {str(e)}")
# 如果文本型解析失败或不适用,则使用OCR解析
print("检测为扫描版PDF,使用OCR解析")
return self.parse_scanned_pdf(pdf_path, progress_callback)
def parse_text_pdf(self, pdf_path, progress_callback=None):
"""解析文本型PDF(原有逻辑)"""
# ...(这里是您原有的parse_pdf方法的内容)...
# 为了节省空间,我这里省略了重复代码,您可以直接使用原有的代码
# 实际集成时请保留这部分代码
def compare_prices(self):
"""比较价格并生成结果"""
if not self.supplier_data:
return pd.DataFrame()
# 合并所有供应商数据
all_items = pd.concat(self.supplier_data.values(), keys=self.supplier_data.keys(), names=['供应商'])
all_items.reset_index(level=0, inplace=True)
# 找出每个物料的最低价格
min_prices = all_items.groupby('物料标识')['价格'].min().reset_index()
# 关联历史数据(如果有)
if self.history_df is not None:
merged_df = pd.merge(
min_prices,
self.history_df,
on='物料标识',
how='left',
suffixes=('_最低价', '_历史')
)
else:
merged_df = min_prices.rename(columns={'价格': '最低价'})
# 关联供应商名称
merged_df = pd.merge(
merged_df,
all_items[['物料标识', '供应商', '价格']],
left_on=['物料标识', '最低价'],
right_on=['物料标识', '价格'],
how='left'
)
merged_df.rename(columns={'供应商': '最低价供应商', '价格': '最低价'}, inplace=True)
# 清理临时列
merged_df.drop(columns=['价格_最低价'], inplace=True, errors='ignore')
print(f"比价分析完成,共分析 {len(merged_df)} 条记录")
return merged_df
class PDFParser:
self.ocr_engine = PaddleOCR(
use_angle_cls=True,
lang='ch',
use_gpu=True, # 自动检测GPU
show_log=False
)
logger.info("使用PaddleOCR引擎")
except ImportError:
logger.warning("PaddleOCR未安装,尝试使用CnOCR")
try:
from cnocr import CnOcr
self.ocr_engine = CnOcr()
logger.info("使用CnOCR引擎")
except ImportError:
logger.error("未安装OCR引擎,请安装PaddleOCR或CnOCR")
self.ocr_engine = None
def _pdf_to_images(self, pdf_path):
"""将PDF转换为图像列表"""
try:
from pdf2image import convert_from_path
return convert_from_path(pdf_path, dpi=300)
except ImportError:
logger.error("pdf2image未安装")
return None
except Exception as e:
logger.error(f"PDF转图像失败: {str(e)}")
return None
def _process_ocr_results(self, results):
"""处理OCR结果提取表格数据"""
# 简化的表格提取逻辑
data = []
for page_result in results:
if self.ocr_engine.__class__.__name__ == 'PaddleOCR':
# PaddleOCR结果处理
for line in page_result:
if line and line[1][0]: # 确保有文本
data.append(line[1][0])
else:
# CnOCR结果处理
for line in page_result:
if line and line['text']:
data.append(line['text'])
# TODO: 根据实际需求添加表格解析逻辑
logger.warning("OCR结果提取需要根据实际表格结构增强")
return {"text": data}
# 然后定义 Application 类
class Application(tk.Tk):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.comparator = None # 确保初始化
self.status_var = tk.StringVar(value="就绪")
self._setup_ui()
self._check_dependencies()
self._load_history_data() # 提前加载历史数据
def _check_dependencies(self):
"""增强的依赖检查,避免兼容性问题"""
self.log_message("=== 依赖库版本检查 ===")
# 记录核心库版本
self.log_message(f"Python版本: {sys.version.split()[0]}")
self.log_message(f"NumPy版本: {np.version.version}")
# 安全获取其他模块版本
def safe_get_version(module_name):
try:
module = __import__(module_name)
if hasattr(module, '__version__'):
return module.__version__
# 尝试其他方式获取版本
if module_name == "pdf2image":
from pdf2image import __version__ as pdf2image_version
return pdf2image_version
# 使用metadata获取
if sys.version_info >= (3, 8):
from importlib.metadata import version
return version(module_name)
else:
import pkg_resources
return pkg_resources.get_distribution(module_name).version
except:
return "未知版本"
# 检查关键依赖
deps = ["pandas", "pdfplumber", "paddleocr", "cnocr", "pdf2image",
"PyMuPDF", "opencv-python", "openpyxl", "psutil"]
for dep in deps:
try:
__import__(dep)
version = safe_get_version(dep)
self.log_message(f"{dep}版本: {version}")
except ImportError:
self.log_message(f"警告: {dep}未安装")
except Exception as e:
self.log_message(f"{dep}检查错误: {str(e)}")
self.log_message("=== 依赖检查完成 ===")
def _setup_ui(self):
self.title("物料价格比对系统 v3.0")
self.geometry("800x600")
# 主框架
main_frame = ttk.Frame(self)
main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
# 控制按钮框架
btn_frame = ttk.Frame(main_frame)
btn_frame.pack(fill=tk.X, pady=5)
self.btn_load_history = ttk.Button(btn_frame, text="加载历史数据", command=self.load_history)
self.btn_load_history.pack(side=tk.LEFT, padx=5)
self.btn_add_pdf = ttk.Button(btn_frame, text="添加供应商报价", command=self.add_pdf)
self.btn_add_pdf.pack(side=tk.LEFT, padx=5)
self.btn_generate = ttk.Button(btn_frame, text="生成比对报告", command=self.generate_report)
self.btn_generate.pack(side=tk.LEFT, padx=5)
self.btn_clear = ttk.Button(btn_frame, text="清除缓存", command=self.clear_cache)
self.btn_clear.pack(side=tk.RIGHT, padx=5)
# 进度条框架
progress_frame = ttk.LabelFrame(main_frame, text="处理进度")
progress_frame.pack(fill=tk.X, padx=5, pady=5)
self.progress_label = ttk.Label(progress_frame, text="就绪")
self.progress_label.pack(anchor=tk.W, padx=5, pady=2)
self.progress_bar = ttk.Progressbar(progress_frame, orient='horizontal', length=500, mode='determinate')
self.progress_bar.pack(fill=tk.X, padx=5, pady=5)
# 日志框架
log_frame = ttk.LabelFrame(main_frame, text="操作日志")
log_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
self.log_text = tk.Text(log_frame, height=15)
scrollbar = ttk.Scrollbar(log_frame, command=self.log_text.yview)
self.log_text.configure(yscrollcommand=scrollbar.set)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
self.log_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
self.log_text.config(state=tk.DISABLED)
# 状态栏
self.status_var = tk.StringVar(value="就绪")
status_bar = ttk.Label(self, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W)
status_bar.pack(side=tk.BOTTOM, fill=tk.X)
# 内存监控
self.memory_var = tk.StringVar(value="内存使用: 0 MB")
memory_label = ttk.Label(self, textvariable=self.memory_var, padding=2)
memory_label.pack(side=tk.BOTTOM, fill=tk.X)
# 禁用按钮直到历史数据加载
self.btn_add_pdf.config(state=tk.DISABLED)
self.btn_generate.config(state=tk.DISABLED)
# 启动内存监控
self.monitor_memory()
def log_message(self, message):
"""在日志框中添加消息"""
self.log_text.config(state=tk.NORMAL)
self.log_text.insert(tk.END, message + "\n")
self.log_text.see(tk.END) # 自动滚动到底部
self.log_text.config(state=tk.DISABLED)
def monitor_memory(self):
"""定期更新内存使用情况"""
process = psutil.Process(os.getpid())
mem_usage = process.memory_info().rss / (1024 * 1024) # 转换为MB
self.memory_var.set(f"内存使用: {mem_usage:.2f} MB")
self.after(5000, self.monitor_memory) # 每5秒更新一次
def clear_cache(self):
"""清除缓存释放内存"""
self.comparator = PriceComparator()
gc.collect()
self.log_message("缓存已清除,内存已释放")
self.status_var.set("就绪")
self.progress_bar["value"] = 0
self.progress_label["text"] = "就绪"
self.btn_add_pdf.config(state=tk.DISABLED)
self.btn_generate.config(state=tk.DISABLED)
self.btn_load_history.config(state=tk.NORMAL)
def _load_history_data(self):
"""安全加载历史数据"""
try:
history_path = "C:/Users/14432/Desktop/自动比价系统/近五年订单汇总.xlsx"
self.log_message(f"开始加载历史数据: {history_path}")
import pandas as pd
self.comparator.history_df = pd.read_excel(history_path)
msg = f"历史数据加载完成: {len(self.comparator.history_df)}条记录"
self.status_var.set(msg)
self.log_message(msg)
except Exception as e:
error_msg = f"历史数据加载失败: {str(e)}"
self.status_var.set(error_msg)
self.log_message(error_msg, level="ERROR")
self.comparator.history_df = pd.DataFrame() # 创建空DataFrame
def parse_pdfs(self):
"""解析PDF文件并处理错误"""
pdf_files = [
"C:/Users/14432/Desktop/自动比价系统/s26c-725082615560.pdf",
"C:/Users/14432/Desktop/自动比价系统/对照品询比价2025.08.21-2--广州佳途报价---TO华东医药-更新2.pdf",
"C:/Users/14432/Desktop/自动比价系统/广州隽沐报价单20250826-中美华东(3).pdf",
"C:/Users/14432/Desktop/自动比价系统/翔龙报价单-华东-20250825.pdf"
]
parser = PDFParser()
results = []
for pdf_path in pdf_files:
result = parser.parse_pdf(pdf_path)
if result:
results.append(result)
self.log_message(f"成功解析: {pdf_path}")
else:
self.log_message(f"解析失败: {pdf_path}", level="ERROR")
self.log_message(f"所有PDF文件解析完成,成功{len(results)}个")
def add_pdf(self):
"""添加PDF文件(多线程处理)"""
files = filedialog.askopenfilenames(filetypes=[("PDF文件", "*.pdf")])
if not files:
return
# 禁用按钮避免重复操作
self.btn_add_pdf.config(state=tk.DISABLED)
self.status_var.set(f"正在解析{len(files)}个PDF文件...")
self.log_message(f"开始解析{len(files)}个PDF文件")
# 重置进度条
self.progress_bar["value"] = 0
# 使用线程池
def process_file(file_path):
try:
# 为每个文件设置进度回调
def progress_callback(current, total):
self.after(0, lambda: self.progress_label.config(
text=f"解析 {os.path.basename(file_path)}: {current}/{total}页")
)
self.after(0, lambda: self.progress_bar.config(value=(current/total)*100))
self.comparator.parse_pdf(file_path, progress_callback)
self.after(0, lambda: self.log_message(f"成功解析: {file_path}"))
except Exception as e:
error_msg = str(e)
self.after(0, lambda: self.log_message(f"错误: 解析 {file_path} 失败 - {error_msg}"))
# 启动线程池
def thread_pool_task():
with ThreadPoolExecutor(max_workers=min(4, len(files))) as executor:
futures = [executor.submit(process_file, f) for f in files]
# 等待所有任务完成
for future in futures:
future.result()
# 完成后重置界面
self.after(0, lambda: self.progress_bar.config(value=0))
self.after(0, lambda: self.progress_label.config(text="解析完成"))
self.after(0, lambda: self.status_var.set(f"PDF解析完成: {len(files)}个文件"))
self.after(0, lambda: self.btn_add_pdf.config(state=tk.NORMAL))
self.after(0, lambda: self.log_message(f"所有PDF文件解析完成"))
threading.Thread(target=thread_pool_task, daemon=True).start()
def generate_report(self):
"""生成报告(线程安全)"""
if self.processing:
return
if not self.comparator.supplier_data:
messagebox.showwarning("警告", "请先添加供应商报价单")
return
# 默认保存路径的逻辑
if self.comparator.pdf_files:
first_pdf = self.comparator.pdf_files[0]
report_dir = os.path.dirname(first_pdf)
timestamp = time.strftime("%Y%m%d_%H%M%S")
default_name = f"比价报告_{timestamp}.xlsx"
save_path = os.path.join(report_dir, default_name)
else:
save_path = filedialog.asksaveasfilename(
defaultextension=".xlsx",
filetypes=[("Excel文件", "*.xlsx")]
)
if not save_path:
return
# 启动线程生成报告
self.processing = True
self.status_var.set("正在生成报告...")
self.log_message("开始生成比对报告")
self.progress_label.config(text="生成报告中...")
# 显示进度条但不更新具体值
self.progress_bar.config(mode='indeterminate')
self.progress_bar.start(10) # 开始滚动动画
def thread_task():
try:
start_time = time.time()
result = self.comparator.compare_prices()
# 保存结果
result.to_excel(save_path, index=False)
elapsed = time.time() - start_time
self.after(0, lambda: self.log_message(f"报告生成成功! 保存路径: {save_path}, 耗时: {elapsed:.2f}秒"))
self.after(0, lambda: self.status_var.set(f"报告生成成功: {len(result)}条记录"))
self.after(0, lambda: messagebox.showinfo("成功", f"比价报告已保存至:\n{save_path}"))
except Exception as ex: # 使用不同的变量名
error_msg = str(ex) # 将错误信息保存到局部变量
self.after(0, lambda: self.log_message(f"错误: 生成报告失败 - {error_msg}"))
self.after(0, lambda: messagebox.showerror("错误", f"生成报告失败: {error_msg}"))
finally:
self.after(0, lambda: self.progress_bar.stop())
self.after(0, lambda: self.progress_bar.config(mode='determinate', value=0))
self.after(0, lambda: self.progress_label.config(text="就绪"))
self.processing = False
threading.Thread(target=thread_task, daemon=True).start()
# 在主入口安全初始化
if __name__ == "__main__":
try:
# 忽略特定警告
warnings.filterwarnings("ignore", category=UserWarning, module='scipy')
warnings.filterwarnings("ignore", category=UserWarning, module='paddle')
app = Application()
app.mainloop()
except Exception as e:
logger.critical(f"应用程序崩溃: {str(e)}")
traceback.print_exc()
# 显示错误对话框
import tkinter.messagebox as messagebox
messagebox.showerror("系统错误", f"程序遇到严重错误:\n{str(e)}\n\n详细信息请查看日志文件")