GetOpenFileName崩溃处理办法

本文详细介绍了在初始化COM组件后遇到问题时,如何通过使用/EHa编译选项来诊断并解决问题。重点在于在debug模式下发现异常发生在ntdll.dll模块,提供了一种有效的方法来定位和解决此类问题。

如果在初始化COM之后问题仍然没有解决,可以使用"/EHa"编译选项来解决。

加上"/EHa"后,在debug模式可以看出,是在ntdll.dll中出现了异常。

import math
import os
import sys

import cv2
import numpy as np
from PySide6.QtWidgets import (
    QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QDialog,
    QLabel, QPushButton, QFileDialog, QTextEdit, QTabWidget, QMessageBox,
    QProgressBar, QSlider, QComboBox, QGroupBox, QGridLayout, QToolBar,
    QStatusBar, QDockWidget, QSplitter, QScrollArea, QMenu, QSpinBox,
    QDoubleSpinBox, QCheckBox, QRadioButton, QButtonGroup,
)
from PySide6.QtGui import (
    QAction, QPixmap, QImage, QPainter, QPen, QColor, QIcon, QKeySequence,
    QTransform, QCursor,
)
from PySide6.QtCore import Qt, QThread, Signal, QPoint, QSize, QRect
import matplotlib

# Use the Agg backend so pyplot itself never opens a figure window.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
from scipy import ndimage

try:
    from skimage.feature import graycomatrix, graycoprops
except ImportError:
    # Fall back to the alternative "grey*" spelling when the names above
    # cannot be imported.
    from skimage.feature import greycomatrix as graycomatrix, greycoprops as graycoprops

# Keep only SimHei as the plot font (the plot labels are Chinese).
plt.rcParams["font.family"] = ["SimHei"]
plt.rcParams["axes.unicode_minus"] = False


class ImageProcessingThread(QThread):
    """Worker thread that runs one callable so the GUI does not freeze.

    The callable's return value is delivered through the ``finished`` signal.
    Note that this class attribute shadows the name of the argument-less
    ``finished`` signal that ``QThread`` itself provides.
    """

    finished = Signal(object)

    def __init__(self, function, *args, **kwargs):
        """Store the callable and the arguments it will be invoked with.

        Args:
            function: Callable executed inside ``run``.
            *args: Positional arguments forwarded to ``function``.
            **kwargs: Keyword arguments forwarded to ``function`` (needed by
                callers that pass e.g. ``interpolation=...``).
        """
        super().__init__()
        self.function = function
        self.args = args
        self.kwargs = kwargs

    def run(self):
        """Call the stored function and emit its result via ``finished``."""
        result = self.function(*self.args, **self.kwargs)
        self.finished.emit(result)


class ImageViewer(QWidget):
    """Image display widget with wheel zoom and drag-to-scroll.

    Dragging only has an effect when the viewer is placed (directly or
    indirectly) inside a ``QScrollArea``; otherwise it is a no-op.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.image = QImage()
        self.scale_factor = 1.0
        self.dragging = False
        self.last_pos = QPoint()
        self.setMouseTracking(True)

        layout = QVBoxLayout(self)
        self.label = QLabel(self)
        self.label.setAlignment(Qt.AlignCenter)
        self.label.setMinimumSize(1, 1)  # allow the label to shrink
        layout.addWidget(self.label)

    def set_image(self, image):
        """Show ``image`` (a ``QImage``) at the current scale factor."""
        # Deep-copy so the viewer owns its pixels; the QImage handed in may
        # have been built on top of an external buffer (presumably a numpy
        # array) that can be replaced while this widget still repaints.
        self.image = image.copy()
        self.update_pixmap()

    def update_pixmap(self):
        """Re-render the label from ``self.image`` and ``self.scale_factor``."""
        if self.image.isNull():
            return
        # QPixmap.scaled takes integer sizes; clamp to 1 so an extreme
        # zoom-out never requests an empty pixmap.
        target_width = max(1, round(self.image.width() * self.scale_factor))
        target_height = max(1, round(self.image.height() * self.scale_factor))
        scaled_pixmap = QPixmap.fromImage(self.image).scaled(
            target_width, target_height,
            Qt.KeepAspectRatio, Qt.SmoothTransformation)
        self.label.setPixmap(scaled_pixmap)

    def _scroll_area(self):
        """Return the nearest enclosing ``QScrollArea``, or None."""
        ancestor = self.parentWidget()
        while ancestor is not None:
            if isinstance(ancestor, QScrollArea):
                return ancestor
            ancestor = ancestor.parentWidget()
        return None

    def wheelEvent(self, event):
        """Zoom in or out with the mouse wheel."""
        delta = event.angleDelta().y()
        if delta == 0:
            return  # no vertical wheel movement: nothing to zoom
        self.scale_factor *= 1.1 if delta > 0 else 0.9
        self.update_pixmap()

    def mousePressEvent(self, event):
        """Start a drag when the left button is pressed."""
        if event.button() == Qt.LeftButton:
            self.dragging = True
            self.last_pos = event.position().toPoint()

    def mouseMoveEvent(self, event):
        """Scroll the enclosing scroll area while dragging."""
        if not self.dragging:
            return
        position = event.position().toPoint()
        delta = position - self.last_pos
        self.last_pos = position

        area = self._scroll_area()
        if area is None:
            return  # viewer is not inside a scroll area: nothing to pan
        horizontal = area.horizontalScrollBar()
        horizontal.setValue(horizontal.value() - delta.x())
        vertical = area.verticalScrollBar()
        vertical.setValue(vertical.value() - delta.y())

    def mouseReleaseEvent(self, event):
        """End the drag when the left button is released."""
        if event.button() == Qt.LeftButton:
            self.dragging = False

    def resizeEvent(self, event):
        """Refresh the displayed pixmap when the widget is resized."""
        self.update_pixmap()
        super().resizeEvent(event)


class HistogramWidget(QWidget):
    """Widget that plots an image histogram on a matplotlib canvas."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.figure = Figure(figsize=(5, 3), dpi=100)
        self.canvas = FigureCanvas(self.figure)
        layout = QVBoxLayout(self)
        layout.addWidget(self.canvas)
        self.axes = self.figure.add_subplot(111)

    def update_histogram(self, image):
        """Redraw the histogram for ``image``.

        A 3-dimensional array is plotted as three curves coloured b, g, r
        (channels 0, 1, 2); anything else is plotted as a single black curve.
        """
        self.axes.clear()
        if image.ndim == 3:
            # Colour image: one curve per channel.
            channel_colors = enumerate(('b', 'g', 'r'))
        else:
            # Grayscale image: a single curve.
            channel_colors = [(0, 'black')]
        for channel, color in channel_colors:
            hist = cv2.calcHist([image], [channel], None, [256], [0, 256])
            self.axes.plot(hist, color=color)

        self.axes.set_xlim([0, 256])
        self.axes.set_title('图像直方图')
        self.axes.set_xlabel('像素值')
        self.axes.set_ylabel('像素数量')
        self.figure.tight_layout()
        self.canvas.draw()


class GLCMWidget(QWidget):
    """Widget that shows gray-level co-occurrence matrix (GLCM) features."""

    def __init__(self, parent=None):
        super().__init__(parent)
        layout = QVBoxLayout(self)
        self.text_edit = QTextEdit()
        self.text_edit.setReadOnly(True)
        layout.addWidget(self.text_edit)

    def update_glcm(self, image):
        """Compute GLCM texture features of ``image`` and display them."""
        if image.ndim == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Quantise to 16 gray levels to reduce the amount of computation.
        # NOTE(review): the division by 16 presumably expects 8-bit pixel
        # values (0-255) -- confirm against callers.
        image_quantized = (image // 16).astype(np.uint8)

        # GLCM at distance 1 for the angles 0, 45, 90 and 135 degrees.
        glcm = graycomatrix(
            image_quantized,
            distances=[1],
            angles=[0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
            levels=16,
            symmetric=True,
            normed=True,
        )

        # (display label, graycoprops property name), in display order.
        features = (
            ("对比度", 'contrast'),
            ("相异性", 'dissimilarity'),
            ("同质性", 'homogeneity'),
            ("能量", 'energy'),
            ("相关性", 'correlation'),
            ("角二阶矩", 'ASM'),
        )
        result = "灰度共生矩阵(GLCM)特征:\n\n"
        for label, prop in features:
            # Row 0 holds the values for the single distance, one per angle.
            values = graycoprops(glcm, prop)[0]
            result += f"{label}: {values}\n"
        self.text_edit.setText(result)


class ProcessingDialog(QDialog):
    """Parameter dialog assembled row by row on a three-column grid.

    ``self.layout`` is the grid (note: the attribute shadows the
    ``QWidget.layout`` method) and ``self.row`` is the next row the
    ``add_*`` helpers will use.
    """

    def __init__(self, title, parent=None):
        super().__init__(parent)
        self.setWindowTitle(title)
        # Passing the dialog as parent already installs the grid as the
        # dialog's layout, so no separate setLayout call is needed.
        self.layout = QGridLayout(self)
        self.row = 0

    def _take_row(self):
        """Reserve and return the next free grid row.

        Rows occupied by items that callers added directly to ``self.layout``
        are skipped so a helper never places widgets on top of them.
        """
        first_free = 0
        for index in range(self.layout.count()):
            row, _column, row_span, _column_span = self.layout.getItemPosition(index)
            first_free = max(first_free, row + row_span)
        row = max(self.row, first_free)
        self.row = row + 1
        return row

    def add_slider(self, label_text, min_val, max_val, default_val, step=1, callback=None):
        """Add an integer slider with a synchronised spin box.

        Args:
            label_text: Text of the label in the first column.
            min_val: Minimum value of both controls.
            max_val: Maximum value of both controls.
            default_val: Initial value of both controls.
            step: Single-step size of both controls.
            callback: Optional slot connected to the slider's ``valueChanged``.

        Returns:
            The ``(slider, spinbox)`` pair.
        """
        label = QLabel(label_text)
        slider = QSlider(Qt.Horizontal)
        spinbox = QSpinBox()
        for control in (slider, spinbox):
            control.setMinimum(min_val)
            control.setMaximum(max_val)
            control.setValue(default_val)
            control.setSingleStep(step)

        # Keep slider and spin box in sync.
        slider.valueChanged.connect(spinbox.setValue)
        spinbox.valueChanged.connect(slider.setValue)
        if callback:
            slider.valueChanged.connect(callback)

        row = self._take_row()
        self.layout.addWidget(label, row, 0)
        self.layout.addWidget(slider, row, 1)
        self.layout.addWidget(spinbox, row, 2)
        return slider, spinbox

    def add_double_slider(self, label_text, min_val, max_val, default_val, step=0.1, decimals=1,
                          callback=None):
        """Add a floating-point slider with a synchronised spin box.

        The slider works on integer ticks of ``10 ** -decimals`` so that it
        can represent every value the spin box can show.

        Args:
            label_text: Text of the label in the first column.
            min_val: Minimum value.
            max_val: Maximum value.
            default_val: Initial value.
            step: Single-step size of the spin box (and of the slider, in
                ticks).
            decimals: Number of decimals shown by the spin box.
            callback: Optional callable invoked with the spin box value
                whenever the slider value changes.

        Returns:
            The ``(slider, spinbox)`` pair; read the float from the spin box.
        """
        scale = 10 ** decimals

        def to_ticks(value):
            # round() instead of int(): int(0.29 * 100) would truncate to 28.
            return int(round(value * scale))

        label = QLabel(label_text)

        slider = QSlider(Qt.Horizontal)
        slider.setMinimum(to_ticks(min_val))
        slider.setMaximum(to_ticks(max_val))
        slider.setValue(to_ticks(default_val))
        slider.setSingleStep(max(1, to_ticks(step)))

        spinbox = QDoubleSpinBox()
        # Set the precision first so the range and value are not rounded to
        # the spin box's default number of decimals.
        spinbox.setDecimals(decimals)
        spinbox.setMinimum(min_val)
        spinbox.setMaximum(max_val)
        spinbox.setValue(default_val)
        spinbox.setSingleStep(step)

        # Keep slider and spin box in sync.
        def update_slider(value):
            slider.setValue(to_ticks(value))

        def update_spinbox(ticks):
            spinbox.setValue(ticks / scale)

        slider.valueChanged.connect(update_spinbox)
        spinbox.valueChanged.connect(update_slider)
        if callback:
            # Connected after update_spinbox so the spin box already holds
            # the new value when the callback reads it.
            slider.valueChanged.connect(lambda _ticks: callback(spinbox.value()))

        row = self._take_row()
        self.layout.addWidget(label, row, 0)
        self.layout.addWidget(slider, row, 1)
        self.layout.addWidget(spinbox, row, 2)
        return slider, spinbox

    def add_combo_box(self, label_text, items, default_index=0, callback=None):
        """Add a labelled drop-down list and return the ``QComboBox``.

        ``callback``, if given, is connected to ``currentIndexChanged``.
        """
        label = QLabel(label_text)
        combo_box = QComboBox()
        combo_box.addItems(items)
        combo_box.setCurrentIndex(default_index)
        if callback:
            combo_box.currentIndexChanged.connect(callback)

        row = self._take_row()
        self.layout.addWidget(label, row, 0)
        self.layout.addWidget(combo_box, row, 1, 1, 2)
        return combo_box

    def add_checkbox(self, label_text, default_state=False, callback=None):
        """Add a check box spanning all three columns and return it.

        ``callback``, if given, is connected to ``stateChanged``.
        """
        checkbox = QCheckBox(label_text)
        checkbox.setChecked(default_state)
        if callback:
            checkbox.stateChanged.connect(callback)

        self.layout.addWidget(checkbox, self._take_row(), 0, 1, 3)
        return checkbox

    def add_radio_buttons(self, label_text, options, default_index=0, callback=None):
        """Add a labelled row of radio buttons and return the ``QButtonGroup``.

        Each button's group id is its index in ``options``; ``callback``, if
        given, is connected to the group's ``buttonClicked`` signal.
        """
        label = QLabel(label_text)
        button_group = QButtonGroup(self)
        buttons_layout = QHBoxLayout()
        for index, option in enumerate(options):
            radio = QRadioButton(option)
            if index == default_index:
                radio.setChecked(True)
            button_group.addButton(radio, index)
            buttons_layout.addWidget(radio)
        if callback:
            button_group.buttonClicked.connect(callback)

        row = self._take_row()
        self.layout.addWidget(label, row, 0)
        self.layout.addLayout(buttons_layout, row, 1, 1, 2)
        return button_group

    def add_button_box(self):
        """Add right-aligned OK / Cancel buttons wired to accept / reject.

        Returns:
            The ``(ok_button, cancel_button)`` pair.
        """
        ok_button = QPushButton("确定")
        cancel_button = QPushButton("取消")
        ok_button.clicked.connect(self.accept)
        cancel_button.clicked.connect(self.reject)

        button_layout = QHBoxLayout()
        button_layout.addStretch()
        button_layout.addWidget(ok_button)
        button_layout.addWidget(cancel_button)

        self.layout.addLayout(button_layout, self._take_row(), 0, 1, 3)
        return ok_button, cancel_button


class MainWindow(QMainWindow):
    """Main window class."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("数字图像处理系统")
        self.setGeometry(100, 100, 1200, 800)

        # Initialise state.
        self.original_image = None  # original image
        self.processed_image = None  # processed image
        self.current_image = None  # image currently displayed
        self.history = []  # operation history
        self.history_index = -1  # current position in the history

        # Central widget.
        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)

        # Main layout.
        self.main_layout = QHBoxLayout(self.central_widget)

        # Left panel.
        self.left_panel = QVBoxLayout()

        # Toolbar.
        self.create_toolbar()

        # Image display area.
        self.create_image_viewer()

        # Right panel.
        self.right_panel = QVBoxLayout()

        # Processing-history tabs.
        self.create_history_tabs()

        # Splitter holding the left and right panels.
        self.splitter = QSplitter(Qt.Horizontal)
        left_widget = QWidget()
        left_widget.setLayout(self.left_panel)
        right_widget = QWidget()
        right_widget.setLayout(self.right_panel)
        self.splitter.addWidget(left_widget)
        self.splitter.addWidget(right_widget)
        self.splitter.setSizes([800, 400])  # initial sizes
        self.main_layout.addWidget(self.splitter)

        # Menus.
        self.create_menu()

        # Status bar.
        self.statusBar().showMessage("就绪")

    def create_menu(self):
        """Create the menu bar."""
        # File menu.
        file_menu = self.menuBar().addMenu("文件")

        open_action = QAction("打开", self)
        open_action.setShortcut(QKeySequence.Open)
        open_action.triggered.connect(self.open_image)
        file_menu.addAction(open_action)

        save_action = QAction("保存", self)
        save_action.setShortcut(QKeySequence.Save)
        save_action.triggered.connect(self.save_image)
        file_menu.addAction(save_action)

        save_as_action = QAction("另存为", self)
        save_as_action.setShortcut(QKeySequence.SaveAs)
save_as_action.setShortcut("Ctrl+Shift+S") save_as_action.triggered.connect(self.save_image_as) file_menu.addAction(save_as_action) file_menu.addSeparator() exit_action = QAction("退出", self) exit_action.setShortcut(QKeySequence.Quit) exit_action.triggered.connect(self.close) file_menu.addAction(exit_action) # 编辑菜单 edit_menu = self.menuBar().addMenu("编辑") undo_action = QAction("撤销", self) undo_action.setShortcut(QKeySequence.Undo) undo_action.triggered.connect(self.undo) edit_menu.addAction(undo_action) redo_action = QAction("重做", self) redo_action.setShortcut(QKeySequence.Redo) redo_action.triggered.connect(self.redo) edit_menu.addAction(redo_action) # 处理菜单 process_menu = self.menuBar().addMenu("图像处理") # 图像转换子菜单 convert_menu = QMenu("图像转换", self) rgb_to_gray_action = QAction("RGB转灰度", self) rgb_to_gray_action.triggered.connect(self.rgb_to_gray) convert_menu.addAction(rgb_to_gray_action) resize_action = QAction("调整分辨率", self) resize_action.triggered.connect(self.resize_image) convert_menu.addAction(resize_action) process_menu.addMenu(convert_menu) # 图像增强子菜单 enhance_menu = QMenu("图像增强", self) histogram_equalization_action = QAction("直方图均衡化", self) histogram_equalization_action.triggered.connect(self.histogram_equalization) enhance_menu.addAction(histogram_equalization_action) log_transform_action = QAction("对数变换", self) log_transform_action.triggered.connect(self.log_transform) enhance_menu.addAction(log_transform_action) power_law_action = QAction("幂律变换", self) power_law_action.triggered.connect(self.power_law_transform) enhance_menu.addAction(power_law_action) blur_menu = QMenu("平滑滤波", self) mean_blur_action = QAction("均值滤波", self) mean_blur_action.triggered.connect(lambda: self.spatial_filtering("均值滤波")) blur_menu.addAction(mean_blur_action) gaussian_blur_action = QAction("高斯滤波", self) gaussian_blur_action.triggered.connect(lambda: self.spatial_filtering("高斯滤波")) blur_menu.addAction(gaussian_blur_action) median_blur_action = QAction("中值滤波", self) 
median_blur_action.triggered.connect(lambda: self.spatial_filtering("中值滤波")) blur_menu.addAction(median_blur_action) enhance_menu.addMenu(blur_menu) sharpen_menu = QMenu("锐化滤波", self) sobel_action = QAction("Sobel算子", self) sobel_action.triggered.connect(lambda: self.spatial_filtering("Sobel算子")) sharpen_menu.addAction(sobel_action) prewitt_action = QAction("Prewitt算子", self) prewitt_action.triggered.connect(lambda: self.spatial_filtering("Prewitt算子")) sharpen_menu.addAction(prewitt_action) laplacian_action = QAction("Laplacian算子", self) laplacian_action.triggered.connect(lambda: self.spatial_filtering("Laplacian算子")) sharpen_menu.addAction(laplacian_action) enhance_menu.addMenu(sharpen_menu) process_menu.addMenu(enhance_menu) # 图像复原子菜单 restore_menu = QMenu("图像复原", self) motion_deblur_action = QAction("运动模糊复原", self) motion_deblur_action.triggered.connect(self.motion_deblur) restore_menu.addAction(motion_deblur_action) gaussian_noise_removal_action = QAction("高斯噪声去除", self) gaussian_noise_removal_action.triggered.connect(lambda: self.noise_removal("高斯噪声")) restore_menu.addAction(gaussian_noise_removal_action) salt_pepper_noise_removal_action = QAction("椒盐噪声去除", self) salt_pepper_noise_removal_action.triggered.connect(lambda: self.noise_removal("椒盐噪声")) restore_menu.addAction(salt_pepper_noise_removal_action) process_menu.addMenu(restore_menu) # 几何变换子菜单 geometric_menu = QMenu("几何变换", self) translate_action = QAction("平移", self) translate_action.triggered.connect(self.translate_image) geometric_menu.addAction(translate_action) rotate_action = QAction("旋转", self) rotate_action.triggered.connect(self.rotate_image) geometric_menu.addAction(rotate_action) scale_action = QAction("缩放", self) scale_action.triggered.connect(self.scale_image) geometric_menu.addAction(scale_action) flip_action = QAction("镜像", self) flip_action.triggered.connect(self.flip_image) geometric_menu.addAction(flip_action) process_menu.addMenu(geometric_menu) # 形态学处理子菜单 morphology_menu = 
QMenu("形态学处理", self) erosion_action = QAction("腐蚀", self) erosion_action.triggered.connect(lambda: self.morphological_operation("腐蚀")) morphology_menu.addAction(erosion_action) dilation_action = QAction("膨胀", self) dilation_action.triggered.connect(lambda: self.morphological_operation("膨胀")) morphology_menu.addAction(dilation_action) opening_action = QAction("开运算", self) opening_action.triggered.connect(lambda: self.morphological_operation("开运算")) morphology_menu.addAction(opening_action) closing_action = QAction("闭运算", self) closing_action.triggered.connect(lambda: self.morphological_operation("闭运算")) morphology_menu.addAction(closing_action) edge_extraction_action = QAction("边界提取", self) edge_extraction_action.triggered.connect(self.edge_extraction) morphology_menu.addAction(edge_extraction_action) process_menu.addMenu(morphology_menu) # 图像分割子菜单 segmentation_menu = QMenu("图像分割", self) threshold_action = QAction("阈值分割", self) threshold_action.triggered.connect(self.threshold_segmentation) segmentation_menu.addAction(threshold_action) adaptive_threshold_action = QAction("自适应阈值分割", self) adaptive_threshold_action.triggered.connect(self.adaptive_threshold_segmentation) segmentation_menu.addAction(adaptive_threshold_action) watershed_action = QAction("分水岭分割", self) watershed_action.triggered.connect(self.watershed_segmentation) segmentation_menu.addAction(watershed_action) process_menu.addMenu(segmentation_menu) # 图像描述子菜单 description_menu = QMenu("图像描述", self) hu_moments_action = QAction("计算Hu不变矩", self) hu_moments_action.triggered.connect(self.calculate_hu_moments) description_menu.addAction(hu_moments_action) glcm_action = QAction("计算灰度共生矩阵", self) glcm_action.triggered.connect(self.calculate_glcm) description_menu.addAction(glcm_action) process_menu.addMenu(description_menu) # 视图菜单 view_menu = self.menuBar().addMenu("视图") zoom_in_action = QAction("放大", self) zoom_in_action.setShortcut("Ctrl++") zoom_in_action.triggered.connect(self.zoom_in) 
view_menu.addAction(zoom_in_action) zoom_out_action = QAction("缩小", self) zoom_out_action.setShortcut("Ctrl+-") zoom_out_action.triggered.connect(self.zoom_out) view_menu.addAction(zoom_out_action) fit_to_window_action = QAction("适应窗口", self) fit_to_window_action.setShortcut("Ctrl+F") fit_to_window_action.triggered.connect(self.fit_to_window) view_menu.addAction(fit_to_window_action) # 帮助菜单 help_menu = self.menuBar().addMenu("帮助") about_action = QAction("关于", self) about_action.triggered.connect(self.about) help_menu.addAction(about_action) help_action = QAction("帮助", self) help_action.triggered.connect(self.show_help) help_menu.addAction(help_action) def create_toolbar(self): """创建工具栏""" toolbar = QToolBar("工具栏") self.addToolBar(toolbar) # 文件操作 open_action = QAction(QIcon.fromTheme("document-open"), "打开", self) open_action.triggered.connect(self.open_image) toolbar.addAction(open_action) save_action = QAction(QIcon.fromTheme("document-save"), "保存", self) save_action.triggered.connect(self.save_image) toolbar.addAction(save_action) toolbar.addSeparator() # 编辑操作 undo_action = QAction(QIcon.fromTheme("edit-undo"), "撤销", self) undo_action.triggered.connect(self.undo) toolbar.addAction(undo_action) redo_action = QAction(QIcon.fromTheme("edit-redo"), "重做", self) redo_action.triggered.connect(self.redo) toolbar.addAction(redo_action) toolbar.addSeparator() # 视图操作 zoom_in_action = QAction(QIcon.fromTheme("zoom-in"), "放大", self) zoom_in_action.triggered.connect(self.zoom_in) toolbar.addAction(zoom_in_action) zoom_out_action = QAction(QIcon.fromTheme("zoom-out"), "缩小", self) zoom_out_action.triggered.connect(self.zoom_out) toolbar.addAction(zoom_out_action) fit_to_window_action = QAction(QIcon.fromTheme("zoom-fit-best"), "适应窗口", self) fit_to_window_action.triggered.connect(self.fit_to_window) toolbar.addAction(fit_to_window_action) toolbar.addSeparator() # 图像比较 compare_action = QAction(QIcon.fromTheme("view-compare"), "比较原图", self) 
compare_action.triggered.connect(self.compare_with_original) toolbar.addAction(compare_action) def create_image_viewer(self): """创建图像显示区域""" # 创建滚动区域 self.scroll_area = QScrollArea() self.scroll_area.setWidgetResizable(True) # 创建图像查看器 self.image_viewer = ImageViewer() self.scroll_area.setWidget(self.image_viewer) # 添加到左侧面板 self.left_panel.addWidget(self.scroll_area) # 创建直方图显示区域 self.histogram_widget = HistogramWidget() self.left_panel.addWidget(self.histogram_widget) def create_history_tabs(self): """创建历史标签页""" self.history_tabs = QTabWidget() # 原始图像标签页 self.original_tab = QWidget() self.original_tab_layout = QVBoxLayout(self.original_tab) self.original_viewer = ImageViewer() self.original_tab_layout.addWidget(self.original_viewer) self.history_tabs.addTab(self.original_tab, "原始图像") # 处理后图像标签页 self.processed_tab = QWidget() self.processed_tab_layout = QVBoxLayout(self.processed_tab) self.processed_viewer = ImageViewer() self.processed_tab_layout.addWidget(self.processed_viewer) self.history_tabs.addTab(self.processed_tab, "处理后图像") # 描述信息标签页 self.info_tab = QWidget() self.info_tab_layout = QVBoxLayout(self.info_tab) self.info_text = QTextEdit() self.info_text.setReadOnly(True) self.info_tab_layout.addWidget(self.info_text) self.history_tabs.addTab(self.info_tab, "图像信息") # GLCM特征标签页 self.glcm_tab = QWidget() self.glcm_tab_layout = QVBoxLayout(self.glcm_tab) self.glcm_widget = GLCMWidget() self.glcm_tab_layout.addWidget(self.glcm_widget) self.history_tabs.addTab(self.glcm_tab, "GLCM特征") # 添加到右侧面板 self.right_panel.addWidget(self.history_tabs) def open_image(self): """打开图像文件""" file_path, _ = QFileDialog.getOpenFileName( self, "打开图像", "", "图像文件 (*.png *.jpg *.jpeg *.bmp *.gif *.tiff)" ) if file_path: self.statusBar().showMessage(f"正在加载图像: {file_path}") # 在单独的线程中加载图像,避免界面卡顿 thread = ImageProcessingThread(self._load_image, file_path) thread.finished.connect(self._on_image_loaded) thread.start() def _load_image(self, file_path): """在线程中加载图像""" image = cv2.imread(file_path) 
if image is None: return None, file_path return image, file_path def _on_image_loaded(self, result): """图像加载完成后的回调函数""" image, file_path = result if image is None: QMessageBox.critical(self, "错误", f"无法加载图像: {file_path}") self.statusBar().showMessage("加载图像失败") return self.original_image = image self.processed_image = image.copy() self.current_image = image.copy() # 显示图像 self.display_image(self.current_image) # 更新原始图像查看器 self.original_viewer.set_image(self.cv_to_qimage(self.original_image)) # 更新图像信息 self.update_image_info() # 清空历史 self.history = [self.original_image.copy()] self.history_index = 0 self.statusBar().showMessage(f"已加载图像: {os.path.basename(file_path)}") def save_image(self): """保存当前图像""" if self.processed_image is None: QMessageBox.warning(self, "警告", "没有可保存的图像") return if not hasattr(self, 'current_file_path'): self.save_image_as() else: try: cv2.imwrite(self.current_file_path, self.processed_image) self.statusBar().showMessage(f"已保存图像: {os.path.basename(self.current_file_path)}") except Exception as e: QMessageBox.critical(self, "错误", f"保存图像失败: {str(e)}") self.statusBar().showMessage("保存图像失败") def save_image_as(self): """另存为图像""" if self.processed_image is None: QMessageBox.warning(self, "警告", "没有可保存的图像") return file_path, _ = QFileDialog.getSaveFileName( self, "保存图像", "", "PNG (*.png);;JPEG (*.jpg);;BMP (*.bmp);;TIFF (*.tiff)" ) if file_path: try: # 确保保存的是RGB格式 if len(self.processed_image.shape) == 3: image_to_save = cv2.cvtColor(self.processed_image, cv2.COLOR_BGR2RGB) else: image_to_save = self.processed_image cv2.imwrite(file_path, image_to_save) self.current_file_path = file_path self.statusBar().showMessage(f"已保存图像: {os.path.basename(file_path)}") except Exception as e: QMessageBox.critical(self, "错误", f"保存图像失败: {str(e)}") self.statusBar().showMessage("保存图像失败") def display_image(self, image): """显示图像""" if image is None: return qimage = self.cv_to_qimage(image) self.image_viewer.set_image(qimage) # 更新直方图 
self.histogram_widget.update_histogram(image) # 更新处理后图像查看器 self.processed_viewer.set_image(qimage) def cv_to_qimage(self, cv_image): """将OpenCV图像转换为Qt图像""" if len(cv_image.shape) == 3: # 彩色图像 height, width, channel = cv_image.shape bytes_per_line = 3 * width qimage = QImage(cv_image.data, width, height, bytes_per_line, QImage.Format_BGR888) else: # 灰度图像 height, width = cv_image.shape bytes_per_line = width qimage = QImage(cv_image.data, width, height, bytes_per_line, QImage.Format_Grayscale8) return qimage def update_image_info(self): """更新图像信息""" if self.original_image is None: return info = "图像信息:\n\n" info += f"尺寸: {self.original_image.shape[1]} x {self.original_image.shape[0]} 像素\n" if len(self.original_image.shape) == 3: info += f"通道数: {self.original_image.shape[2]}\n" info += "类型: 彩色图像\n" else: info += "通道数: 1\n" info += "类型: 灰度图像\n" info += f"数据类型: {self.original_image.dtype}" self.info_text.setText(info) def add_to_history(self, processed_image, operation_name): """添加操作到历史记录""" # 如果当前不在历史的末尾,删除后面的所有历史 if self.history_index < len(self.history) - 1: self.history = self.history[:self.history_index + 1] # 添加新的历史记录 self.history.append(processed_image.copy()) self.history_index += 1 # 更新处理后图像标签页 self.processed_viewer.set_image(self.cv_to_qimage(processed_image)) # 更新历史标签页标题 self.history_tabs.setTabText(1, f"处理后图像 ({operation_name})") def undo(self): """撤销操作""" if self.history_index > 0: self.history_index -= 1 self.processed_image = self.history[self.history_index].copy() self.display_image(self.processed_image) self.statusBar().showMessage("已撤销操作") def redo(self): """重做操作""" if self.history_index < len(self.history) - 1: self.history_index += 1 self.processed_image = self.history[self.history_index].copy() self.display_image(self.processed_image) self.statusBar().showMessage("已重做操作") def zoom_in(self): """放大图像""" self.image_viewer.scale_factor *= 1.1 self.image_viewer.update_pixmap() def zoom_out(self): """缩小图像""" self.image_viewer.scale_factor *= 0.9 
self.image_viewer.update_pixmap() def fit_to_window(self): """适应窗口显示""" if self.current_image is None: return # 计算适应窗口的缩放因子 scroll_area_width = self.scroll_area.width() - 20 # 减去边框 scroll_area_height = self.scroll_area.height() - 20 image_width = self.current_image.shape[1] image_height = self.current_image.shape[0] scale_x = scroll_area_width / image_width scale_y = scroll_area_height / image_height self.image_viewer.scale_factor = min(scale_x, scale_y) self.image_viewer.update_pixmap() def compare_with_original(self): """比较处理后的图像与原始图像""" if self.original_image is None or self.processed_image is None: return # 创建一个新窗口进行比较 compare_window = QMainWindow() compare_window.setWindowTitle("图像比较") compare_window.resize(1000, 500) # 创建分割器 splitter = QSplitter(Qt.Horizontal) # 左侧显示原始图像 left_widget = QWidget() left_layout = QVBoxLayout(left_widget) left_label = QLabel("原始图像") left_label.setAlignment(Qt.AlignCenter) left_viewer = ImageViewer() left_viewer.set_image(self.cv_to_qimage(self.original_image)) left_layout.addWidget(left_label) left_layout.addWidget(left_viewer) # 右侧显示处理后的图像 right_widget = QWidget() right_layout = QVBoxLayout(right_widget) right_label = QLabel("处理后图像") right_label.setAlignment(Qt.AlignCenter) right_viewer = ImageViewer() right_viewer.set_image(self.cv_to_qimage(self.processed_image)) right_layout.addWidget(right_label) right_layout.addWidget(right_viewer) # 添加到分割器 splitter.addWidget(left_widget) splitter.addWidget(right_widget) splitter.setSizes([500, 500]) compare_window.setCentralWidget(splitter) compare_window.show() def about(self): """显示关于对话框""" QMessageBox.about(self, "关于数字图像处理系统", "数字图像处理系统\n\n" "基于OpenCV和PySide6开发\n" "支持图像转换、增强、复原、几何变换、形态学处理、分割和描述等功能\n\n" "版本: 1.0.0" ) def show_help(self): """显示帮助对话框""" help_text = ( "数字图像处理系统帮助文档\n\n" "1. 文件操作:\n" " - 打开: 从文件系统加载图像\n" " - 保存: 保存当前处理的图像\n" " - 另存为: 以新文件名保存图像\n\n" "2. 编辑操作:\n" " - 撤销: 撤销上一步操作\n" " - 重做: 恢复撤销的操作\n\n" "3. 
图像处理:\n" " - 图像转换: 支持模式转换和分辨率调整\n" " - 图像增强: 包括直方图均衡化、滤波等\n" " - 图像复原: 处理运动模糊和噪声\n" " - 几何变换: 平移、旋转、缩放和镜像\n" " - 形态学处理: 腐蚀、膨胀、开/闭运算等\n" " - 图像分割: 阈值分割和区域分割\n" " - 图像描述: 计算不变矩和灰度共生矩阵\n\n" "4. 视图操作:\n" " - 放大/缩小: 调整图像显示大小\n" " - 适应窗口: 自动调整图像大小以适应窗口\n\n" "5. 比较功能:\n" " - 比较原图: 同显示原始图像和处理后图像进行对比" ) QMessageBox.information(self, "帮助", help_text) # 图像处理功能实现 def rgb_to_gray(self): """RGB转灰度""" if self.processed_image is None: return if len(self.processed_image.shape) == 2: QMessageBox.warning(self, "警告", "当前图像已经是灰度图像") return thread = ImageProcessingThread(cv2.cvtColor, self.processed_image, cv2.COLOR_BGR2GRAY) thread.finished.connect(lambda result: self._process_finished(result, "RGB转灰度")) thread.start() def resize_image(self): """调整图像分辨率""" if self.processed_image is None: return dialog = ProcessingDialog("调整分辨率") # 获取当前图像尺寸 current_width = self.processed_image.shape[1] current_height = self.processed_image.shape[0] # 添加宽度和高度输入框 width_spinbox = QSpinBox() width_spinbox.setRange(1, 10000) width_spinbox.setValue(current_width) height_spinbox = QSpinBox() height_spinbox.setRange(1, 10000) height_spinbox.setValue(current_height) # 保持比例复选框 keep_ratio_checkbox = QCheckBox("保持比例") keep_ratio_checkbox.setChecked(True) # 添加到对话框 dialog.layout.addWidget(QLabel("宽度:"), 0, 0) dialog.layout.addWidget(width_spinbox, 0, 1) dialog.layout.addWidget(QLabel("像素"), 0, 2) dialog.layout.addWidget(QLabel("高度:"), 1, 0) dialog.layout.addWidget(height_spinbox, 1, 1) dialog.layout.addWidget(QLabel("像素"), 1, 2) dialog.layout.addWidget(keep_ratio_checkbox, 2, 0, 1, 3) # 添加按钮 ok_button, cancel_button = dialog.add_button_box() # 保持比例功能 ratio = current_width / current_height def update_height(): if keep_ratio_checkbox.isChecked(): height_spinbox.setValue(int(width_spinbox.value() / ratio)) def update_width(): if keep_ratio_checkbox.isChecked(): width_spinbox.setValue(int(height_spinbox.value() * ratio)) width_spinbox.valueChanged.connect(update_height) height_spinbox.valueChanged.connect(update_width) 
if dialog.exec_(): new_width = width_spinbox.value() new_height = height_spinbox.value() # 添加插值方法选择 interpolation_dialog = ProcessingDialog("选择插值方法") methods = ["最近邻", "双线性", "双三次", "Lanczos"] method_combo = interpolation_dialog.add_combo_box("插值方法", methods) if interpolation_dialog.exec_(): method_index = method_combo.currentIndex() interpolation_methods = [ cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4 ] interpolation = interpolation_methods[method_index] thread = ImageProcessingThread(cv2.resize, self.processed_image, (new_width, new_height), interpolation=interpolation) thread.finished.connect(lambda result: self._process_finished(result, "调整分辨率")) thread.start() def histogram_equalization(self): """直方图均衡化""" if self.processed_image is None: return if len(self.processed_image.shape) == 3: # 彩色图像需要先转换到YUV空间 def equalize_color(image): yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV) yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR) thread = ImageProcessingThread(equalize_color, self.processed_image) else: # 灰度图像直接均衡化 thread = ImageProcessingThread(cv2.equalizeHist, self.processed_image) thread.finished.connect(lambda result: self._process_finished(result, "直方图均衡化")) thread.start() def log_transform(self): """对数变换""" if self.processed_image is None: return dialog = ProcessingDialog("对数变换") c_slider, c_spinbox = dialog.add_slider("常数C", 1, 100, 25) if dialog.exec_(): c = c_spinbox.value() def log_transform(image): # 确保图像是浮点类型 image_float = image.astype(np.float32) / 255.0 # 应用对数变换 result = c * np.log(1 + image_float) # 归一化到[0, 1] result = cv2.normalize(result, None, 0, 1, cv2.NORM_MINMAX) # 转回uint8 return (result * 255).astype(np.uint8) thread = ImageProcessingThread(log_transform, self.processed_image) thread.finished.connect(lambda result: self._process_finished(result, "对数变换")) thread.start() def power_law_transform(self): """幂律变换""" if self.processed_image is None: return dialog = 
ProcessingDialog("幂律变换") gamma_slider, gamma_spinbox = dialog.add_double_slider("伽马值", 0.1, 5.0, 1.0, 0.1, 1) if dialog.exec_(): gamma = gamma_spinbox.value() def power_law_transform(image): # 确保图像是浮点类型 image_float = image.astype(np.float32) / 255.0 # 应用幂律变换 result = np.power(image_float, gamma) # 归一化到[0, 1] result = cv2.normalize(result, None, 0, 1, cv2.NORM_MINMAX) # 转回uint8 return (result * 255).astype(np.uint8) thread = ImageProcessingThread(power_law_transform, self.processed_image) thread.finished.connect(lambda result: self._process_finished(result, f"幂律变换 (γ={gamma})")) thread.start() def spatial_filtering(self, filter_type): """空域滤波""" if self.processed_image is None: return dialog = ProcessingDialog(f"{filter_type}参数设置") if filter_type in ["均值滤波", "高斯滤波"]: kernel_size, _ = dialog.add_slider("核大小", 1, 21, 3, 2) # 奇数 elif filter_type == "中值滤波": kernel_size, _ = dialog.add_slider("孔径大小", 1, 21, 3, 2) # 奇数 elif filter_type in ["Sobel算子", "Prewitt算子"]: direction_combo = dialog.add_combo_box("方向", ["水平", "垂直", "两者"]) elif filter_type == "Laplacian算子": ksize_combo = dialog.add_combo_box("核大小", ["1", "3", "5", "7"], 1) if dialog.exec_(): if filter_type == "均值滤波": ksize = kernel_size.value() def mean_filter(image): return cv2.blur(image, (ksize, ksize)) thread = ImageProcessingThread(mean_filter, self.processed_image) elif filter_type == "高斯滤波": ksize = kernel_size.value() def gaussian_filter(image): return cv2.GaussianBlur(image, (ksize, ksize), 0) thread = ImageProcessingThread(gaussian_filter, self.processed_image) elif filter_type == "中值滤波": ksize = kernel_size.value() def median_filter(image): return cv2.medianBlur(image, ksize) thread = ImageProcessingThread(median_filter, self.processed_image) elif filter_type == "Sobel算子": direction = direction_combo.currentIndex() def sobel_filter(image): if len(image.shape) == 3: image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if direction == 0: # 水平 sobelx = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3) return 
def motion_deblur(self):
    """Restore a motion-blurred image with a frequency-domain Wiener filter.

    Opens a parameter dialog asking for the blur length, the blur angle and
    the noise-to-signal ratio used as the Wiener regularisation term.  When
    the dialog is accepted the filter runs in an ``ImageProcessingThread``
    and its result is handed to ``_process_finished``.  Nothing happens when
    no image is loaded or the dialog is cancelled.
    """
    if self.processed_image is None:
        return

    dialog = ProcessingDialog("运动模糊复原参数设置")
    _, length_spinbox = dialog.add_slider("运动长度", 1, 100, 15)
    _, angle_spinbox = dialog.add_slider("运动角度", 0, 360, 0)
    _, gamma_spinbox = dialog.add_double_slider("噪声功率谱比", 0.01, 10.0, 0.1, 0.01, 2)
    if not dialog.exec_():
        return

    length = length_spinbox.value()
    angle = angle_spinbox.value()
    gamma = gamma_spinbox.value()

    def deblur(image):
        """Wiener-deconvolve *image* and return an 8-bit grayscale result."""
        # The filter works on a single channel.
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image.copy()

        # Build the point-spread function: a horizontal line of `length`
        # pixels, rotated to the requested angle.
        kernel = np.zeros((length, length), dtype=np.float32)
        kernel[int((length - 1) / 2), :] = np.ones(length, dtype=np.float32)
        rotation = cv2.getRotationMatrix2D((length / 2, length / 2), angle, 1.0)
        kernel = cv2.warpAffine(kernel, rotation, (length, length))
        kernel /= length

        # Transform image and kernel (zero-padded to the image size).
        fft = np.fft.fft2(gray)
        fft_kernel = np.fft.fft2(kernel, s=gray.shape)

        # Wiener filter: conj(H) / (|H|^2 + gamma), applied to the spectrum.
        h_conj = np.conj(fft_kernel)
        h_squared = np.abs(fft_kernel) ** 2
        restored_spectrum = (h_conj / (h_squared + gamma)) * fft

        # Back to the spatial domain, then stretch to 0-255.
        deblurred = np.abs(np.fft.ifft2(restored_spectrum))
        deblurred = cv2.normalize(deblurred, None, 0, 255, cv2.NORM_MINMAX)
        # Fixed: the original called the non-existent ndarray method `.ast`,
        # which raised AttributeError inside the worker thread.
        return deblurred.astype(np.uint8)

    # NOTE(review): `thread` is referenced only by this local name; confirm
    # the worker is kept alive elsewhere while it is running.
    thread = ImageProcessingThread(deblur, self.processed_image)
    thread.finished.connect(lambda result: self._process_finished(result, "运动模糊复原"))
    thread.start()
def flip_image(self):
    """Mirror the current image horizontally, vertically, or both.

    Shows a dialog with the mirror type, then runs ``cv2.flip`` in an
    ``ImageProcessingThread`` and passes the result to ``_process_finished``.
    Nothing happens when no image is loaded or the dialog is cancelled.
    """
    if self.processed_image is None:
        return

    # (label, cv2 flipCode) pairs.  OpenCV uses 0 for a flip around the
    # x-axis (vertical mirror), a positive value for a flip around the
    # y-axis (horizontal mirror) and a negative value for both.  The original
    # passed the combo index directly, which swapped horizontal/vertical and
    # turned "both" (index 2) into a horizontal-only flip.
    flip_options = [("水平", 1), ("垂直", 0), ("水平和垂直", -1)]

    dialog = ProcessingDialog("镜像图像")
    flip_type = dialog.add_combo_box("镜像类型", [label for label, _ in flip_options])
    if not dialog.exec_():
        return

    label, flip_code = flip_options[flip_type.currentIndex()]

    def flip(image):
        """Return *image* mirrored according to the selected flip code."""
        return cv2.flip(image, flip_code)

    thread = ImageProcessingThread(flip, self.processed_image)
    thread.finished.connect(lambda result: self._process_finished(result, f"镜像 ({label})"))
    thread.start()
def threshold_segmentation(self):
    """Segment the current image with a global threshold.

    The dialog collects the threshold, the maximum output value and the
    threshold mode.  The work is done by ``cv2.threshold`` on a grayscale
    version of the image inside an ``ImageProcessingThread``; the result is
    forwarded to ``_process_finished``.  Nothing happens when no image is
    loaded or the dialog is cancelled.
    """
    if self.processed_image is None:
        return

    mode_names = ("二进制阈值", "反二进制阈值", "截断阈值", "零阈值", "反零阈值", "Otsu算法")

    dialog = ProcessingDialog("阈值分割参数设置")
    _, threshold_spinbox = dialog.add_slider("阈值", 0, 255, 127)
    _, max_value_spinbox = dialog.add_slider("最大值", 0, 255, 255)
    threshold_type = dialog.add_combo_box("阈值类型", list(mode_names))
    if not dialog.exec_():
        return

    threshold = threshold_spinbox.value()
    max_value = max_value_spinbox.value()
    type_index = threshold_type.currentIndex()

    # OpenCV flag for each entry of `mode_names`, in the same order.
    mode_flag = (
        cv2.THRESH_BINARY,
        cv2.THRESH_BINARY_INV,
        cv2.THRESH_TRUNC,
        cv2.THRESH_TOZERO,
        cv2.THRESH_TOZERO_INV,
        cv2.THRESH_BINARY + cv2.THRESH_OTSU,
    )[type_index]
    label = f"阈值分割 ({mode_names[type_index]})"

    def threshold_segment(image):
        """Return the thresholded grayscale version of *image*."""
        is_color = len(image.shape) == 3
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_color else image.copy()
        # For the Otsu entry (index 5) the manual threshold is replaced by 0.
        level = 0 if type_index == 5 else threshold
        return cv2.threshold(gray, level, max_value, mode_flag)[1]

    thread = ImageProcessingThread(threshold_segment, self.processed_image)
    thread.finished.connect(lambda result: self._process_finished(result, label))
    thread.start()
def watershed_segmentation(self):
    """Segment the current colour image with the watershed algorithm.

    Warns and returns when the image does not have three dimensions.  The
    dialog collects a threshold and the size of the morphological kernel;
    the segmentation then runs in an ``ImageProcessingThread`` and the
    annotated image is passed to ``_process_finished``.
    """
    if self.processed_image is None:
        return
    if len(self.processed_image.shape) != 3:
        QMessageBox.warning(self, "警告", "分水岭分割需要彩色图像")
        return

    dialog = ProcessingDialog("分水岭分割参数设置")
    _, threshold_spinbox = dialog.add_slider("阈值", 0, 255, 100)
    _, morph_size_spinbox = dialog.add_slider("形态学操作核大小", 1, 21, 3)
    if not dialog.exec_():
        return

    threshold = threshold_spinbox.value()
    morph_size = morph_size_spinbox.value()

    def watershed(image):
        """Return a copy of *image* with watershed boundaries drawn on it."""
        # Work on a private copy: the original wrote the boundaries into the
        # array it was given, i.e. into `self.processed_image`, from the
        # worker thread.
        output = image.copy()
        gray = cv2.cvtColor(output, cv2.COLOR_BGR2GRAY)

        # NOTE(review): THRESH_OTSU is set, so OpenCV presumably picks the
        # threshold itself and ignores the slider value — confirm intent.
        _, thresh = cv2.threshold(gray, threshold, 255,
                                  cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Remove small noise with a morphological opening.
        kernel = np.ones((morph_size, morph_size), np.uint8)
        opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

        # Sure background: dilated foreground mask.
        sure_bg = cv2.dilate(opening, kernel, iterations=3)

        # Sure foreground: pixels far away from any background pixel.
        dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
        _, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
        sure_fg = np.uint8(sure_fg)

        # Everything in between is undecided.
        unknown = cv2.subtract(sure_bg, sure_fg)

        # Label the foreground components; shift labels by one so the
        # background is 1 and 0 is free to mark the undecided region.
        _, markers = cv2.connectedComponents(sure_fg)
        markers = markers + 1
        markers[unknown == 255] = 0

        markers = cv2.watershed(output, markers)
        output[markers == -1] = [0, 0, 255]  # boundaries in red (BGR)
        return output

    thread = ImageProcessingThread(watershed, self.processed_image)
    thread.finished.connect(lambda result: self._process_finished(result, "分水岭分割"))
    thread.start()
self.glcm_widget.update_glcm(self.processed_image) self.history_tabs.setCurrentWidget(self.glcm_tab) self.statusBar().showMessage("灰度共生矩阵计算完成") def _process_finished(self, result, operation_name): """图像处理完成后的回调""" self.processed_image = result self.display_image(result) self.add_to_history(result, operation_name) self.statusBar().showMessage(f"{operation_name}完成") if __name__ == "__main__": app = QApplication(sys.argv) window = MainWindow() window.show() sys.exit(app.exec())为什么运行后打开图片D:\python\python.exe D:\PythonProject\.venv\2.py 插入图片后闪退后显示 进程已结束,退出代码为 -1073740791 (0xC0000409)
06-09
def check_ffmpeg_available() -> Tuple[bool, str]:
    """Report whether a working ``ffmpeg`` executable is on the PATH.

    Returns:
        A ``(ok, message)`` pair.  ``ok`` is True only when ``ffmpeg
        -version`` runs within three seconds and its standard output
        contains ``"ffmpeg version"``; ``message`` explains the outcome.
    """
    if shutil.which("ffmpeg") is None:
        return False, "系统中未找到ffmpeg,请安装并添加到PATH"

    try:
        probe = subprocess.run(
            ["ffmpeg", "-version"],
            capture_output=True,
            text=True,
            timeout=3,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return False, "FFmpeg执行失败"
    except Exception as exc:
        return False, f"FFmpeg检查出错: {exc!s}"

    try:
        reports_version = "ffmpeg version" in probe.stdout
    except Exception as exc:
        return False, f"FFmpeg检查出错: {exc!s}"

    if reports_version:
        return True, "FFmpeg已正确安装并可用"
    return False, "FFmpeg可执行但返回异常输出"
class EnhancedDialectProcessor:
    """Keyword tables and dialect-to-Mandarin text normalisation.

    ``KEYWORDS`` groups the phrases counted by the quality analysis.
    ``preprocess_text`` rewrites dialect phrases to their standard
    equivalents using a trie built once when the class is created, always
    preferring the longest phrase that matches at a given position.
    """

    KEYWORDS = {
        "opening": ("您好", "很高兴为您服务", "请问有什么可以帮您", "麻烦您喽", "请问搞哪样", "有咋个可以帮您", "多谢喽", "你好",
                    "早上好", "下午好", "晚上好"),
        "closing": ("感谢来电", "祝您生活愉快", "再见", "搞归一喽", "麻烦您喽", "再见喽", "慢走喽", "谢谢", "拜拜"),
        "forbidden": ("不知道", "没办法", "你投诉吧", "随便你", "搞不成", "没得法", "随便你喽", "你投诉吧喽", "我不懂",
                      "自己看"),
        "salutation": ("先生", "女士", "小姐", "老师", "师傅", "哥", "姐", "兄弟", "妹儿", "老板", "同志"),
        "reassurance": ("非常抱歉", "请不要着急", "我们会尽快处理", "理解您的心情", "实在对不住", "莫急哈", "马上帮您整",
                        "理解您得很", "不好意思", "请您谅解", "我们会尽快解决"),
    }

    # (dialect phrase, standard Mandarin replacement) pairs.
    _DIALECT_ITEMS = (
        ("恼火得很", "非常生气"), ("鬼火戳", "很愤怒"), ("搞不成", "无法完成"), ("没得", "没有"),
        ("搞哪样嘛", "做什么呢"), ("归一喽", "完成了"), ("咋个", "怎么"), ("克哪点", "去哪里"),
        ("麻烦您喽", "麻烦您了"), ("多谢喽", "多谢了"), ("憨包", "傻瓜"), ("归一", "结束"),
        ("板扎", "很好"), ("鬼火冒", "非常生气"), ("背", "倒霉"), ("吃豁皮", "占便宜"),
        ("扯拐", "出问题"), ("打脑壳", "头疼"), ("二天", "以后"), ("鬼火绿", "非常生气"),
        ("哈数", "规矩"), ("经事", "耐用"), ("抠脑壳", "思考"), ("拉稀摆带", "不靠谱"),
        ("马起脸", "板着脸"), ("哦豁", "哎呀"), ("皮坨", "拳头"), ("千翻", "顽皮"),
        ("日鼓鼓", "生气"), ("煞角", "结束"), ("舔肥", "巴结"), ("弯酸", "刁难"),
        ("歪得很", "凶"), ("悬掉掉", "危险"), ("妖艳儿", "炫耀"), ("渣渣", "垃圾"),
    )

    class TrieNode:
        """One character position in the dialect-phrase trie."""

        __slots__ = ('children', 'is_end', 'value')

        def __init__(self):
            self.children = {}   # next character -> TrieNode
            self.is_end = False  # True when a dialect phrase ends here
            self.value = ""      # replacement text for that phrase

    def _build_trie(items, node_cls):
        """Return the root of a trie holding every (phrase, replacement) pair."""
        root = node_cls()
        for phrase, replacement in items:
            node = root
            for char in phrase:
                if char not in node.children:
                    node.children[char] = node_cls()
                node = node.children[char]
            node.is_end = True
            node.value = replacement
        return root

    # Built while the class body executes.  The class name is not bound yet at
    # this point, so the node class is referenced by its local name; the
    # original used `EnhancedDialectProcessor.TrieNode()` here, which raised
    # NameError as soon as the module was imported.
    _trie_root = _build_trie(_DIALECT_ITEMS, TrieNode)
    del _build_trie  # construction helper only; keep it out of the class API

    @classmethod
    def preprocess_text(cls, texts: List[str]) -> List[str]:
        """Return *texts* with dialect phrases replaced, one result per input."""
        return [cls._process_single_text(text) for text in texts]

    @classmethod
    def _process_single_text(cls, text: str) -> str:
        """Replace every dialect phrase in *text* using longest-match lookup.

        ``None`` and the empty string both yield ``""`` so that a failed
        transcription does not abort the whole batch.
        """
        if not text:
            return ""

        pieces = []
        length = len(text)
        pos = 0
        while pos < length:
            node = cls._trie_root
            cursor = pos
            best = None  # (end index, replacement) of the longest match so far
            while cursor < length and text[cursor] in node.children:
                node = node.children[text[cursor]]
                cursor += 1
                if node.is_end:
                    best = (cursor, node.value)

            if best is None:
                # No phrase starts here: copy the character unchanged.
                pieces.append(text[pos])
                pos += 1
            else:
                pos, replacement = best
                pieces.append(replacement)
        return ''.join(pieces)
"D:/models/ASR-models/iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn", "sentiment": "D:/models/distilbert-base-multilingual-cased-sentiments-student" }, "sample_rate": 16000, "silence_thresh": -40, "min_silence_len": 1000, "max_concurrent": 1, "max_audio_duration": 3600, "enable_fp16": True, "enable_quantization": True, "max_sentiment_batch_size": 16 } def __new__(cls): if cls._instance is None: cls._instance = super().__new__(cls) cls._instance.dirty = False cls._instance.config = cls._DEFAULT_CONFIG.copy() cls._instance.load_config() return cls._instance def load_config(self): """加载配置文件""" try: if os.path.exists("config.json"): with open("config.json", "r", encoding="utf-8") as f: file_config = json.load(f) # 深度合并配置 for key, value in file_config.items(): if key in self.config and isinstance(self.config[key], dict) and isinstance(value, dict): self.config[key].update(value) else: self.config[key] = value except json.JSONDecodeError: logger.warning("配置文件格式错误,部分使用默认配置") except Exception as e: logger.error(f"加载配置失败: {str(e)},部分使用默认配置") def save_config(self, force=False): """延迟保存机制:仅当配置变化保存""" if not force and not self.dirty: return try: with open("config.json", "w", encoding="utf-8") as f: json.dump(self.config, f, indent=2, ensure_ascii=False) self.dirty = False except Exception as e: logger.error(f"保存配置失败: {str(e)}") def get(self, key: str, default=None): return self.config.get(key, default) def set(self, key: str, value, immediate_save=False): self.config[key] = value self.dirty = True if immediate_save: self.save_config(force=True) def check_model_paths(self) -> Tuple[bool, List[str]]: errors = [] model_paths = self.get("model_paths", {}) for model_name, path in model_paths.items(): if not path: errors.append(f"{model_name}模型路径未设置") elif not os.path.exists(path): errors.append(f"{model_name}模型路径不存在: {path}") elif not os.path.isdir(path): errors.append(f"{model_name}模型路径不是有效的目录: {path}") return len(errors) == 0, errors def __del__(self): 
class EnhancedAudioProcessor:
    """Load, clean up and segment a call recording for transcription.

    The pipeline is: validate input -> load -> mono / target sample rate /
    16-bit -> noise reduction -> telephone band-pass -> compression and
    normalisation -> split on silence -> export WAV segments.
    """

    SUPPORTED_FORMATS = ('.mp3', '.wav', '.amr', '.m4a')
    MAX_SEGMENT_DURATION = 5 * 60 * 1000  # upper bound per segment, in ms (5 minutes)
    ENHANCEMENT_CONFIG = {
        'noise_sample_duration': 0.5,  # length of the leading noise sample (seconds)
        'telephone_filter_range': (300, 3400),  # telephone band (Hz)
        'compression_threshold': -25.0,  # compressor threshold (dBFS)
        'compression_ratio': 3.0  # compressor ratio
    }

    def __init__(self):
        self._noise_profile = None
        self._sample_rate = ConfigManager().get("sample_rate", 16000)

    @staticmethod
    def check_dependencies():
        """Return ``(ok, message)`` telling whether the audio libraries import."""
        try:
            import librosa
            import noisereduce
            return True, "依赖检查通过"
        except ImportError as e:
            return False, f"缺少依赖库: {str(e)}"

    def process_audio(self, input_path: str, temp_dir: str) -> Optional[List[str]]:
        """Process *input_path* and return the paths of the exported segments.

        Segments are written to *temp_dir*.  When *temp_dir* is empty a new
        directory is created with ``tempfile.mkdtemp`` and left in place so
        the returned paths stay valid.  (The original fell back to a
        ``TemporaryDirectory`` that was deleted before the caller could read
        the files.)

        Returns:
            The list of segment paths, or ``None`` when validation, loading
            or processing fails.
        """
        try:
            output_dir = temp_dir or tempfile.mkdtemp(prefix="dialectqa_")
            if not self._validate_input(input_path, output_dir):
                return None

            audio = self._load_audio(input_path)
            if audio is None:
                return None

            audio = self._basic_preprocessing(audio)
            audio = self._enhance_audio(audio)
            return self._segment_audio(audio, input_path, output_dir)
        except Exception as e:
            logger.error(f"音频处理失败: {str(e)}", exc_info=True)
            return None

    def _validate_input(self, input_path: str, temp_dir: str) -> bool:
        """Check tools, libraries and the input file; create *temp_dir* last."""
        ffmpeg_available, ffmpeg_msg = check_ffmpeg_available()
        if not ffmpeg_available:
            logger.error(f"ffmpeg错误: {ffmpeg_msg}")
            return False

        deps_ok, deps_msg = self.check_dependencies()
        if not deps_ok:
            logger.error(f"依赖错误: {deps_msg}")
            return False

        ext = os.path.splitext(input_path)[1].lower()
        if ext not in self.SUPPORTED_FORMATS:
            logger.error(f"不支持的音频格式: {ext}")
            return False

        if not os.path.exists(input_path):
            logger.error(f"文件不存在: {input_path}")
            return False

        # Only touch the file system once the input is known to be usable.
        os.makedirs(temp_dir, exist_ok=True)
        return True

    def _load_audio(self, input_path: str) -> Optional[AudioSegment]:
        """Decode *input_path*; return ``None`` when decoding fails."""
        try:
            return AudioSegment.from_file(input_path)
        except Exception as e:
            logger.error(f"无法加载音频文件: {str(e)}")
            return None

    def _basic_preprocessing(self, audio: AudioSegment) -> AudioSegment:
        """Convert to mono, the configured sample rate and 16-bit samples."""
        if audio.channels > 1:
            audio = audio.set_channels(1)

        if audio.frame_rate != self._sample_rate:
            audio = audio.set_frame_rate(self._sample_rate)

        # The denoising step rebuilds the segment from int16 samples with
        # sample_width=2, so the sample width is fixed here.
        if audio.sample_width != 2:
            audio = audio.set_sample_width(2)

        return audio

    def _enhance_audio(self, audio: AudioSegment) -> AudioSegment:
        """Run noise reduction, band-pass filtering and normalisation in order."""
        self._analyze_noise_profile(audio)
        audio = self._extract_main_voice(audio)
        audio = self._enhance_telephone_quality(audio)
        return self._normalize_audio(audio)

    def _analyze_noise_profile(self, audio: AudioSegment):
        """Store the first ``noise_sample_duration`` seconds as the noise sample."""
        try:
            samples = np.array(audio.get_array_of_samples())
            sr = audio.frame_rate
            noise_duration = int(sr * self.ENHANCEMENT_CONFIG['noise_sample_duration'])
            self._noise_profile = samples[:min(noise_duration, len(samples))].astype(np.float32)
        except Exception as e:
            logger.warning(f"噪声分析失败: {str(e)}")
            self._noise_profile = None

    def _extract_main_voice(self, audio: AudioSegment) -> AudioSegment:
        """Reduce noise using the stored noise sample; return *audio* on failure."""
        if self._noise_profile is None:
            logger.warning("无噪声样本可用,跳过说话人提取")
            return audio

        try:
            samples = np.array(audio.get_array_of_samples())
            sr = audio.frame_rate
            reduced_noise = nr.reduce_noise(
                y=samples.astype(np.float32),
                sr=sr,
                y_noise=self._noise_profile,
                prop_decrease=0.8
            )
            return AudioSegment(
                reduced_noise.astype(np.int16).tobytes(),
                frame_rate=sr,
                sample_width=2,
                channels=1
            )
        except Exception as e:
            logger.warning(f"降噪处理失败: {str(e)}")
            return audio

    def _enhance_telephone_quality(self, audio: AudioSegment) -> AudioSegment:
        """Band-pass the audio to the configured telephone frequency range."""
        try:
            low, high = self.ENHANCEMENT_CONFIG['telephone_filter_range']
            return audio.low_pass_filter(high).high_pass_filter(low)
        except Exception as e:
            logger.warning(f"电话质量增强失败: {str(e)}")
            return audio

    def _normalize_audio(self, audio: AudioSegment) -> AudioSegment:
        """Apply dynamic-range compression followed by volume normalisation."""
        try:
            audio = effects.compress_dynamic_range(
                audio,
                threshold=self.ENHANCEMENT_CONFIG['compression_threshold'],
                ratio=self.ENHANCEMENT_CONFIG['compression_ratio']
            )
            return effects.normalize(audio)
        except Exception as e:
            logger.warning(f"音频标准化失败: {str(e)}")
            return audio

    def _segment_audio(self, audio: AudioSegment, input_path: str, output_dir: str) -> List[str]:
        """Split on silence, merge chunks up to the length limit and export WAVs.

        Returns the exported file paths, or an empty list when splitting or
        exporting fails.
        """
        min_silence_len = ConfigManager().get("min_silence_len", 1000)
        silence_thresh = ConfigManager().get("silence_thresh", -40)

        try:
            segments = split_on_silence(
                audio,
                min_silence_len=min_silence_len,
                silence_thresh=silence_thresh,
                keep_silence=500
            )

            # Greedily merge consecutive chunks while the total stays within
            # MAX_SEGMENT_DURATION.
            merged_segments = []
            current_segment = AudioSegment.silent(duration=0, frame_rate=self._sample_rate)
            for seg in segments:
                if len(current_segment) + len(seg) <= self.MAX_SEGMENT_DURATION:
                    current_segment += seg
                    continue
                # Skip the still-empty accumulator: the original exported a
                # zero-length file when the first chunk exceeded the limit.
                if len(current_segment) > 0:
                    merged_segments.append(current_segment)
                current_segment = seg

            if len(current_segment) > 0:
                merged_segments.append(current_segment)

            output_files = []
            base_name = os.path.splitext(os.path.basename(input_path))[0]
            for i, seg in enumerate(merged_segments):
                output_file = os.path.join(output_dir, f"{base_name}_segment_{i + 1}.wav")
                seg.export(output_file, format="wav")
                output_files.append(output_file)

            return output_files
        except Exception as e:
            logger.error(f"音频分割失败: {str(e)}")
            return []
str) -> Optional[str]: """转录单个音频文件""" if not self._asr_pipeline: logger.error("ASR管道未初始化") return None try: result = self._asr_pipeline(audio_path) return result.get('text', '') except Exception as e: logger.error(f"音频转录失败: {str(e)}") return None def batch_transcribe(self, audio_files: List[str]) -> List[Optional[str]]: """批量转录音频文件""" if not self._asr_pipeline: logger.error("ASR管道未初始化") return [None] * len(audio_files) results = [] for audio_file in audio_files: results.append(self.transcribe(audio_file)) # 转录后立即释放内存 torch.cuda.empty_cache() if self._gpu_available else gc.collect() return results # ====================== 情感分析器 ====================== class SentimentAnalyzer: def __init__(self): self.config = ConfigManager() self._tokenizer = None self._model = None self._gpu_available = is_gpu_available() self._initialize_model() def _initialize_model(self): """初始化情感分析模型""" model_path = self.config.get("model_paths", {}).get("sentiment") if not model_path: logger.error("未配置情感分析模型路径") return try: self._tokenizer = AutoTokenizer.from_pretrained(model_path) self._model = AutoModelForSequenceClassification.from_pretrained(model_path) if self._gpu_available: self._model = self._model.cuda() logger.info("情感分析模型初始化完成") except Exception as e: logger.error(f"情感分析模型初始化失败: {str(e)}") self._tokenizer = None self._model = None def analyze(self, texts: List[str]) -> List[Dict[str, float]]: """分析文本情感""" if not self._model or not self._tokenizer: logger.error("情感分析模型未初始化") return [{"positive": 0.0, "negative": 0.0, "neutral": 0.0}] * len(texts) try: # 分批处理 batch_size = self.config.get("max_sentiment_batch_size", 16) results = [] for i in range(0, len(texts), batch_size): batch = texts[i:i + batch_size] inputs = self._tokenizer( batch, padding=True, truncation=True, max_length=128, return_tensors="pt" ) if self._gpu_available: inputs = {k: v.cuda() for k, v in inputs.items()} with torch.no_grad(): outputs = self._model(**inputs) # 获取概率分布 probs = 
class ProcessingThread(QThread):
    """Worker thread running the full analysis pipeline for one recording.

    Signals:
        progress(int, str): percentage and a short status message.
        finished(dict): the result dictionary built at the end of ``run``.
        error(str): a description of why processing stopped.
    """

    progress = pyqtSignal(int, str)
    finished = pyqtSignal(dict)
    error = pyqtSignal(str)

    def __init__(self, audio_path: str):
        super().__init__()
        self.audio_path = audio_path
        self.resource_monitor = EnhancedResourceMonitor()
        # `run` reads this flag when releasing GPU memory.  The original
        # never set it, so every run ended with AttributeError.
        self._gpu_available = is_gpu_available()
        self._stop_requested = False

    def run(self):
        """Run preprocessing, ASR, dialect conversion, sentiment and keywords."""
        temp_dir = None
        succeeded = False
        try:
            # 1. Configuration
            config = ConfigManager()
            ok, errors = config.check_model_paths()
            if not ok:
                self.error.emit(f"模型路径配置错误: {'; '.join(errors)}")
                return

            # 2. Working directory for the audio segments
            temp_dir = tempfile.mkdtemp(prefix="dialectqa_")
            self.progress.emit(10, "创建临目录完成")

            # 3. Audio preprocessing
            audio_processor = EnhancedAudioProcessor()
            segments = audio_processor.process_audio(self.audio_path, temp_dir)
            if not segments:
                self.error.emit("音频预处理失败")
                return
            self.progress.emit(30, f"音频预处理完成,生成{len(segments)}个分段")

            # 4. Speech recognition
            asr = ASRProcessor()
            transcripts = asr.batch_transcribe(segments)
            if not any(transcripts):
                self.error.emit("ASR转录失败")
                return
            # A segment that failed to transcribe is reported as None;
            # treat it as empty text so joining and later stages keep working.
            raw_transcripts = [text or "" for text in transcripts]
            self.progress.emit(50, f"转录完成,总计{len(''.join(raw_transcripts))}字")

            # 5. Dialect normalisation
            transcripts = EnhancedDialectProcessor.preprocess_text(raw_transcripts)
            self.progress.emit(60, "方言转换完成")

            # 6. Sentiment analysis
            sentiment = SentimentAnalyzer()
            sentiments = sentiment.analyze(transcripts)
            self.progress.emit(80, "情感分析完成")

            # 7. Keyword detection.  Counted on the text as spoken: several
            # keywords are dialect phrases that the conversion step rewrites,
            # so counting on the converted text could never match them.
            keywords_stats = self._analyze_keywords(raw_transcripts)
            self.progress.emit(90, "关键字检测完成")

            # 8. Result
            result = {
                "audio_path": self.audio_path,
                "segments": segments,
                "transcripts": transcripts,
                "sentiments": sentiments,
                "keywords": keywords_stats
            }

            # 9. Release memory
            gc.collect()
            if self._gpu_available:
                torch.cuda.empty_cache()

            succeeded = True
            self.progress.emit(100, "处理完成")
            self.finished.emit(result)
        except Exception as e:
            self.error.emit(f"处理失败: {str(e)}\n{traceback.format_exc()}")
        finally:
            # The segment files are part of a successful result, so the
            # directory is removed only when the run did not complete.
            if temp_dir is not None and not succeeded:
                shutil.rmtree(temp_dir, ignore_errors=True)

    def _analyze_keywords(self, transcripts: List[str]) -> Dict[str, int]:
        """Count, per category, the keyword occurrences in the joined text."""
        full_text = "".join(transcripts)
        return {
            category: sum(full_text.count(kw) for kw in keywords)
            for category, keywords in EnhancedDialectProcessor.KEYWORDS.items()
        }

    def stop(self):
        """Flag the stop request and terminate the thread."""
        self._stop_requested = True
        # NOTE(review): terminate() ends the thread abruptly and nothing in
        # `run` polls `_stop_requested`; consider a cooperative stop.
        self.terminate()
button_layout = QHBoxLayout() self.start_btn = QPushButton("开始分析") self.start_btn.clicked.connect(self.start_processing) self.start_btn.setEnabled(False) self.stop_btn = QPushButton("停止分析") self.stop_btn.clicked.connect(self.stop_processing) self.stop_btn.setEnabled(False) button_layout.addWidget(self.start_btn) button_layout.addWidget(self.stop_btn) input_layout.addLayout(button_layout) # 结果预览区域 preview_group = QGroupBox("预览") preview_layout = QVBoxLayout(preview_group) self.preview_text = QTextEdit() self.preview_text.setReadOnly(True) preview_layout.addWidget(self.preview_text) input_layout.addWidget(preview_group) # 结果选项卡 result_tab = QWidget() result_layout = QVBoxLayout(result_tab) tab_widget.addTab(result_tab, "详细结果") # 结果表格 result_group = QGroupBox("分析明细") result_layout = QVBoxLayout(result_group) self.results_table = QTableWidget() self.results_table.setColumnCount(5) self.results_table.setHorizontalHeaderLabels(["分段", "文本内容", "积极", "中性", "消极"]) self.results_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) result_layout.addWidget(self.results_table) result_layout.addWidget(result_group) # 关键字统计 keywords_group = QGroupBox("关键字统计") keywords_layout = QVBoxLayout(keywords_group) self.keywords_table = QTableWidget() self.keywords_table.setColumnCount(2) self.keywords_table.setHorizontalHeaderLabels(["类别", "出现次数"]) self.keywords_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) keywords_layout.addWidget(self.keywords_table) result_layout.addWidget(keywords_group) # 状态栏 self.statusBar().showMessage("就绪") # 设置中心控件 self.setCentralWidget(main_widget) def check_dependencies(self): """检查系统依赖""" # 检查GPU if not is_gpu_available(): self.statusBar().showMessage("警告: 未检测到GPU,将使用CPU模式运行", 10000) # 检查FFmpeg ffmpeg_ok, ffmpeg_msg = check_ffmpeg_available() if not ffmpeg_ok: QMessageBox.warning(self, "依赖缺失", ffmpeg_msg) # 检查模型路径 config = ConfigManager() ok, errors = config.check_model_paths() if not ok: QMessageBox.warning(self, "配置错误", 
"\n".join(errors)) def select_audio(self): """选择音频文件""" file_path, _ = QFileDialog.getOpenFileName( self, "选择音频文件", "", "音频文件 (*.mp3 *.wav *.amr *.m4a)" ) if file_path: self.audio_path = file_path self.audio_path_edit.setText(file_path) self.start_btn.setEnabled(True) self.preview_text.setText(f"已选择文件: {file_path}") def start_processing(self): """开始处理音频""" if not self.audio_path: QMessageBox.warning(self, "错误", "请先选择音频文件") return # 禁用UI按钮 self.start_btn.setEnabled(False) self.stop_btn.setEnabled(True) self.preview_text.clear() # 创建处理线程 self.processing_thread = ProcessingThread(self.audio_path) self.processing_thread.progress.connect(self.update_progress) self.processing_thread.finished.connect(self.on_processing_finished) self.processing_thread.error.connect(self.on_processing_error) self.processing_thread.start() self.statusBar().showMessage("处理中...") def stop_processing(self): """停止处理""" if self.processing_thread and self.processing_thread.isRunning(): self.processing_thread.stop() self.stop_btn.setEnabled(False) self.statusBar().showMessage("已停止处理") def update_progress(self, value: int, message: str): """更新进度""" self.progress_bar.setValue(value) self.progress_text.setText(message) self.preview_text.append(message) def on_processing_finished(self, result: dict): """处理完成事件""" self.results = result self.stop_btn.setEnabled(False) self.start_btn.setEnabled(True) self.statusBar().showMessage("处理完成") # 更新结果表格 self.update_results_table() # 显示成功消息 QMessageBox.information(self, "完成", f"分析完成!\n音频长: {self.calculate_audio_duration()}秒\n总字数: {len(''.join(result['transcripts']))}字") def on_processing_error(self, error: str): """处理错误事件""" self.stop_btn.setEnabled(False) self.start_btn.setEnabled(True) self.statusBar().showMessage("处理失败") # 显示错误详情 error_dialog = QDialog(self) error_dialog.setWindowTitle("处理错误") layout = QVBoxLayout() text_edit = QTextEdit() text_edit.setPlainText(error) text_edit.setReadOnly(True) layout.addWidget(text_edit) buttons = 
QDialogButtonBox(QDialogButtonBox.Ok) buttons.accepted.connect(error_dialog.accept) layout.addWidget(buttons) error_dialog.setLayout(layout) error_dialog.exec() def update_results_table(self): """更新结果表格""" if not self.results: return # 更新分段结果表格 segments = self.results.get("segments", []) transcripts = self.results.get("transcripts", []) sentiments = self.results.get("sentiments", []) self.results_table.setRowCount(len(segments)) for i in range(len(segments)): # 分段编号 self.results_table.setItem(i, 0, QTableWidgetItem(f"分段 {i + 1}")) # 文本内容 self.results_table.setItem(i, 1, QTableWidgetItem(transcripts[i])) # 情感分析结果 if i < len(sentiments): sentiment = sentiments[i] self.results_table.setItem(i, 2, QTableWidgetItem(f"{sentiment['positive'] * 100:.1f}%")) self.results_table.setItem(i, 3, QTableWidgetItem(f"{sentiment['neutral'] * 100:.1f}%")) self.results_table.setItem(i, 4, QTableWidgetItem(f"{sentiment['negative'] * 100:.1f}%")) # 更新关键字统计表格 keywords = self.results.get("keywords", {}) self.keywords_table.setRowCount(len(keywords)) for i, (category, count) in enumerate(keywords.items()): # 类别名称 self.keywords_table.setItem(i, 0, QTableWidgetItem(self._translate_category(category))) # 出现次数 self.keywords_table.setItem(i, 1, QTableWidgetItem(str(count))) # 根据次数设置颜色 if count > 0: for j in range(2): self.keywords_table.item(i, j).setBackground(QColor(255, 230, 230)) def _translate_category(self, category: str) -> str: """翻译关键字类别名称""" translations = { "opening": "开场白", "closing": "结束语", "forbidden": "禁用语", "salutation": "称呼语", "reassurance": "安抚语" } return translations.get(category, category) def calculate_audio_duration(self) -> float: """计算音频总长(秒)""" if not self.audio_path or not os.path.exists(self.audio_path): return 0.0 try: audio = AudioSegment.from_file(self.audio_path) return len(audio) / 1000.0 # 转换为秒 except: return 0.0 # ====================== 主程序入口 ====================== @staticmethod def main(): # 启用高分屏支持 os.environ["QT_ENABLE_HIGHDPI_SCALING"] = "1" 
QApplication.setHighDpiScaleFactorRoundingPolicy(Qt.HighDpiScaleFactorRoundingPolicy.PassThrough) app = QApplication(sys.argv) app.setFont(QFont("Microsoft YaHei UI", 9)) # 设置默认字体 # 创建主窗口 window = DialectQAAnalyzer() window.show() # 检查资源 monitor = EnhancedResourceMonitor() if monitor.is_under_heavy_load(): QMessageBox.warning(window, "系统警告", "当前系统资源负载较高,性能可能受影响") # 运行应用 sys.exit(app.exec_()) if __name__ == "__main__": try: DialectQAAnalyzer.main() # 调用静态方法 except Exception as e: error_msg = f"致命错误: {str(e)}\n{traceback.format_exc()}" logger.critical(error_msg) # 创建临错误报告 temp_file = os.path.join(os.getcwd(), "crash_report.txt") with open(temp_file, "w", encoding="utf-8") as f: f.write(error_msg) # 显示错误对话框 app = QApplication(sys.argv) msg_box = QMessageBox() msg_box.setIcon(QMessageBox.Critical) msg_box.setWindowTitle("系统崩溃") msg_box.setText("程序遇到致命错误,已终止运行") msg_box.setInformativeText(f"错误报告已保存到: {temp_file}") msg_box.exec() 运行以上代码出现错误提示: 未解析的引用 'EnhancedDialectProcessor':164行
最新发布
09-09
"""Outbound-call recording quality-inspection tool (PyQt5 GUI).

Review suggestions that accompanied this listing:

1. ``AnalysisThread.add_charts`` deletes its temporary image files but never
   closes the figures, which can leak memory in batch runs; close them
   explicitly with ``plt.close(fig)``.
2. ``AnalysisThread.stop()`` only sets ``stop_requested``; tasks already handed
   to the thread pool keep running.  Shut the executor down without waiting.
3. ``AudioAnalyzer.convert_audio`` may fail and return ``None`` as the temporary
   directory; clean-up has to tolerate that.
4. Whisper is called without a ``language`` argument, so time is spent on
   language detection; pin the language to Chinese.
5. ``convert_audio`` re-encodes every input even when it already is 16 kHz mono
   WAV; such input could skip the conversion.
6. Short pauses split one speaker's turn into several segments; merge
   consecutive same-speaker segments whose gap is under one second.
7. Sentiment analysis ignores dialogue context; weight the agent's reply more
   heavily after the customer expressed negative sentiment.
8. A failed model load offers no retry; let the user reload the models.
9. Audio files are not validated (corrupt / non-audio files); filter invalid
   files out before analysis.

Items 1, 2, 3, 4, 8 and 9 are addressed below.  Items 5, 6 and 7 change the
analysis results and are left as follow-up work.
"""

import os
import sys
import time
import json
import traceback
import numpy as np
import pandas as pd
import torch
import librosa
import jieba
import tempfile
from pydub import AudioSegment
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from pyannote.audio import Pipeline
from concurrent.futures import ThreadPoolExecutor, as_completed
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
                             QLabel, QLineEdit, QPushButton, QFileDialog, QTextEdit,
                             QProgressBar, QGroupBox, QCheckBox, QListWidget, QMessageBox)
from PyQt5.QtCore import QThread, pyqtSignal, Qt, QTimer
from PyQt5.QtGui import QFont
from docx import Document
from docx.shared import Inches
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from collections import Counter

# Global configuration shared by every class in this module.
MODEL_CONFIG = {
    "whisper_model": "openai/whisper-small",
    "diarization_model": "pyannote/speaker-diarization@2.1-base",
    "sentiment_model": "IDEA-CCNL/Erlangshen-Roberta-110M-Sentiment",
    "chunk_size": 10,  # chunk length in seconds
    "sample_rate": 16000,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "max_workers": 2 if torch.cuda.is_available() else 4,  # fewer workers on GPU
    "batch_size": 8,
}

# Warm up the tokenizer dictionary once at import time.
jieba.initialize()


def is_valid_audio_file(path):
    """Return True when *path* names an existing, non-empty regular file.

    Decoding problems are still detected later by ``convert_audio``; this
    check only rejects inputs that cannot possibly be analysed.
    """
    try:
        return os.path.isfile(path) and os.path.getsize(path) > 0
    except (OSError, TypeError, ValueError):
        return False


def _model_dtype():
    """Pick float16 on CUDA and float32 elsewhere (half precision is unreliable on CPU)."""
    return torch.float16 if MODEL_CONFIG["device"] == "cuda" else torch.float32


class ModelLoader(QThread):
    """Background thread that loads the ASR, diarization and sentiment models."""

    progress = pyqtSignal(str)
    # NOTE: this shadows QThread.finished; the (bool, str) signature is kept
    # because MainWindow connects to it.
    finished = pyqtSignal(bool, str)

    def __init__(self):
        super().__init__()
        self.models = {}
        self.error = None

    def run(self):
        """Load all models, emitting ``progress`` per stage and ``finished`` at the end."""
        device = MODEL_CONFIG["device"]
        dtype = _model_dtype()
        try:
            self.progress.emit("正在加载语音识别模型...")
            self.models["asr_pipeline"] = pipeline(
                "automatic-speech-recognition",
                model=MODEL_CONFIG["whisper_model"],
                torch_dtype=dtype,
                device=device,
                batch_size=MODEL_CONFIG["batch_size"],
            )

            self.progress.emit("正在加载说话人分离模型...")
            diarization = Pipeline.from_pretrained(
                MODEL_CONFIG["diarization_model"],
                use_auth_token=True,
            )
            if diarization is None:
                # NOTE(review): pyannote appears to return None instead of
                # raising when the download/auth fails - confirm.
                raise RuntimeError("无法加载说话人分离模型")
            # Pipeline.to() takes the target device only.
            self.models["diarization_pipeline"] = diarization.to(torch.device(device))

            self.progress.emit("正在加载情感分析模型...")
            self.models["sentiment_tokenizer"] = AutoTokenizer.from_pretrained(
                MODEL_CONFIG["sentiment_model"]
            )
            self.models["sentiment_model"] = AutoModelForSequenceClassification.from_pretrained(
                MODEL_CONFIG["sentiment_model"],
                torch_dtype=dtype,
            ).to(device)

            self.finished.emit(True, "模型加载完成!")
        except Exception as e:
            self.error = str(e)
            traceback.print_exc()
            self.finished.emit(False, f"模型加载失败: {str(e)}")


class AudioAnalyzer:
    """Core analysis: chunking, diarization, transcription, sentiment, keywords."""

    def __init__(self, models):
        self.keywords = {
            "opening": ["您好", "请问是", "先生/女士", "很高兴为您服务"],
            "closing": ["感谢接听", "祝您生活愉快", "再见", "有问题随时联系"],
            "forbidden": ["不可能", "没办法", "我不管", "随便你", "投诉也没用"],
            "solution": ["解决", "处理好了", "已完成", "满意吗", "还有问题吗"],
        }
        self.synonyms = {
            "不可能": ["不可能", "没可能", "做不到", "无法做到"],
            "解决": ["解决", "处理", "完成", "搞定", "办妥"],
        }
        self.models = models
        self.models_loaded = bool(models)

    def load_keywords(self, excel_path):
        """Load keyword and synonym sheets from an Excel workbook.

        Returns ``(ok, message)``; sheets that are absent keep the defaults.
        """
        sheet_to_key = {
            "开场白": "opening",
            "结束语": "closing",
            "禁语": "forbidden",
            "解决关键词": "solution",
        }
        try:
            sheets = pd.read_excel(excel_path, sheet_name=None)
            for sheet_name, key in sheet_to_key.items():
                if sheet_name in sheets:
                    self.keywords[key] = sheets[sheet_name].dropna()["关键词"].tolist()

            if "同义词" in sheets:
                for _, row in sheets["同义词"].iterrows():
                    main_word, raw = row["主词"], row["同义词"]
                    # Blank cells come back as NaN and have no .split().
                    if pd.isna(main_word) or pd.isna(raw):
                        continue
                    self.synonyms[main_word] = str(raw).split("、")
            return True, "关键词加载成功"
        except Exception as e:
            return False, f"加载关键词失败: {str(e)}"

    def convert_audio(self, input_path):
        """Resample to mono 16 kHz and export fixed-length WAV chunks.

        Returns ``(chunks, duration_seconds, temp_dir)``.  On failure returns
        ``([], 0, None)`` and removes any temporary directory already created.
        The caller owns ``temp_dir`` and must call ``cleanup()`` on it.
        """
        temp_dir = None
        try:
            temp_dir = tempfile.TemporaryDirectory()
            audio = AudioSegment.from_file(input_path)
            audio = audio.set_frame_rate(MODEL_CONFIG["sample_rate"]).set_channels(1)
            duration = len(audio) / 1000.0  # pydub lengths are milliseconds

            chunk_ms = MODEL_CONFIG["chunk_size"] * 1000
            chunks = []
            for offset in range(0, len(audio), chunk_ms):
                piece = audio[offset:offset + chunk_ms]
                chunk_path = os.path.join(temp_dir.name, f"chunk_{offset // chunk_ms}.wav")
                piece.export(chunk_path, format="wav")
                chunks.append({
                    "path": chunk_path,
                    "start_time": offset / 1000.0,
                    "end_time": (offset + len(piece)) / 1000.0,
                })
            return chunks, duration, temp_dir
        except Exception:
            # Keep the cause visible; the return contract carries no message.
            traceback.print_exc()
            if temp_dir is not None:
                temp_dir.cleanup()
            return [], 0, None

    def diarize_speakers(self, audio_path):
        """Run speaker diarization and return one dict per speaker turn."""
        try:
            diarization = self.models["diarization_pipeline"](audio_path)
            return [
                {"start": turn.start, "end": turn.end, "speaker": speaker, "text": ""}
                for turn, _, speaker in diarization.itertracks(yield_label=True)
            ]
        except Exception as e:
            raise Exception(f"说话人分离失败: {str(e)}") from e

    def transcribe_audio_batch(self, chunk_paths):
        """Transcribe all chunk files; returns a list of ``(text, timestamp_chunks)``."""
        try:
            results = self.models["asr_pipeline"](
                chunk_paths,
                chunk_length_s=MODEL_CONFIG["chunk_size"],
                stride_length_s=(4, 2),
                batch_size=MODEL_CONFIG["batch_size"],
                return_timestamps=True,
                # Fixing the language skips Whisper's language detection.
                generate_kwargs={"language": "chinese", "task": "transcribe"},
            )
            if isinstance(results, dict):
                results = [results]
            return [(item["text"], item.get("chunks", [])) for item in results]
        except Exception as e:
            raise Exception(f"语音识别失败: {str(e)}") from e

    def analyze_sentiment_batch(self, texts):
        """Classify each text; returns dicts with ``sentiment``, ``emotion``, ``scores``."""
        try:
            if not texts:
                return []

            inputs = self.models["sentiment_tokenizer"](
                texts,
                padding=True,
                truncation=True,
                max_length=512,
                return_tensors="pt",
            ).to(MODEL_CONFIG["device"])

            with torch.no_grad():
                outputs = self.models["sentiment_model"](**inputs)
            # Softmax in float32 regardless of the model's weight dtype.
            probs = torch.softmax(outputs.logits.float(), dim=-1).cpu().numpy()

            # NOTE(review): this index -> label order is taken from the original
            # code; verify it against the model's config.id2label.
            labels = ["积极", "消极", "中性"]
            emotion_rules = (
                ("愤怒", ("生气", "愤怒", "气死")),
                ("不耐烦", ("不耐烦", "快点", "急死")),
                ("失望", ("失望", "无奈")),
            )

            results = []
            for row, text in zip(probs, texts):
                sentiment = labels[int(np.argmax(row))]
                specific_emotion = "无"
                for name, cues in emotion_rules:
                    if any(cue in text for cue in cues):
                        specific_emotion = name
                        break
                # A confident score overrides the plain label.
                if row[1] > 0.7:
                    sentiment = "强烈消极"
                elif row[0] > 0.7:
                    sentiment = "强烈积极"
                results.append({
                    "sentiment": sentiment,
                    "emotion": specific_emotion,
                    "scores": row.tolist(),
                })
            return results
        except Exception as e:
            raise Exception(f"情感分析失败: {str(e)}") from e

    def match_keywords(self, text, keyword_type):
        """Return True if *text* contains a keyword of *keyword_type* or one of its synonyms.

        A whole-token match implies a substring match, so the substring test
        alone decides the result and no tokenization is needed.
        """
        for keyword in self.keywords.get(keyword_type, []):
            if keyword in text:
                return True
            if any(synonym in text for synonym in self.synonyms.get(keyword, [])):
                return True
        return False

    def identify_agent(self, segments, full_text):
        """Score speakers on opening/closing phrases, talk time and vocabulary; return the best.

        *full_text* is accepted for interface compatibility and not used.
        Returns ``None`` when there are no segments.
        """
        candidates = {}

        def credit(speaker, points, index=None):
            entry = candidates.setdefault(speaker, {"score": 0, "segments": []})
            entry["score"] += points
            if index is not None:
                entry["segments"].append(index)

        # Feature 1: opening phrases within the first five segments.
        for i, segment in enumerate(segments[:5]):
            if self.match_keywords(segment["text"], "opening"):
                credit(segment["speaker"], 3, i)

        # Feature 2: closing phrases within the last three segments.
        tail_offset = max(0, len(segments) - 3)  # never negative for short calls
        for i, segment in enumerate(segments[-3:]):
            if self.match_keywords(segment["text"], "closing"):
                credit(segment["speaker"], 2, tail_offset + i)

        # Feature 3: the longest total talk time.
        speaker_durations = {}
        for segment in segments:
            speaker = segment["speaker"]
            span = segment["end"] - segment["start"]
            speaker_durations[speaker] = speaker_durations.get(speaker, 0) + span
        if speaker_durations:
            longest = max(speaker_durations.values())
            for speaker, total in speaker_durations.items():
                credit(speaker, 1 if total == longest else 0)

        # Feature 4: most frequent use of agent-style vocabulary.
        agent_keywords = ["客服", "代表", "专员", "先生", "女士"]
        speaker_keyword_count = {}
        for segment in segments:
            hits = sum(1 for word in agent_keywords if word in segment["text"])
            if hits:
                speaker = segment["speaker"]
                speaker_keyword_count[speaker] = speaker_keyword_count.get(speaker, 0) + hits
        if speaker_keyword_count:
            top = max(speaker_keyword_count.values())
            for speaker, count in speaker_keyword_count.items():
                if count == top:
                    credit(speaker, 1)

        if candidates:
            return max(candidates.items(), key=lambda item: item[1]["score"])[0]
        return segments[0]["speaker"] if segments else None

    def associate_speaker_text(self, segments, full_text_chunks):
        """Fill each segment's ``text`` from transcript pieces that overlap it in time."""
        for segment in segments:
            seg_start, seg_end = segment["start"], segment["end"]
            seg_duration = seg_end - seg_start
            pieces = []
            for word_info in full_text_chunks:
                if "global_start" not in word_info:
                    continue
                word_start = word_info["global_start"]
                word_end = word_info["global_end"]
                overlap = max(0, min(seg_end, word_end) - max(seg_start, word_start))
                if overlap <= 0:
                    continue
                # overlap > 0 implies both durations are positive.
                if (overlap / (word_end - word_start) > 0.5) or (overlap / seg_duration > 0.5):
                    pieces.append(word_info["text"] + " ")
            segment["text"] = "".join(pieces).strip()

    @staticmethod
    def _globalize_timestamps(chunks, transcribed_data):
        """Convert per-chunk timestamps to whole-file time and flatten them into one list."""
        flattened = []
        for chunk, (_, chunk_data) in zip(chunks, transcribed_data):
            chunk_len = chunk["end_time"] - chunk["start_time"]
            for word_info in chunk_data:
                stamp = word_info.get("timestamp")
                if stamp:
                    start, end = stamp
                    # Either bound can be None when the model emits no timestamp token.
                    start = 0.0 if start is None else start
                    end = chunk_len if end is None else end
                    word_info["global_start"] = chunk["start_time"] + start
                    word_info["global_end"] = chunk["start_time"] + end
                else:
                    word_info["global_start"] = chunk["start_time"]
                    word_info["global_end"] = chunk["end_time"]
            flattened.extend(chunk_data)
        return flattened

    @staticmethod
    def _measure_volume(audio_path):
        """Return ``(mean RMS, std/mean of RMS)``; ``(0, 0)`` if the file cannot be read."""
        try:
            y, _ = librosa.load(audio_path, sr=MODEL_CONFIG["sample_rate"])
            rms = librosa.feature.rms(y=y)
            avg_volume = float(np.mean(rms))
            stability = float(np.std(rms)) / avg_volume if avg_volume > 0 else 0
            return avg_volume, stability
        except Exception:
            # Volume is a best-effort metric; a failure must not abort the analysis.
            return 0, 0

    def analyze_audio(self, audio_path):
        """Analyse one audio file end to end and return a flat result dict.

        Raises ``Exception`` (message prefixed with the file name) on any failure.
        """
        try:
            chunks, duration, temp_dir = self.convert_audio(audio_path)
            if not chunks or not temp_dir:
                if temp_dir:
                    temp_dir.cleanup()
                raise Exception("音频转换失败或未生成分块")

            try:
                segments = self.diarize_speakers(audio_path)
                transcribed = self.transcribe_audio_batch([c["path"] for c in chunks])
                full_text_chunks = self._globalize_timestamps(chunks, transcribed)
                self.associate_speaker_text(segments, full_text_chunks)
                agent_id = self.identify_agent(segments, full_text_chunks)

                agent_text = ""
                customer_text = ""
                found = {"opening": False, "closing": False, "forbidden": False}
                for segment in segments:
                    if segment["speaker"] == agent_id:
                        agent_text += segment["text"] + " "
                    else:
                        customer_text += segment["text"] + " "
                    for kind in found:
                        if not found[kind] and self.match_keywords(segment["text"], kind):
                            found[kind] = True

                sentiment_results = self.analyze_sentiment_batch([agent_text, customer_text])
                if sentiment_results:
                    agent_sentiment = sentiment_results[0]["sentiment"]
                    agent_emotion = sentiment_results[0]["emotion"]
                    customer_sentiment = sentiment_results[1]["sentiment"]
                    customer_emotion = sentiment_results[1]["emotion"]
                else:
                    agent_sentiment = customer_sentiment = "未知"
                    agent_emotion = customer_emotion = "无"

                solution_found = self.match_keywords(agent_text, "solution")

                # Speech rate: whitespace-separated pieces per minute of agent talk time.
                agent_words = len(agent_text.split())
                agent_duration = sum(
                    s["end"] - s["start"] for s in segments if s["speaker"] == agent_id
                )
                agent_speed = agent_words / (agent_duration / 60) if agent_duration > 0 else 0

                avg_volume, volume_stability = self._measure_volume(audio_path)

                def yes_no(flag):
                    return "是" if flag else "否"

                def preview(text):
                    return text[:500] + "..." if len(text) > 500 else text

                return {
                    "file_name": os.path.basename(audio_path),
                    "duration": round(duration, 2),
                    "opening_check": yes_no(found["opening"]),
                    "closing_check": yes_no(found["closing"]),
                    "forbidden_check": yes_no(found["forbidden"]),
                    "agent_sentiment": agent_sentiment,
                    "agent_emotion": agent_emotion,
                    "customer_sentiment": customer_sentiment,
                    "customer_emotion": customer_emotion,
                    "agent_speed": round(agent_speed, 1),
                    "volume_level": round(avg_volume, 4),
                    "volume_stability": round(volume_stability, 2),
                    "solution_rate": yes_no(solution_found),
                    "agent_text": preview(agent_text),
                    "customer_text": preview(customer_text),
                }
            finally:
                temp_dir.cleanup()
        except Exception as e:
            error_msg = f"分析文件 {os.path.basename(audio_path)} 出错: {str(e)}"
            raise Exception(error_msg) from e


class AnalysisThread(QThread):
    """Worker thread that analyses files in a thread pool and writes the reports."""

    progress = pyqtSignal(int, str)
    result_ready = pyqtSignal(dict)
    finished_all = pyqtSignal()
    error_occurred = pyqtSignal(str, str)

    def __init__(self, audio_files, keywords_file, output_dir, models):
        super().__init__()
        self.audio_files = audio_files
        self.keywords_file = keywords_file
        self.output_dir = output_dir
        self.stop_requested = False
        self.analyzer = AudioAnalyzer(models)
        self.completed_count = 0
        self._executor = None
        self._futures = []

    def _advance(self, total):
        """Count one finished file and publish the new percentage."""
        self.completed_count += 1
        percent = int(self.completed_count / total * 100) if total else 100
        self.progress.emit(percent, f"已完成 {self.completed_count}/{total} ({percent}%)")

    def run(self):
        """Analyse every file, emit per-file results/errors, then build the reports.

        ``finished_all`` is emitted only when the run was not stopped by the user.
        """
        try:
            total = len(self.audio_files)

            if self.keywords_file:
                success, msg = self.analyzer.load_keywords(self.keywords_file)
                if not success:
                    self.error_occurred.emit("关键词加载", msg)

            results = []
            errors = []

            # Reject files that cannot be analysed before they reach the pool.
            valid_files = []
            for audio_file in self.audio_files:
                if is_valid_audio_file(audio_file):
                    valid_files.append(audio_file)
                    continue
                error_msg = "文件不存在、为空或不是有效的音频文件"
                errors.append({"file": audio_file, "error": error_msg})
                self.error_occurred.emit(os.path.basename(str(audio_file)), error_msg)
                self._advance(total)

            executor = ThreadPoolExecutor(max_workers=MODEL_CONFIG["max_workers"])
            self._executor = executor
            future_to_file = {}
            try:
                for audio_file in valid_files:
                    future = executor.submit(self.analyzer.analyze_audio, audio_file)
                    future_to_file[future] = audio_file
                self._futures = list(future_to_file)

                for future in as_completed(future_to_file):
                    if self.stop_requested:
                        break
                    audio_file = future_to_file[future]
                    try:
                        result = future.result()
                        if result:
                            results.append(result)
                            self.result_ready.emit(result)
                    except Exception as e:
                        error_msg = str(e)
                        errors.append({"file": audio_file, "error": error_msg})
                        self.error_occurred.emit(os.path.basename(audio_file), error_msg)
                    self._advance(total)
            finally:
                # Drop whatever has not started; never block on a user stop.
                for future in future_to_file:
                    future.cancel()
                executor.shutdown(wait=not self.stop_requested)
                self._executor = None
                self._futures = []

            if results:
                ok, msg = self.generate_reports(results, errors)
                if not ok:
                    self.error_occurred.emit("报告生成", msg)

            if not self.stop_requested:
                self.finished_all.emit()
        except Exception as e:
            self.error_occurred.emit("全局错误", str(e))

    def stop(self):
        """Request a stop: cancel queued tasks and release the pool without waiting.

        Tasks that are already running cannot be interrupted and finish in the
        background.
        """
        self.stop_requested = True
        for future in list(self._futures):
            future.cancel()
        executor = self._executor
        if executor is not None:
            executor.shutdown(wait=False)

    def generate_reports(self, results, errors):
        """Write the Excel and Word reports; returns ``(ok, message)``."""
        try:
            df = pd.DataFrame(results)
            excel_path = os.path.join(self.output_dir, "质检分析报告.xlsx")

            def yes_share(column):
                return f"{df[column].value_counts(normalize=True).get('是', 0) * 100:.1f}%"

            def label_share(column, label):
                return f"{df[df[column] == label].shape[0] / len(df) * 100:.1f}%"

            stats_data = {
                "指标": ["分析文件总数", "成功分析文件数", "分析失败文件数",
                       "开场白合格率", "结束语合格率", "禁语出现率",
                       "客服积极情绪占比", "客户消极情绪占比", "问题解决率"],
                "数值": [
                    len(results) + len(errors),
                    len(results),
                    len(errors),
                    yes_share("opening_check"),
                    yes_share("closing_check"),
                    yes_share("forbidden_check"),
                    label_share("agent_sentiment", "积极"),
                    label_share("customer_sentiment", "消极"),
                    yes_share("solution_rate"),
                ],
            }

            with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
                df.to_excel(writer, sheet_name='详细结果', index=False)
                pd.DataFrame(stats_data).to_excel(writer, sheet_name='统计摘要', index=False)

            doc = Document()
            doc.add_heading('外呼电话质检分析汇总报告', 0)
            doc.add_heading('整体统计', level=1)
            for name, value in zip(stats_data["指标"], stats_data["数值"]):
                doc.add_paragraph(f"{name}: {value}")

            self.add_charts(doc, df)

            if errors:
                doc.add_heading('分析失败文件', level=1)
                table = doc.add_table(rows=1, cols=2)
                hdr_cells = table.rows[0].cells
                hdr_cells[0].text = '文件'
                hdr_cells[1].text = '错误原因'
                for error in errors:
                    row_cells = table.add_row().cells
                    row_cells[0].text = os.path.basename(error['file'])
                    row_cells[1].text = error['error']

            doc.save(os.path.join(self.output_dir, "可视化分析报告.docx"))
            return True, f"报告已保存到: {self.output_dir}"
        except Exception as e:
            return False, f"生成报告失败: {str(e)}"

    def add_charts(self, doc, df):
        """Append three bar charts to *doc*; failures are printed and do not abort the report."""
        sentiment_colors = ['green', 'red', 'blue', 'darkred', 'darkgreen']
        compliance = df[['opening_check', 'closing_check', 'forbidden_check']].apply(
            lambda col: col.value_counts().get('是', 0))
        charts = [
            ('agent_sentiment.png', df['agent_sentiment'].value_counts(),
             '客服情感分布', '情感类型', '数量', sentiment_colors),
            ('customer_sentiment.png', df['customer_sentiment'].value_counts(),
             '客户情感分布', '情感类型', '数量', sentiment_colors),
            ('compliance.png', compliance,
             '合规性检查', '检查项', '合格数量', ['blue', 'green', 'red']),
        ]
        try:
            # A private directory avoids clashes in (and write access to) the CWD.
            with tempfile.TemporaryDirectory() as image_dir:
                for file_name, series, title, xlabel, ylabel, colors in charts:
                    fig, ax = plt.subplots(figsize=(6, 4))
                    try:
                        series.plot(kind='bar', ax=ax, color=colors)
                        ax.set_title(title)
                        ax.set_xlabel(xlabel)
                        ax.set_ylabel(ylabel)
                        fig.tight_layout()
                        image_path = os.path.join(image_dir, file_name)
                        fig.savefig(image_path)
                        doc.add_picture(image_path, width=Inches(5))
                    finally:
                        # pyplot keeps figures alive until they are closed.
                        plt.close(fig)
        except Exception as e:
            print(f"生成图表失败: {str(e)}")


class MainWindow(QMainWindow):
    """Main window: file selection, run controls, progress and result display."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("外呼电话录音质检分析系统")
        self.setGeometry(100, 100, 1000, 800)

        self.audio_files = []
        self.keywords_file = ""
        self.output_dir = os.getcwd()
        self.analysis_thread = None
        self.model_loader = None
        self.models = {}
        self.models_loaded = False

        QApplication.setFont(QFont("Microsoft YaHei", 10))

        main_widget = QWidget()
        main_layout = QVBoxLayout()
        main_layout.setSpacing(10)
        main_layout.setContentsMargins(15, 15, 15, 15)

        self.status_label = QLabel("准备就绪")
        self.status_label.setAlignment(Qt.AlignCenter)
        self.status_label.setStyleSheet(
            "background-color: #f0f0f0; padding: 5px; border-radius: 5px;")

        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setTextVisible(True)
        self.progress_bar.setStyleSheet(
            "QProgressBar {border: 1px solid grey; border-radius: 5px; text-align: center;}"
            "QProgressBar::chunk {background-color: #4CAF50; width: 10px;}")

        main_layout.addWidget(self._build_file_group())
        main_layout.addLayout(self._build_control_row())
        main_layout.addWidget(self.progress_bar)
        main_layout.addWidget(self.status_label)
        main_layout.addWidget(self._build_result_group())

        main_widget.setLayout(main_layout)
        self.setCentralWidget(main_widget)

        self.load_models()

    @staticmethod
    def _path_row(label_text, slot, placeholder="", initial=""):
        """Build a 'label / read-only path / browse button' row; returns (layout, label, edit, button)."""
        row = QHBoxLayout()
        label = QLabel(label_text)
        edit = QLineEdit(initial)
        edit.setReadOnly(True)
        if placeholder:
            edit.setPlaceholderText(placeholder)
        button = QPushButton("浏览...")
        button.setFixedWidth(80)
        button.clicked.connect(slot)
        row.addWidget(label)
        row.addWidget(edit, 1)
        row.addWidget(button)
        return row, label, edit, button

    def _build_file_group(self):
        """Create the group with the audio, keyword-file and output-directory pickers."""
        file_group = QGroupBox("文件选择")
        file_layout = QVBoxLayout()
        file_layout.setSpacing(10)

        audio_layout, self.audio_label, self.audio_path_edit, self.audio_browse_btn = \
            self._path_row("音频文件/文件夹:", self.browse_audio, "请选择音频文件或文件夹")
        keyword_layout, self.keyword_label, self.keyword_path_edit, self.keyword_browse_btn = \
            self._path_row("关键词文件:", self.browse_keywords, "可选:选择关键词Excel文件")
        output_layout, self.output_label, self.output_path_edit, self.output_browse_btn = \
            self._path_row("输出目录:", self.browse_output, initial=os.getcwd())

        for row in (audio_layout, keyword_layout, output_layout):
            file_layout.addLayout(row)
        file_group.setLayout(file_layout)
        return file_group

    @staticmethod
    def _control_button(text, color, slot):
        """Create one of the coloured 40 px control buttons."""
        button = QPushButton(text)
        button.setFixedHeight(40)
        button.setStyleSheet(f"background-color: {color}; color: white; font-weight: bold;")
        button.clicked.connect(slot)
        return button

    def _build_control_row(self):
        """Create the start / stop / clear button row."""
        control_layout = QHBoxLayout()
        control_layout.setSpacing(15)
        self.start_btn = self._control_button("开始分析", "#4CAF50", self.start_analysis)
        self.stop_btn = self._control_button("停止分析", "#f44336", self.stop_analysis)
        self.stop_btn.setEnabled(False)
        self.clear_btn = self._control_button("清空", "#2196F3", self.clear_all)
        for button in (self.start_btn, self.stop_btn, self.clear_btn):
            control_layout.addWidget(button)
        return control_layout

    def _build_result_group(self):
        """Create the result text area and the error list with their counters."""
        result_group = QGroupBox("分析结果")
        result_layout = QVBoxLayout()
        result_layout.setSpacing(10)

        result_header = QHBoxLayout()
        self.result_label = QLabel("分析结果:")
        self.result_count_label = QLabel("0/0")
        self.result_count_label.setAlignment(Qt.AlignRight)
        result_header.addWidget(self.result_label)
        result_header.addWidget(self.result_count_label)

        self.result_text = QTextEdit()
        self.result_text.setReadOnly(True)
        self.result_text.setStyleSheet("font-family: Consolas, 'Microsoft YaHei';")

        error_header = QHBoxLayout()
        self.error_label = QLabel("错误信息:")
        self.error_count_label = QLabel("0")
        self.error_count_label.setAlignment(Qt.AlignRight)
        error_header.addWidget(self.error_label)
        error_header.addWidget(self.error_count_label)

        self.error_list = QListWidget()
        self.error_list.setFixedHeight(120)
        self.error_list.setStyleSheet("color: #d32f2f;")

        result_layout.addLayout(result_header)
        result_layout.addWidget(self.result_text)
        result_layout.addLayout(error_header)
        result_layout.addWidget(self.error_list)
        result_group.setLayout(result_layout)
        return result_group

    def load_models(self):
        """Start (or restart) background model loading."""
        # Never drop a QThread object while it is still running.
        if self.model_loader is not None and self.model_loader.isRunning():
            self.model_loader.wait()
        self.status_label.setText("正在加载AI模型,请稍候...")
        self.start_btn.setEnabled(False)
        self.model_loader = ModelLoader()
        self.model_loader.progress.connect(self.update_model_loading_status)
        self.model_loader.finished.connect(self.handle_model_loading_finished)
        self.model_loader.start()

    def update_model_loading_status(self, message):
        """Show the loader's current stage in the status label."""
        self.status_label.setText(message)

    def handle_model_loading_finished(self, success, message):
        """Enable analysis on success; on failure offer to retry the load."""
        self.status_label.setText(message)
        if success:
            self.models = self.model_loader.models
            self.models_loaded = True
            self.start_btn.setEnabled(True)
            return
        choice = QMessageBox.critical(
            self, "模型加载失败", message,
            QMessageBox.Retry | QMessageBox.Cancel, QMessageBox.Retry)
        if choice == QMessageBox.Retry:
            self.load_models()

    def browse_audio(self):
        """Let the user pick one or more audio files."""
        options = QFileDialog.Options()
        files, _ = QFileDialog.getOpenFileNames(
            self, "选择音频文件", "",
            "音频文件 (*.mp3 *.wav *.amr *.flac *.m4a);;所有文件 (*)",
            options=options
        )
        if files:
            self.audio_files = files
            self.audio_path_edit.setText(f"已选择 {len(files)} 个文件")
            self.result_count_label.setText(f"0/{len(files)}")

    def browse_keywords(self):
        """Let the user pick the optional keyword workbook."""
        options = QFileDialog.Options()
        file, _ = QFileDialog.getOpenFileName(
            self, "选择关键词文件", "",
            "Excel文件 (*.xlsx);;所有文件 (*)",
            options=options
        )
        if file:
            self.keywords_file = file
            self.keyword_path_edit.setText(os.path.basename(file))

    def browse_output(self):
        """Let the user pick the report output directory."""
        options = QFileDialog.Options()
        directory = QFileDialog.getExistingDirectory(self, "选择输出目录", options=options)
        if directory:
            self.output_dir = directory
            self.output_path_edit.setText(directory)

    def _restore_controls(self):
        """Re-enable Start and disable Stop once the worker thread has ended."""
        self.start_btn.setEnabled(True)
        self.stop_btn.setEnabled(False)

    def start_analysis(self):
        """Validate the inputs and launch the analysis thread."""
        if not self.audio_files:
            self.show_message("错误", "请先选择音频文件!")
            return
        if not self.models_loaded:
            self.show_message("错误", "AI模型尚未加载完成!")
            return

        if not os.path.exists(self.output_dir):
            try:
                os.makedirs(self.output_dir)
            except Exception as e:
                self.show_message("错误", f"无法创建输出目录: {str(e)}")
                return

        self.start_btn.setEnabled(False)
        self.stop_btn.setEnabled(True)
        self.result_text.clear()
        self.error_list.clear()
        self.error_count_label.setText("0")
        self.result_text.append("开始分析音频文件...")
        self.progress_bar.setValue(0)

        self.analysis_thread = AnalysisThread(
            self.audio_files, self.keywords_file, self.output_dir, self.models
        )
        self.analysis_thread.progress.connect(self.update_progress)
        self.analysis_thread.result_ready.connect(self.handle_result)
        self.analysis_thread.finished_all.connect(self.analysis_finished)
        self.analysis_thread.error_occurred.connect(self.handle_error)
        # QThread.finished fires on every exit path, including a global error.
        self.analysis_thread.finished.connect(self._restore_controls)
        self.analysis_thread.start()

    def stop_analysis(self):
        """Ask the worker to stop and wait until its thread has ended."""
        if self.analysis_thread and self.analysis_thread.isRunning():
            self.analysis_thread.stop()
            self.analysis_thread.wait()
            self.result_text.append("分析已停止")
            self.status_label.setText("分析已停止")
            self._restore_controls()

    def clear_all(self):
        """Reset the selections and every result widget."""
        self.audio_files = []
        self.keywords_file = ""
        self.audio_path_edit.clear()
        self.keyword_path_edit.clear()
        self.result_text.clear()
        self.error_list.clear()
        self.progress_bar.setValue(0)
        self.status_label.setText("准备就绪")
        self.result_count_label.setText("0/0")
        self.error_count_label.setText("0")

    def update_progress(self, value, message):
        """Update the progress bar, status text and the 'done/total' counter."""
        self.progress_bar.setValue(value)
        self.status_label.setText(message)
        if "已完成" in message:
            parts = message.split()
            if len(parts) >= 2:
                self.result_count_label.setText(parts[1])

    def handle_result(self, result):
        """Append a formatted summary of one file's result to the result view."""
        separator = "-" * 40
        summary = (
            f"\n文件: {result['file_name']}\n"
            f"时长: {result['duration']}秒\n"
            f"{separator}\n"
            f"开场白: {result['opening_check']} | 结束语: {result['closing_check']} | "
            f"禁语: {result['forbidden_check']}\n"
            f"客服情感: {result['agent_sentiment']} ({result['agent_emotion']}) | "
            f"语速: {result['agent_speed']}词/分\n"
            f"客户情感: {result['customer_sentiment']} ({result['customer_emotion']})\n"
            f"问题解决: {result['solution_rate']}\n"
            f"音量水平: {result['volume_level']} | 稳定性: {result['volume_stability']}\n"
            f"{separator}\n"
        )
        self.result_text.append(summary)

    def handle_error(self, file_name, error):
        """Add one error line to the error list and refresh its counter."""
        self.error_list.addItem(f"{file_name}: {error}")
        self.error_count_label.setText(str(self.error_list.count()))

    def analysis_finished(self):
        """Restore the controls and tell the user the run completed."""
        self._restore_controls()
        self.status_label.setText(f"分析完成! 报告已保存到: {self.output_dir}")
        self.result_text.append("分析完成!")
        self.show_message("完成", f"分析完成! 报告已保存到: {self.output_dir}")

    def show_message(self, title, message):
        """Show a modal message box with a single OK button."""
        msg = QMessageBox(self)
        msg.setWindowTitle(title)
        msg.setText(message)
        msg.setStandardButtons(QMessageBox.Ok)
        msg.exec_()


if __name__ == "__main__":
    app = QApplication(sys.argv)

    # Tune device and parallelism to the available GPU memory.
    if MODEL_CONFIG["device"] == "cuda":
        try:
            gpu_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
            print(f"GPU内存: {gpu_mem:.2f}GB")
            if gpu_mem < 4:
                MODEL_CONFIG["device"] = "cpu"
                MODEL_CONFIG["max_workers"] = 4
                print("GPU内存不足,切换到CPU模式")
            elif gpu_mem < 8:
                MODEL_CONFIG["max_workers"] = 2
            else:
                MODEL_CONFIG["max_workers"] = 4
        except Exception:
            MODEL_CONFIG["device"] = "cpu"
            MODEL_CONFIG["max_workers"] = 4
            print("无法获取GPU信息,切换到CPU模式")

    window = MainWindow()
    window.show()
    sys.exit(app.exec_())
07-19
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值