界面版_用 Python 解锁电影台词中的秘密:给孩子一个学英语的新奇方式

在上一部分,我们已经详细介绍了 PDF 分析器的功能和逻辑。

接下来,我们将深入探讨如何使用 PyQt5 实现一个既美观又实用的界面。以下是本项目界面设计的关键代码和技巧:

代码实现
import sys
import os
import re
from collections import Counter
from PyQt5.QtWidgets import (QApplication, QMainWindow, QFileDialog, QPushButton, QLabel, QVBoxLayout, QWidget, QStackedWidget, QComboBox, QHBoxLayout, QProgressBar)
from PyQt5.QtGui import QPixmap
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from PyPDF2 import PdfReader
from docx import Document
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
import pandas as pd

# Use the SimHei font family so matplotlib can render the Chinese titles and
# axis labels that the chart methods of this script set.
# NOTE(review): assumes SimHei is installed on the target machine - confirm.
from matplotlib import rcParams
rcParams['font.family'] = ['SimHei']

class WorkerThread(QThread):
    """Background thread that reports progress and hands back word counts.

    Signals:
        progress: carries an ``int``; ``run`` emits the value 100 once.
        finished: carries the ``Counter`` built by ``run``.

    NOTE(review): ``finished`` has the same name as the signal QThread
    already provides, so this class attribute takes its place on instances
    of this class - confirm that is intended.
    """

    progress = pyqtSignal(int)
    finished = pyqtSignal(Counter)

    def run(self):
        """Emit the progress and result signals.

        The file-processing step is still a placeholder, so the emitted
        ``Counter`` is always empty.
        """
        # Placeholder: the actual file-processing code belongs here.
        counts = Counter()
        # Report progress (assumed to be 100 % complete).
        self.progress.emit(100)
        # Deliver the result.
        self.finished.emit(counts)

class PDFAnalyzer(QMainWindow):
    def __init__(self):
        """Build the main window.

        Creates, top to bottom: a toolbar row (three action buttons, a status
        label and a chart selector), a stacked widget holding one matplotlib
        canvas per chart type, and a progress bar. Also creates the worker
        thread and connects its signals to this window.
        """
        super().__init__()

        self.setWindowTitle('PDF 分析器')
        self.setGeometry(100, 100, 1200, 900)

        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)

        self.main_layout = QVBoxLayout()
        self.central_widget.setLayout(self.main_layout)

        self.top_layout = QHBoxLayout()
        self.main_layout.addLayout(self.top_layout)

        # Toolbar buttons: (attribute name, caption, background colour, click
        # handler), in display order - folder picker, report, export.
        button_specs = (
            ('open_button', '选择 文件夹', '#4CAF50', self.open_folder),
            ('report_button', '生成报告', '#2196F3', self.generate_report),
            ('export_button', '导出数据', '#FF5722', self.export_data),
        )
        for attr_name, caption, background, handler in button_specs:
            button = QPushButton(caption)
            button.setStyleSheet(
                f"background-color: {background}; color: white; font-size: 14px; "
                "padding: 10px; border-radius: 5px;"
            )
            button.clicked.connect(handler)
            self.top_layout.addWidget(button)
            setattr(self, attr_name, button)

        # Status label
        self.status_label = QLabel('状态: 准备就绪')
        self.top_layout.addWidget(self.status_label)

        # Chart selector drop-down
        self.chart_selector = QComboBox()
        self.chart_selector.addItems(['词云图', '词频直方图', '字母分布饼图', '累计频率图'])
        self.chart_selector.currentIndexChanged.connect(self.update_chart)
        self.top_layout.addWidget(self.chart_selector)

        # Chart display area
        self.chart_display = QStackedWidget()
        self.main_layout.addWidget(self.chart_display)

        # One canvas per chart. The page order must match the order of the
        # selector items above, because update_chart maps the selector index
        # straight onto the stacked-widget index.
        canvas_attrs = (
            'wordcloud_canvas',
            'barchart_canvas',
            'piechart_canvas',
            'cumulative_canvas',
        )
        for attr_name in canvas_attrs:
            canvas = FigureCanvas(plt.Figure())
            setattr(self, attr_name, canvas)
            self.chart_display.addWidget(canvas)

        # Progress bar
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setTextVisible(True)
        self.main_layout.addWidget(self.progress_bar)

        # Initial state: no folder chosen, no results yet.
        self.folder_path = None
        self.word_frequencies = None

        # Worker thread and its signal wiring
        self.worker = WorkerThread()
        self.worker.progress.connect(self.update_progress)
        self.worker.finished.connect(self.on_process_finished)

    def open_folder(self):
        """Ask for a directory; if one is returned, store it and start processing."""
        chosen_dir = QFileDialog.getExistingDirectory(self, '选择文件夹')
        if not chosen_dir:
            # Nothing was selected: leave the current state untouched.
            return

        self.folder_path = chosen_dir
        self.status_label.setText(f'选择的文件夹: {chosen_dir}')
        self.process_files()

    def process_files(self):
        """Start the worker thread, or ask for a folder when none is set."""
        if self.folder_path:
            # Hand the work to the background thread.
            self.worker.start()
        else:
            self.status_label.setText('请先选择文件夹。')

    def extract_unique_words(self, file_path):
        """Count the words found in one ``.pdf``, ``.docx`` or ``.txt`` file.

        The file type is chosen from the extension, compared
        case-insensitively so that e.g. ``REPORT.PDF`` is handled as well.
        Words are lower-cased before counting and words of a single character
        are dropped.

        Args:
            file_path: Path of the file to read, as a string.

        Returns:
            A ``Counter`` mapping each lower-cased word to its number of
            occurrences. It is empty for any other extension.
        """
        word_pattern = re.compile(r'\b[\u4e00-\u9fff\w]+(?:\'[\w]+)?\b')
        lowered_path = file_path.lower()

        # Collect the text chunks of the file; the counting below is shared
        # by all three formats.
        if lowered_path.endswith(".pdf"):
            reader = PdfReader(file_path)
            chunks = (page.extract_text() for page in reader.pages)
        elif lowered_path.endswith(".docx"):
            chunks = (para.text for para in Document(file_path).paragraphs)
        elif lowered_path.endswith(".txt"):
            with open(file_path, 'r', encoding='utf-8') as file:
                chunks = [file.read()]
        else:
            chunks = ()

        unique_words = Counter()
        for text in chunks:
            if not text:
                # Skip empty chunks; this also guards against a page whose
                # text extraction yields None.
                continue
            for word in word_pattern.findall(text):
                word = word.lower()
                if len(word) > 1:
                    unique_words[word] += 1

        return unique_words

    def generate_report(self):
        """Redraw all four charts from the stored word frequencies.

        When no frequencies are stored (``None`` or empty) only the status
        label is updated and nothing is drawn.
        """
        frequencies = self.word_frequencies
        if not frequencies:
            self.status_label.setText('没有数据来生成报告。')
            return

        # Same order as the original calls: word cloud, bar, pie, cumulative.
        renderers = (
            self.generate_wordcloud,
            self.generate_bar_chart,
            self.generate_pie_chart,
            self.generate_cumulative_frequency_chart,
        )
        for render in renderers:
            render(frequencies)

        self.status_label.setText('报告生成完成。')

    def update_chart(self):
        """Show the stacked-widget page whose index matches the selector."""
        self.chart_display.setCurrentIndex(self.chart_selector.currentIndex())

    def generate_wordcloud(self, word_frequencies):
        """Render a word cloud of ``word_frequencies`` on the word-cloud canvas.

        Args:
            word_frequencies: Mapping of word to frequency, passed to
                ``WordCloud.generate_from_frequencies``.
        """
        builder = WordCloud(width=800, height=400, background_color='white')
        cloud = builder.generate_from_frequencies(word_frequencies)

        canvas = self.wordcloud_canvas
        canvas.figure.clear()
        axes = canvas.figure.add_subplot(111)
        axes.imshow(cloud, interpolation='bilinear')
        # The image needs no axis ticks or frame.
        axes.axis('off')
        canvas.draw()

    def generate_bar_chart(self, word_frequencies, top_n=10):
        """Draw a bar chart of the ``top_n`` most frequent words.

        Args:
            word_frequencies: ``Counter`` of word to frequency (its
                ``most_common`` method is used).
            top_n: Maximum number of words to show.

        When there is nothing to plot (empty counter, or ``top_n`` selecting
        no entries) the canvas is cleared and left blank instead of raising
        on the tuple unpacking below.
        """
        most_common_words = word_frequencies.most_common(top_n)

        self.barchart_canvas.figure.clear()
        if not most_common_words:
            # zip(*[]) yields nothing, so unpacking it into two names would
            # raise ValueError; show an empty canvas instead.
            self.barchart_canvas.draw()
            return

        words, counts = zip(*most_common_words)

        ax = self.barchart_canvas.figure.add_subplot(111)
        ax.bar(words, counts, color='skyblue')
        ax.set_xlabel('单词')
        ax.set_ylabel('频率')
        ax.set_title(f'前 {top_n} 个最常见的单词')

        # Set the tick positions and rotate the labels by 45 degrees.
        ax.set_xticks(range(len(words)))
        ax.set_xticklabels(words, rotation=45)

        self.barchart_canvas.draw()

    def generate_pie_chart(self, word_frequencies):
        """Draw a pie chart of words grouped by their first character.

        Each key of ``word_frequencies`` counts once towards the slice of its
        first character; the stored frequencies themselves are not used.

        Args:
            word_frequencies: Mapping whose keys are the (non-empty) words.
        """
        # Seed a-z with zero so Latin initials keep alphabetical order; any
        # other first character (e.g. a Chinese character) is appended the
        # first time it is seen.
        initial_counts = dict.fromkeys('abcdefghijklmnopqrstuvwxyz', 0)
        for word in word_frequencies:
            initial = word[0]
            initial_counts[initial] = initial_counts.get(initial, 0) + 1

        # Keep only the initials that actually occur.
        present = [(initial, count) for initial, count in initial_counts.items() if count > 0]
        labels = [initial for initial, _ in present]
        sizes = [count for _, count in present]

        canvas = self.piechart_canvas
        canvas.figure.clear()
        axes = canvas.figure.add_subplot(111)
        palette = plt.cm.Paired(range(len(labels)))
        axes.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140, colors=palette)
        axes.set_title('以字母或汉字开头的单词分布')
        canvas.draw()

    def generate_cumulative_frequency_chart(self, word_frequencies):
        """Plot the running total of frequencies, most frequent word first.

        Args:
            word_frequencies: Mapping of word to frequency.

        When the mapping is empty the canvas is cleared and left blank
        instead of raising on the tuple unpacking below.
        """
        from itertools import accumulate

        sorted_words = sorted(word_frequencies.items(), key=lambda x: x[1], reverse=True)

        self.cumulative_canvas.figure.clear()
        if not sorted_words:
            # zip(*[]) yields nothing, so unpacking it into two names would
            # raise ValueError; show an empty canvas instead.
            self.cumulative_canvas.draw()
            return

        words, frequencies = zip(*sorted_words)

        # Running total in a single pass (the slice-and-sum form re-added
        # every prefix and was quadratic in the number of words).
        cumulative_frequencies = list(accumulate(frequencies))

        ax = self.cumulative_canvas.figure.add_subplot(111)
        ax.plot(words, cumulative_frequencies, marker='o')
        ax.set_xlabel('单词')
        ax.set_ylabel('累计频率')
        ax.set_title('词频累计分布')

        # Set the tick positions and rotate the labels by 45 degrees.
        ax.set_xticks(range(len(words)))
        ax.set_xticklabels(words, rotation=45)

        self.cumulative_canvas.draw()

    def export_data(self):
        """Write the stored word frequencies to a CSV file chosen by the user.

        The CSV has the columns ``Word`` and ``Frequency`` and no index
        column. Nothing is written when there are no frequencies or when the
        save dialog returns no path.
        """
        frequencies = self.word_frequencies
        if not frequencies:
            self.status_label.setText('没有数据可供导出。')
            return

        file_path, _ = QFileDialog.getSaveFileName(self, '保存数据', '', 'CSV Files (*.csv)')
        if not file_path:
            # No target file was chosen.
            return

        table = pd.DataFrame(frequencies.items(), columns=['Word', 'Frequency'])
        table.to_csv(file_path, index=False)
        self.status_label.setText(f'数据已导出到: {file_path}')

    def update_progress(self, value):
        """Show ``value`` on the progress bar (slot for the worker's ``progress`` signal)."""
        self.progress_bar.setValue(value)

    def on_process_finished(self, word_frequencies):
        """Store the worker's result and mark processing as complete.

        Args:
            word_frequencies: The result delivered by the worker's
                ``finished`` signal; kept for the report and export actions.
        """
        self.progress_bar.setValue(100)
        self.status_label.setText('文件处理完成。')
        self.word_frequencies = word_frequencies

if __name__ == '__main__':
    # Script entry point: create the application, show the main window and
    # exit the process with the event loop's return code.
    app = QApplication(sys.argv)
    window = PDFAnalyzer()
    window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
代码解析
  1. 界面布局

    • 按钮:包括选择文件夹、生成报告、导出数据等功能按钮。通过设置样式表,我们确保按钮的外观既简洁又吸引眼球。
    • 进度条:用于显示文件处理的实时进度,提升用户体验。
    • 图表选择下拉框:让用户选择不同的图表类型,并在下方的 QStackedWidget 中显示相应的图表。
  2. 图表生成

    • 词云由 wordcloud 库生成,词频直方图、字母分布饼图和累计频率图由 matplotlib 绘制;所有图表都通过 matplotlib 画布嵌入到 PyQt5 界面中,用户无需切换应用即可查看结果。
  3. 多线程处理

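    • 使用 QThread 在后台执行文件处理任务,避免阻塞界面,确保界面的流畅性和响应速度。注意:示例代码中 WorkerThread.run() 的文件处理部分仍是占位代码,需要自行填入实际的处理逻辑(例如遍历所选文件夹并对每个文件调用 extract_unique_words)。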
  4. 数据导出

    • 用户可以将分析结果导出为 CSV 文件,便于后续分析和分享。
总结

通过这个项目,我们实现了一个功能强大的 PDF 分析器,不仅具备了完整的数据处理能力,还通过精美的界面和直观的进度条提升了用户体验。希望这个示例能够帮助你在 PyQt5 应用程序中实现更高效的数据处理和展示功能。如果你有任何问题或建议,请随时留言讨论。让我们一起探索更多的编程奇迹吧!

  • 10
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值