文章《瓦尔登湖》中的单词出现次数的统计

最新推荐文章于 2024-10-08 12:37:10 发布

放翁0

最新推荐文章于 2024-10-08 12:37:10 发布

阅读量70

点赞数 3

文章标签：开发语言 python

本文链接：https://blog.csdn.net/2302_76304879/article/details/141164216

版权

 

import collections

import re

import tkinter as tk

from tkinter import scrolledtext

from tkinter import ttk



def preprocess_text(text):
    """Normalize text for word counting.

    The text is lowercased and every punctuation character is removed
    (not replaced by a space), so ``"don't"`` becomes ``"dont"``.

    Args:
        text: The raw text to normalize.

    Returns:
        The lowercased text containing only letters, digits and whitespace.
    """
    # Lowercase first so that "The" and "the" are counted as the same word.
    text = text.lower()
    # ``\w`` also matches "_", so the underscore has to be removed
    # explicitly; otherwise emphasis markers such as "_walden_" would be
    # counted as words distinct from "walden".
    text = re.sub(r'[^\w\s]|_', '', text)
    return text



def word_frequency(text):
    """Count how many times each word occurs in ``text``.

    The text is first normalized with ``preprocess_text`` and then split
    on whitespace.

    Args:
        text: The raw text to analyze.

    Returns:
        A ``collections.Counter`` mapping each word to its occurrence count.
    """
    normalized = preprocess_text(text)
    return collections.Counter(normalized.split())



def read_text_file(file_path, chunk_size=8192, encoding=None):
    """Read a text file chunk by chunk and return its whitespace-separated words.

    Args:
        file_path: Path of the text file to read.
        chunk_size: Number of characters requested per read call.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform's default encoding.

    Returns:
        A list of the words in the file, or ``None`` if the file could not
        be read (a message is printed in that case).
    """
    try:
        word_list = []
        # Trailing fragment of the previous chunk that may be the first half
        # of a word cut in two by the chunk boundary.
        carry = ''
        with open(file_path, 'r', encoding=encoding) as file:
            chunk = file.read(chunk_size)
            while chunk:
                chunk = carry + chunk
                pieces = chunk.split()
                if pieces and not chunk[-1].isspace():
                    # The chunk ends in the middle of a word: hold the last
                    # piece back until the rest of it has been read.
                    carry = pieces.pop()
                else:
                    carry = ''
                word_list.extend(pieces)
                chunk = file.read(chunk_size)
        if carry:
            # The file ended without trailing whitespace.
            word_list.append(carry)
        return word_list
    except FileNotFoundError:
        print(f"文件 '{file_path}' 未找到。")
    except Exception as e:
        # Best-effort reader: report any other failure instead of raising.
        print(f"发生错误: {e}")
    return None



def sort_words_by_frequency(word_dict):
    """Return the ``(word, count)`` pairs ordered from most to least frequent.

    Args:
        word_dict: Mapping of word to occurrence count.

    Returns:
        A list of ``(word, count)`` tuples in descending order of count.
        Words with equal counts keep the order they have in ``word_dict``.
    """
    pairs = list(word_dict.items())
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs



def display_word_rank_in_window(sorted_words):
    """Show the word ranking in a scrollable Tk window.

    The call blocks in the Tk main loop until the window is closed.

    Args:
        sorted_words: Iterable of ``(word, frequency)`` pairs, already in the
            order in which they should be listed.
    """
    root = tk.Tk()
    root.title("单词排行榜")

    # Padded frame that holds the ranking text.
    frame = ttk.Frame(root, padding="10")
    frame.pack(fill=tk.BOTH, expand=True)

    # ScrolledText already comes with a vertical scrollbar wired to the text
    # widget. Adding a second Scrollbar and re-pointing ``yscrollcommand`` at
    # it would leave the built-in bar visible but no longer tracking the
    # scroll position, so none is created here.
    text_area = scrolledtext.ScrolledText(frame, width=50, height=20)
    text_area.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    # Build the whole listing first and insert it with a single widget call
    # instead of one call per word.
    ranking = "".join(
        f"{rank}. {word}: {frequency}\n"
        for rank, (word, frequency) in enumerate(sorted_words, 1)
    )
    text_area.insert(tk.INSERT, ranking)

    root.mainloop()



# Example invocation
file_path = "C:/Users/23986/Downloads/Walden.txt"  # Replace with the actual path to your file
words = read_text_file(file_path)

# read_text_file prints a message and returns None when the file cannot be
# read; joining None would raise a TypeError, so counting and display are
# skipped in that case.
if words is not None:
    word_dict = word_frequency(' '.join(words))
    sorted_words = sort_words_by_frequency(word_dict)

    display_word_rank_in_window(sorted_words)