import collections
import re
import sys
import tkinter as tk
from tkinter import scrolledtext
from tkinter import ttk

# Input file used when no path is supplied on the command line.
# Replace this with the actual location of your text file.
DEFAULT_FILE_PATH = "C:/Users/23986/Downloads/Walden.txt"

# Matches every character that is neither a word character nor whitespace.
_PUNCTUATION_RE = re.compile(r'[^\w\s]')


def preprocess_text(text):
    """Return *text* lower-cased with punctuation stripped.

    Characters matched by ``\\w`` (letters, digits, underscore) and
    whitespace are kept; everything else is removed, not replaced, so
    ``"don't"`` becomes ``"dont"``.
    """
    return _PUNCTUATION_RE.sub('', text.lower())


def word_frequency(text):
    """Count how often each whitespace-separated word occurs in *text*.

    The text is normalised with :func:`preprocess_text` first, so counting
    is case-insensitive and ignores punctuation.

    Returns:
        A ``collections.Counter`` mapping word -> number of occurrences.
    """
    return collections.Counter(preprocess_text(text).split())


def read_text_file(file_path, chunk_size=8192, encoding=None):
    """Read a text file in chunks and return its whitespace-separated words.

    Args:
        file_path: Path of the file to read.
        chunk_size: Number of characters requested per ``read`` call.
        encoding: Text encoding passed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        The list of words in file order, or ``None`` if the file could not
        be read (a message is printed in that case).
    """
    words = []
    try:
        with open(file_path, 'r', encoding=encoding) as file:
            # A chunk may end in the middle of a word. The unfinished tail
            # is carried over and glued to the next chunk so that no word
            # is ever split in two at a chunk boundary.
            carry = ''
            for chunk in iter(lambda: file.read(chunk_size), ''):
                buffer = carry + chunk
                pieces = buffer.split()
                if buffer[-1].isspace():
                    carry = ''
                else:
                    # The buffer ends with a non-space character, so the
                    # last piece exists and may continue in the next chunk.
                    carry = pieces.pop()
                words.extend(pieces)
            if carry:
                words.append(carry)
    except FileNotFoundError:
        print(f"文件 '{file_path}' 未找到。")
        return None
    except Exception as e:
        # Deliberate best-effort: report any other failure (permissions,
        # decoding, ...) instead of raising, and signal it with None.
        print(f"发生错误: {e}")
        return None
    return words


def sort_words_by_frequency(word_dict):
    """Return ``(word, count)`` pairs sorted by count, highest first.

    The sort is stable, so words with equal counts keep the order in which
    they appear in *word_dict*.
    """
    return sorted(word_dict.items(), key=lambda item: item[1], reverse=True)


def display_word_rank_in_window(sorted_words):
    """Show the ranked word list in a scrollable Tk window.

    Args:
        sorted_words: Iterable of ``(word, frequency)`` pairs, already in
            display order.

    Blocks until the window is closed.
    """
    root = tk.Tk()
    root.title("单词排行榜")

    # Frame that holds the ranking content.
    frame = ttk.Frame(root, padding="10")
    frame.pack(fill=tk.BOTH, expand=True)

    # ScrolledText already provides its own vertical scrollbar, so no
    # additional Scrollbar widget is created or wired up here.
    text_area = scrolledtext.ScrolledText(frame, width=50, height=20)
    text_area.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    # Build the whole listing first and insert it with a single call.
    lines = [
        f"{i}. {word}: {frequency}\n"
        for i, (word, frequency) in enumerate(sorted_words, 1)
    ]
    text_area.insert(tk.END, ''.join(lines))

    root.mainloop()


def main(argv=None):
    """Count the words of a text file and display the ranking.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument, if present, is the file
            to analyse; otherwise ``DEFAULT_FILE_PATH`` is used.

    Returns:
        Process exit status: 0 on success, 1 if the file could not be read.
    """
    args = sys.argv[1:] if argv is None else argv
    file_path = args[0] if args else DEFAULT_FILE_PATH

    words = read_text_file(file_path)
    if words is None:
        # read_text_file has already printed the reason.
        return 1

    word_dict = word_frequency(' '.join(words))
    sorted_words = sort_words_by_frequency(word_dict)
    display_word_rank_in_window(sorted_words)
    return 0


if __name__ == "__main__":
    sys.exit(main())
文章《瓦尔登湖》中的单词出现次数的统计
最新推荐文章于 2024-10-08 12:37:10 发布