通过self来增删改查文本文档的内容

最新推荐文章于 2022-10-17 22:57:50 发布

cosmosForMe

最新推荐文章于 2022-10-17 22:57:50 发布

阅读量575

点赞数

分类专栏： Python算法初级项目集合文章标签：散列表哈希算法数据结构

本文链接：https://blog.csdn.net/cosmosForMe/article/details/122015434

版权

Python算法初级项目集合专栏收录该内容

1 篇文章 0 订阅

订阅专栏

见2021 week12 task1

建立字典来承载文本文档的内容，此段代码要放在dictionary.py文件里。

从 hash_table 模块引入已经实现好的 LinearProbeHashTable，同时从 Python 标准库的 typing 模块引入 Tuple，并引入 timeit 用来计时。

from hash_table import LinearProbeHashTable
from typing import Tuple
import timeit


class Dictionary:
    """Case-insensitive word set stored in a ``LinearProbeHashTable``.

    Every word is kept as a lower-case key whose value is always ``1``;
    only the presence of the key matters.
    """

    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, hash_base: int, table_size: int) -> None:
        """Create an empty dictionary.

        :param hash_base: forwarded unchanged to ``LinearProbeHashTable``.
        :param table_size: forwarded unchanged to ``LinearProbeHashTable``.
        """
        self.hash_table = LinearProbeHashTable(hash_base, table_size)

    def load_dictionary(self, filename: str, time_limit: int = None) -> int:
        """Add every line of ``filename`` to the dictionary as one word.

        Each line is stripped of surrounding whitespace and stored in lower
        case, i.e. normalised the same way as ``add_word``, so that
        ``find_word`` can locate entries that are capitalised in the file.

        :param filename: path of a text file encoded as ``DEFAULT_ENCODING``,
            one word per line.
        :param time_limit: maximum loading time in seconds, or ``None`` for
            no limit.
        :return: the number of lines read (blank lines are counted too).
        :raises TimeoutError: if loading takes longer than ``time_limit``.
        """
        start_time = timeit.default_timer()

        words = 0
        with open(filename, 'r', encoding=Dictionary.DEFAULT_ENCODING) as file:
            for line in file:
                # Go through add_word so loading and lookups share one
                # normalisation rule (lower-casing).
                self.add_word(line.strip())
                # The limit is checked after the insertion and before the
                # counter is bumped, so the word that exceeded it is stored
                # but not counted.
                if time_limit is not None and timeit.default_timer() - start_time > time_limit:
                    raise TimeoutError("Exceeded time limit: " + str(time_limit))
                words += 1

        return words

    def add_word(self, word: str) -> None:
        """Store ``word`` (lower-cased) in the dictionary."""
        self.hash_table[word.lower()] = 1

    def find_word(self, word: str) -> bool:
        """Return whether ``word`` (lower-cased) is in the dictionary."""
        return word.lower() in self.hash_table

    def delete_word(self, word: str) -> None:
        """Remove ``word`` (lower-cased) from the dictionary.

        Any error raised by the underlying table for a missing key is
        propagated to the caller.
        """
        del self.hash_table[word.lower()]


def process_option(dictionary: Dictionary, method_name: str) -> None:
    """Run one menu action against ``dictionary``, prompting the user for input.

    :param dictionary: the dictionary the action operates on.
    :param method_name: one of ``'read_file'``, ``'add_word'``,
        ``'find_word'`` or ``'delete_word'``. Any other value still prompts
        for a word but performs no action.

    The outcome is reported with ``print``. ``FileNotFoundError`` (reading),
    ``IndexError`` (adding) and ``KeyError`` (deleting) are reported to the
    user instead of being propagated.
    """
    if method_name == 'read_file':
        filename = input('Enter filename: ')
        try:
            dictionary.load_dictionary(filename)
        except FileNotFoundError as e:
            print(e)
        else:
            print('Successfully read file')
        return

    word = input('Enter word: ')
    if method_name == 'add_word':
        # The word is added exactly once, inside the try, so a failure is
        # reported here rather than escaping to the caller.
        try:
            dictionary.add_word(word)
        except IndexError as e:
            # NOTE(review): presumably raised by the hash table when it
            # cannot store another key - confirm against hash_table.py.
            print('[{}] {}'.format(word, e))
        else:
            print('[{}] {}'.format(word, 'Successfully added'))
    elif method_name == 'find_word':
        if dictionary.find_word(word):
            print('[{}] {}'.format(word, 'Found in dictionary'))
        else:
            print('[{}] {}'.format(word, 'Not found in dictionary'))
    elif method_name == 'delete_word':
        try:
            dictionary.delete_word(word)
        except KeyError:
            print('[{}] {}'.format(word, 'Not found in dictionary'))
        else:
            print('[{}] {}'.format(word, 'Deleted from dictionary'))


def menu(dictionary: Dictionary):
    """Interactive text menu that drives ``dictionary`` until 'Exit' is chosen.

    The numbered options are printed on every round. A non-numeric or
    out-of-range choice is reported and the menu is shown again; any other
    choice except the last one is handed to ``process_option``.
    """
    menu_options = {'read_file': 'Read File',
                    'add_word': 'Add Word',
                    'find_word': 'Find Word',
                    'delete_word': 'Delete Word',
                    'exit': 'Exit'}
    option_keys = list(menu_options)
    # Options are numbered from 1 in declaration order.
    exit_option = option_keys.index('exit') + 1
    separator = '---------------------'

    option = None
    while option != exit_option:
        print(separator)
        for number, label in enumerate(menu_options.values(), start=1):
            print('{}. {}'.format(number, label))
        print(separator)
        try:
            option = int(input("Enter option: "))
            if not 1 <= option <= exit_option:
                raise ValueError('Option must be between 1 and ' + str(exit_option))
        except ValueError as e:
            print('[{}] {}'.format('menu', e))
            continue
        if option != exit_option:
            process_option(dictionary, option_keys[option - 1])
    print(separator)


# Script entry point: build a Dictionary with hash base 31 and table size
# 250727, then hand it to the interactive menu.
if __name__ == '__main__':
    dictionary = Dictionary(31, 250727)
    menu(dictionary)

将此段代码放在frequency.py文件里：

from enum import Enum
from string import punctuation
from dictionary import Dictionary
from hash_table import LinearProbeHashTable


class Rarity(Enum):
    """Rarity categories returned by ``Frequency.rarity``.

    The thresholds below are relative to the count of the most frequent word
    recorded by ``Frequency`` (``max_word[1]``), with a floor of 1.
    """
    # Count is at least max(most-frequent count / 100, 1).
    COMMON = 0
    # Count is below the COMMON threshold but at least
    # max(most-frequent count / 1000, 1).
    UNCOMMON = 1
    # Count is non-zero but below the UNCOMMON threshold.
    RARE = 2
    # Count is zero.
    MISSPELT = 3


class Frequency:
    """Count how often each dictionary word occurs in the files added so far.

    Attributes:
        hash_table: maps a lower-case word to its number of occurrences.
        dictionary: ``Dictionary`` of accepted words; anything it does not
            contain is ignored by ``add_file``.
        max_word: ``(word, count)`` of the most frequent word seen so far,
            ``('', 0)`` while nothing has been counted.
    """

    def __init__(self) -> None:
        """Create an empty counter and load the reference word list.

        Reads 'english_large.txt' from the working directory with a time
        limit of 10 passed to ``Dictionary.load_dictionary``.
        """
        self.hash_base = 27183
        self.table_size = 250727
        self.hash_table = LinearProbeHashTable(self.hash_base, self.table_size)
        self.dictionary = Dictionary(self.hash_base, self.table_size)
        self.dictionary.load_dictionary('english_large.txt', 10)
        self.max_word = ('', 0)

    def add_file(self, filename: str) -> None:
        """Add the words of the UTF-8 text file ``filename`` to the counts.

        The text is split on whitespace; each token is stripped of leading
        and trailing punctuation and lower-cased. Tokens that are empty
        after stripping, or that are not in ``self.dictionary``, are skipped.

        One pass over the tokens; the cost per token depends on the hash
        table operations (presumably O(1) on average - confirm against
        hash_table.py).
        """
        with open(filename, mode='r', encoding='utf-8') as f:
            for token in f.read().split():
                word = token.strip(punctuation).lower()
                # A token made only of punctuation (e.g. "--") strips down to
                # '' and must never be counted as a word.
                if not word or not self.dictionary.find_word(word):
                    continue
                if word in self.hash_table:
                    count = self.hash_table[word] + 1
                else:
                    count = 1
                self.hash_table[word] = count
                # Checked on first insertion as well, so max_word is correct
                # even when no word occurs twice.
                if count > self.max_word[1]:
                    self.max_word = (word, count)

    def rarity(self, word: str) -> Rarity:
        """Classify ``word`` by its count relative to the most frequent word.

        ``word`` is looked up exactly as given, so callers should pass the
        lower-case form that ``add_file`` stores.

        :return: ``COMMON`` if the count is at least 1/100 of the top count,
            ``UNCOMMON`` if at least 1/1000 of it (both thresholds are never
            below 1), ``RARE`` for any other non-zero count, and ``MISSPELT``
            if the word has not been counted at all.
        """
        # A word that was never counted has count 0 rather than raising, which
        # is what makes the MISSPELT result reachable.
        cnt = self.hash_table[word] if word in self.hash_table else 0
        top = self.max_word[1]
        if cnt >= max(top / 100, 1):
            return Rarity.COMMON
        if cnt >= max(top / 1000, 1):
            return Rarity.UNCOMMON
        if cnt != 0:
            return Rarity.RARE
        return Rarity.MISSPELT


def frequency_analysis() -> None:
    """Entry point called when this module is run as a script.

    Not implemented yet: the body is a placeholder that does nothing.
    """
    # TODO
    pass


# Run the (currently empty) analysis only when executed as a script, not on import.
if __name__ == '__main__':
    frequency_analysis()

有几点需要注意：代码中标有 # TODO 的部分，是需要根据题目要求自行补充实现的功能；模板里原有的 raise NotImplementedError 只是占位语句，实现之后要去掉（上面的代码里已把它注释掉）。

self.hash_base = 27183

self.table_size = 250727

定义好hash_base和table_size的大小。

定义 __init__ 后，实例化时写作 Frequency()（如果 __init__ 除 self 外还声明了其他参数，则写作 类名(arg1, ...)）；新建的实例本身连同调用时给出的参数会一并传给 __init__ 函数，并自动执行它。所以 __init__ 函数的参数列表会在开头多出一项，它永远指代新建的那个实例对象。Python 语法要求这个参数必须要有，而名称随意，习惯上命名为 self。

接下来要用到已经定义好的 hash_table.py、list_adt.py 和 referential_array.py。

用 test_frequency.py 文件来测试字典对文本文档内容的承载以及修改是否成功；通过传入不同的参数来检验各个方法的实现是否正确。

"""Unit Testing for Task 1 and 2"""

import unittest
import sys
from hash_table import LinearProbeHashTable
from frequency import Frequency, Rarity


class TestFrequency(unittest.TestCase):
    """Unit tests for ``Frequency``: construction, ``add_file`` and ``rarity``.

    The tests need the data files 'english_large.txt' (read by
    ``Frequency()``), '215-0.txt' and '84-0.txt' in the working directory;
    the expected counts below are specific to those files.
    """

    def setUp(self) -> None:
        """Build a fresh ``Frequency`` so counts never leak between tests."""
        self.frequency = Frequency()

    def test_init(self) -> None:
        """A new instance has a hash table and a loaded word list."""
        self.assertIsInstance(self.frequency.hash_table, LinearProbeHashTable)
        self.assertTrue(self.frequency.dictionary.find_word('test'))

    def test_add_file(self) -> None:
        """Counts accumulate across successive ``add_file`` calls."""
        self.frequency.add_file('215-0.txt')
        self.assertEqual(self.frequency.hash_table['warm'], 2)
        self.frequency.add_file('84-0.txt')
        self.assertEqual(self.frequency.hash_table['warm'], 11)

    def test_rarity(self) -> None:
        """Words are classified relative to the most frequent word."""
        self.frequency.add_file('215-0.txt')
        self.assertEqual(self.frequency.rarity('warm'), Rarity.UNCOMMON)
        self.assertEqual(self.frequency.rarity('the'), Rarity.COMMON)


# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()

cosmosForMe

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
通过self来增删改查文本文档的内容

见2021 week12 task1建立字典来承载文本文档的内容，此段代码要放在dictionary.py文件里。通过从hash_table文件引入已经编译好的LinearProbeHashTable，同时也要从python内置的typing库里引入Tuple，还要再引用timeit来计时。from hash_table import LinearProbeHashTablefrom typing import Tupleimport timeitclass Dictionary:
复制链接

扫一扫