数据结构与算法Python语言实现《Data Structures & Algorithms in Python》手写课后答案--第十章

最新推荐文章于 2024-09-28 19:15:00 发布

w__daxia

最新推荐文章于 2024-09-28 19:15:00 发布

阅读量1k

点赞数

分类专栏： p y数据结构答案文章标签： python 算法数据结构

本文链接：https://blog.csdn.net/w__daxia/article/details/108983130

版权

博客详细介绍了使用Python实现数据结构中的哈希表和跳跃表，重点讨论了二次探测定理的证明，包括其集群问题以及提出的一个改进策略。此外，还提及了利用费马定理和二次剩余解决相关问题，并给出了字典树的习题代码。

摘要由CSDN通过智能技术生成

第十章

本章讲述哈希表，和跳跃表

有一个难点是证明二次探测定理

The quadratic probing strategy has a clustering problem related to the way it looks for open slots. Namely, when a collision occurs at bucket $h(k)$, it checks buckets $A[(h(k) + i^2) \bmod N]$, for $i = 1, 2, \ldots, N-1$.
a. Show that $i^2 \bmod N$ will assume at most $(N+1)/2$ distinct values, for $N$ prime, as $i$ ranges from $1$ to $N-1$. As a part of this justification, note that $i^2 \bmod N = (N-i)^2 \bmod N$ for all $i$.
b. A better strategy is to choose a prime $N$ such that $N \bmod 4 = 3$ and then to check the buckets $A[(h(k) \pm i^2) \bmod N]$ as $i$ ranges from $1$ to $(N-1)/2$, alternating between plus and minus. Show that this alternate version is guaranteed to check every bucket in $A$.

这题的证明用到了费马定理、二次剩余，在10.36中只做了简要说明，如有更好的证明请留言。
本章10.55题，是一个字典树.

习题代码如下(部分代码引用书中源代码，源代码位置目录在第二章答案中介绍)


# 10.1
from collections.abc import MutableMapping as mm
class Mutable(mm):
    """Minimal list-backed mapping used to exercise the MutableMapping mixins.

    Values live in a Python list and the key of a value is its position in
    that list.  Assigning to a key that does not address a stored value
    appends the value at the end; deleting a key shifts later values down.
    """

    def __init__(self):
        """Create an empty mapping."""
        self._data = []

    def _find_index(self, k):
        """Return k if it addresses a stored value, otherwise None.

        None is never a valid key; it is rejected up front because it cannot
        be compared with the length of the list.
        """
        if k is None:
            return None
        return k if k < len(self) else None

    def __getitem__(self, k):
        """Return the value stored under key k (raise KeyError if missing)."""
        index = self._find_index(k)
        if index is None:
            # KeyError (not a bare Exception) is what the inherited mixins
            # such as __contains__ and get() catch for a missing key.
            raise KeyError("Error input!")
        return self._data[index]

    def __setitem__(self, k, v):
        """Overwrite the value under key k, or append v if k is not stored."""
        index = self._find_index(k)
        if index is None:
            self._data.append(v)
        else:
            self._data[index] = v

    def __delitem__(self, k):
        """Remove the value stored under key k (raise KeyError if missing)."""
        index = self._find_index(k)
        if index is None:
            raise KeyError("Error input!")
        self._data.pop(index)

    def __iter__(self):
        """Iterate over the keys, i.e. the positions 0 .. len(self) - 1."""
        # A mapping iterates over its keys; keys(), values() and the other
        # inherited views are all built on top of that.
        return iter(range(len(self._data)))

    def __len__(self):
        """Return the number of stored values."""
        return len(self._data)

    # Exercise 10.1
    def pop(self, k=None):
        """Remove the value stored under key k and return it.

        Raise KeyError if k is None or does not address a stored value.
        """
        index = self._find_index(k)
        if index is None:
            raise KeyError("Error input!")
        return self._data.pop(index)

    # Exercise 10.2
    def items(self):
        """Generate (key, value) pairs in a single pass over the list."""
        yield from enumerate(self._data)
        
#t=Mutable()
#for i in range(10):    
#    t[i]=i
# 10.1 在pop方法
# 10.2 在items方法
# text10.1
#print(t.pop(0))
# text10.2
#print(list(t.items()))

# 10.3
from TheCode.ch10.unsorted_table_map import UnsortedTableMap as UTM
class UnSorted(UTM):
    """UnsortedTableMap with an items() that walks the table exactly once."""

    def __init__(self):
        """Create an empty map by delegating to the parent initializer."""
        super().__init__()

    def items(self):
        """Generate a (key, value) pair for every entry held in self._table."""
        for entry in self._table:
            pair = (entry._key, entry._value)
            yield pair
# 10.3  在items方法中，遍历一遍的时间复杂度为n
#us=UnSorted()
#for i in range(10):
#    us[i]=i
#print(list(us.items()))

# 10.4 在这个M中，当每次添加时需要遍历整个_table，时间复杂度为O(1+2+···n) --> O(n^2)

# 10.5
from TheCode.ch10.unsorted_table_map import UnsortedTableMap as UTM
from TheCode.ch07.positional_list import PositionalList
class UnsortedLinkMap(UTM):
    """Unsorted map that keeps its items in a PositionalList.

    Every lookup, update and removal scans the list from the front until the
    key is found or the list is exhausted.
    """

    def __init__(self):
        """Create an empty map backed by a PositionalList."""
        # NOTE(review): the base-class initializer is deliberately not
        # invoked; presumably it only sets up the list-based storage that
        # self._link replaces -- confirm against UnsortedTableMap.
        self._link = PositionalList()

    def __getitem__(self, k):
        """Return the value associated with key k (raise KeyError if not found)."""
        for item in self._link:
            if item._key == k:
                return item._value
        raise KeyError("Key Error: " + repr(k))

    def __setitem__(self, k, v):
        """Assign value v to key k, overwriting the existing value if present."""
        for item in self._link:
            if item._key == k:
                item._value = v
                return
        # No match anywhere in the list: store a new item at the end.
        self._link.add_last(self._Item(k, v))

    def __delitem__(self, k):
        """Remove the item associated with key k (raise KeyError if not found)."""
        # Walk positions rather than elements, because delete() needs the
        # position of the item that is being removed.
        cursor = self._link.first()
        while cursor is not None:
            if cursor.element()._key == k:
                self._link.delete(cursor)
                return
            cursor = self._link.after(cursor)
        raise KeyError("Key Error: " + repr(k))

    def __len__(self):
        """Return the number of items in the map."""
        return len(self._link)

    def __iter__(self):
        """Generate the keys of the map.

        Iterating over a mapping must produce its keys: the inherited
        keys(), values() and items() views look every produced object up
        again through __getitem__.
        """
        for item in self._link:
            yield item._key

#uslm=UnsortedLinkMap()
#for i in range(10):
#    uslm[i]=i
  #print(list(uslm))
#print(uslm[2])
#del uslm[3]
#print(list(uslm))

# 10.6
#解决冲突的方法有两类:
#A:分离链表,该方法是在索引对应的空间下建立链表(储存空间)，当存在冲突时将键值对添加到储存空间中
#B:开放寻址,该方法是在发生冲突时,使用不同的方法找到下一个索引(线性探测、二次探测、双哈希策略、迭代的探测)
#所以A的负载因子可以超过1,B的负载因子最多为1

# 10.7
from TheCode.ch07.positional_list import PositionalList
class HashPositional(PositionalList):
    """PositionalList whose nested Position class defines __hash__."""

    class Position(PositionalList.Position):
        """Position that can be hashed."""

        def __hash__(self):
            """Return a hash built from the position's class and the id() of its element."""
            element_identity = id(self.element())
            fingerprint = (type(self), element_identity)
            return hash(fingerprint)

#hp=HashPositional()
#a=hp.add_first('1')
#b=hp.add_first('2')
#c=hp.first()    # c==b
#print('a==b{},a==c{},b==c{},\nhash(a)={}\nhash(b)={}\nhash(b)={}'.format(a==b,a==c,b==c,hash(a),hash(b),hash(c)))

# 10.8
#不同数字hash不同
#不同字母hash不同
#数字和字母在hash中要有分界
#每个字符顺序位置要能够分开

# 10.9 一个None代表一个位置，纵向代表链表的深度
#l=[12,44,13,88,23,94,11,39,20,16,5]
#index=[(3*i+5)%11 for i in l]  #[8, 5, 0, 5, 8, 1, 5, 1, 10, 9, 9]
# None None None None None None None None None None None
#  13   94             44                  12   16   20
#       39             88                  23   5
#                      11

# 10.10
# None None None None None None None None None None None
#   13  94   39   16   44   88   11   5    12   23   20            

# 10.11
#l=[12,44,13,88,23,94,11,39,20,16,5]
#res=[None]*11
#for i in l:
#    for j in range(100):
#        if res[((3*i+5)%11+j**2)%11]==None:   # quadratic probing
#            res[((3*i+5)%11+j**2)%11]=i
#            break
#    print(res)

# 10.12
#l=[12,44,13,88,23,94,11,39,20,16,5]
#res=[None]*11
#for i in l:
#    for j in range(100):
#        if res[(( 3*i+5)%11+j*(7-i%7))%11]==None:    # double hashing: step j*(7-(i mod 7))
#            res[(( 3*i+5)%11+j*(7-i%7))%11]=i
#            break
#    print(res)

# 10.13 最坏的情况为O(n^2)，每次hash都是一个索引，每次需要遍历存入的所有节点来添加新的节点
#       最好的情况为O(n)，  每次hash有不同的索引，可以直接通过索引直接存储

# 10.14
#l=[54,28,41,18,10,36,25,38,12,90]
#res=[None]*10
#for i in l:
#    index=7-i%7
#    if res[index]==None:
#        res[index]=i
#    elif type(res[index])==int:
#        res[index]=[res[index],i]
#    else:
#        res[index].append(i)
#print(res)
    
# 10.15
from TheCode.ch10.hash_map_base import HashMapBase
class NewHashMapBase(HashMapBase):
    """HashMapBase variant whose maximum load factor is configurable.

    Only __init__ and __setitem__ are overridden.
    """

    def __init__(self, cap=11, p=109345121, y=0.5):
        """Create an empty hash-table map.

        cap     initial table size (default 11)
        p       positive prime used for MAD (default 109345121)
        y       largest load factor tolerated before the table grows
                (default 0.5)

        Raise ValueError if y is not positive.
        """
        if y <= 0:
            # A non-positive limit could never be satisfied by a non-empty table.
            raise ValueError("load factor must be positive")
        super().__init__(cap, p)
        self._y = y

    def __setitem__(self, k, v):
        """Store value v under key k and grow the table if it became too full."""
        j = self._hash_function(k)
        # subroutine maintains self._n
        self._bucket_setitem(j, k, v)
        # Compare against len * y directly: floor-dividing by the float
        # reciprocal 1/y can round the threshold down and trigger a resize
        # while the load factor still equals y.
        if self._n > len(self._table) * self._y:        # keep load factor <= y
            # number 2^x - 1 is often prime
            self._resize(2 * len(self._table) - 1)

# 10.16
#Algorithms _find_index:
# input: the key k
# output: three times
# i=0
# sentry_index=None
# repeat:
#   if  (h(k)+i^2)%N  ==key:        return True,sentry_index
#   if  (h(k)+i^2)%N  ==sentry and sentry_index==None:     sentry_index=i
#   if  (h(k)+i^2)%N  ==None        return False,sentry_index
#   i+=1


#Algorithms set_item:
# input: the key k
# output
#   tuple=_find_index(k)
#   set or add by tuple's content

#Algorithms del_item:
# input: the key k
# output
#   tuple=_find_index(k)
#   del or raise by tuple's content

# 10.17    
from TheCode.ch10.probe_hash_map import ProbeHashMap
class QuadraticProbe(ProbeHashMap):
    """ProbeHashMap variant that resolves collisions with quadratic probing.

    Only the _find_slot method is overridden (plus a private helper that
    produces the probe order).
    """

    def _probe_sequence(self, home):
        """Generate the bucket indices to inspect, starting at bucket home.

        First the quadratic probes (home + i*i) mod N for i = 0 .. N-1; the
        value i*i mod N repeats with period N, so later values of i would
        only revisit the same buckets.  Because those probes do not reach
        every bucket, a linear sweep over the whole table follows, which
        guarantees that the search terminates.
        """
        size = len(self._table)
        for i in range(size):
            yield (home + i * i) % size
        for i in range(1, size):
            yield (home + i) % size

    def _find_slot(self, j, k):
        """Search for key k, starting from the bucket at index j.

        Return (success, index) tuple, described as follows:
        If match was found, success is True and index denotes its location.
        If no match found, success is False and index denotes first available
        slot (None when not a single slot of the table is available).
        """
        first_avail = None
        for slot in self._probe_sequence(j):
            if self._is_available(slot):
                if first_avail is None:
                    first_avail = slot                  # mark this as first avail
                if self._table[slot] is None:
                    return (False, first_avail)         # search has failed
            elif k == self._table[slot]._key:
                return (True, slot)                     # found a match
        # Every bucket was inspected without meeting the key or an empty slot.
        return (False, first_avail)
#qp=QuadraticProbe()
#for i in range(10):
#    qp[i]=i
#print(qp[3])
#del qp[3]
#print(list(qp.items()))
#print(qp[3])

# 10.18 哈希表的更新操作需要大量的时间。为了维持顺序，很多元素需要调整顺序。

# 10.19 将SortedTableMap中的table换成 link，然后将二分查找替换为非二分查找等

# 10.20 log2n+2n +log(2n-1)+2n-1···· + log1+1  = O(log(2n!)+4n^2)
        
# 10.21 可以
#       新版仅仅返回 high+1
#       low和high最终会在指向同一个元素,然后再次减少指向的区间,使high<low,返回k应该存在的索引

# 10.22 指的是有序映射的应用中最大值集应用
# 当每对元素的价格和性能都低于前一项时，每个元素都不被dominated，所以只需要添加元素，需O(3nlogn)的时间，最后含有n个这样的数值对
# 当每对元素的价格低于前一项性能高于前一项时，每一项的都dominate前面的所有项，需要将前面的元素杀出，需要O(5nlogn)时间

# 10.23 略

# 10.24 使用书中描述的跳跃查找方法
#Algorithms __delitem__:
#   input: key k
#   p=start
#   temp=SkipSearch(k)
#   del temp

# 10.25
from collections.abc import MutableSet
class SimpleSet(MutableSet):
    """Abstract set that implements pop and isdisjoint on top of the abstract methods.

    Concrete subclasses still have to provide __contains__, __iter__,
    __len__, add and discard.
    """

    # Exercise 10.25
    def pop(self):
        """Remove an arbitrary element from the set and return it.

        Raise ValueError if the set is empty.
        """
        if len(self) == 0:               # abstract method __len__
            raise ValueError("none item")
        # Take the first element the iterator produces, then finish with the
        # iterator before mutating the set.
        item = next(iter(self))          # abstract method __iter__
        self.discard(item)               # abstract method discard
        return item

    # Exercise 10.26
    def isdisjoint(self, other):
        """Return True if the set has no element in common with other.

        other is a set; it must support len(), iteration and the `in` test.
        """
        smaller, larger = self, other
        if len(smaller) > len(larger):
            # Scan the smaller set and probe the larger one.
            smaller, larger = larger, smaller
        for item in smaller:
            if item in larger:
                return False             # a shared element: not disjoint
        return True
    
# 10.26 在SimpleSet中 isdisjoint方法实现

# 10.27
# A使用链表:将同一天当过生日的信息存储在一段连续的链中，每当度过一天就将当天过生日的节点添加到链表尾部
# B使用多映射哈希表:将多个值映射到日期键中
#A方法可以在O(n)的空间复杂度完成，查询近两天过生日的人的时间复杂度较低，但是查询指定时间过生日的时间复杂度较高
#B方法可以在O(1)的时间复杂度查询任意一天过生日的人。

# 10.28
from TheCode.ch10.unsorted_table_map import UnsortedTableMap
class AlterUTM(UnsortedTableMap):
    """UnsortedTableMap with its own setdefault method."""

    def setdefault(self, k, v):
        """Return the value stored under key k, inserting (k, v) first if k is absent.

        If k is already in the map its current value is returned and the map
        is left untouched; otherwise a new item (k, v) is appended and v is
        returned.
        """
        try:
            return self[k]                          # try to find k
        except KeyError:
            # The failed lookup has just shown that k is absent, so the item
            # is appended directly to the table.
            self._table.append(self._Item(k, v))
            return v
# t=AlterUTM()
# for i in range(5):
#    t[i]=str(i)
# for i in range(3,7):
#    temp=t.setdefault(i,i)
#    print(type(temp),temp)
# for i in range(7):
#     print(type(t[i]),t[i])

# 10.29
from TheCode.ch10.probe_hash_map import ProbeHashMap
class AlterPHM(ProbeHashMap):
    """ProbeHashMap with its own setdefault method."""

    def setdefault(self, k, v):
        """Return the value stored under key k, inserting (k, v) first if k is absent.

        If k is already in the map its current value is returned unchanged;
        otherwise a new item (k, v) is stored and v is returned.
        """
        j = self._hash_function(k)
        found, s = self._find_slot(j, k)
        if found:
            return self._table[s]._value            # existing value, left as is
        self._table[s] = self._Item(k, v)           # insert new item
        self._n += 1                                # size has increased
        # The load check has to run before returning, otherwise inserts made
        # through setdefault would never grow the table.
        if self._n > len(self._table) // 2:         # keep load factor <= 0.5
            # number 2^x - 1 is often prime
            self._resize(2 * len(self._table) - 1)
        return v

# t=AlterPHM()
# for i in range(3):
#     t[i]=i
# for i in range(5):
#    temp=t.setdefault(i,str(i))
#    print(temp,type(temp))
# print(type(t.setdefault(1,2)))
# for i in range(5):
#     print(t[i],type(t[i]))

# 10.30
from TheCode.ch10.chain_hash_map import ChainHashMap
class AlterCHM(ChainHashMap):
    ''' rewrite setdatult method'''
    def setdefault(self,k,v):
        j=self._hash_function(k)
        if self._table[j]==None:   # if the item that index of map is none
            self[k]=v               # protected add the item to the map
            return v
        else:
            try:                    # if the chain can't find the item
                return self._table[j][k]
            except KeyError:
                self._table[j]._table.append(self._table[j]._Item(k,v))
                return v
        if self._n > len(self._table) //