数据结构Python实现之哈希表,字典以及集合

哈希表:

class Array(object):#定义一个数组,用于实现哈希表
    def __init__(self,size=32,init=None):
        self._size = size
        self._items = [init] * size
    def __getitem__(self, index):
        return self._items[index]
    def __setitem__(self, index, value):
        self._items[index] = value
    def __len__(self):
        return self._size
    def clear(self,value=None):
        for i in range(len(self._items)):
            self._items[i] = value
    def __iter__(self):
        for item in self._items:
            yield item

'''
定义一个hash表数组的槽
注意,一个槽有三种状态。
1.从未使用HashTable.UNUSED。此槽没有被使用和冲突过,查找时只要找到UNUSED就不用再继续探查了
2.使用过但是remove了,此时是HashTable.EMPTY,该探查点后边的元素扔可能是有key
3.槽正在使用Slot节点
'''
class Slot(object):
    def __init__(self,key,value):
        self.key = key
        self.value = value
#定义一个哈希表类
class HashTable(object):
    UNUSED = None #槽没被使用过(槽指的是数组中的一个位置)
    EMPTY = Slot(None,None)#使用过却被删除了

    def __init__(self):
        self._table = Array(8,init=HashTable.UNUSED)#初始化槽没被使用过
        self.length = 0
    @property
    def _load_factor(self):#装载因子(load factor),当装载因子超过某一阈值时,需要进行重哈希操作
        return self.length/float(len(self._table))
    def __len__(self):#哈希表的长度
        return self.length
    def _hash(self,key):#定义hash函数,返回哈希表数组的索引
        return abs(hash(key))%len(self._table)
    def _find_key(self,key):#搜寻key键,如果存在,返回key键对应的索引,否则返回None
        index = self._hash(key)
        _len = len(self._table)
        while self._table[index] is not HashTable.UNUSED:
            if self._table[index] is HashTable.EMPTY:
                index = (index*5 +1)%_len#解决哈希冲突的一种方式
                continue
            elif self._table[index].key == key:
                return index
            else:
                index = (index*5 + 1)%_len
        return None
    def _find_slot_for_insert(self,key):#寻找哈希表中可以插入的槽,
        index = self._hash(key)
        _len = len(self._table)
        while not self._slot_can_insert(index):
            index = (index * 5 + 1)%_len#解决哈希冲突的一种方式
        return  index
    def _slot_can_insert(self,index):#判断发现的槽能否插入,空槽或从未被使用即返回True
        return (self._table[index] is HashTable.EMPTY or self._table[index] is HashTable.UNUSED)
    def __contains__(self, key):#in 操作符,判断key是否在hash表里,如果存在返回其索引,否则返回None
        index = self._find_key(key)
        return index is not None
    def add(self,key,value):#添加元素操作,有 key 则更新,否则插入
        if key in self:#调用__contains__判断key是否在哈希表中
            index = self._find_key(key)#搜寻key的索引
            self._table[index].value = value#更新哈希表中key对应的value
            return False
        else:#如果key不再hash表中,则添加元素Slot(key,value),并更新长度
            index = self._find_slot_for_insert(key)
            self._table[index] = Slot(key,value)
            self.length += 1
            if self._load_factor >= 0.8:#如果装载因子大于0.8,则进行重哈希操作
                self._rehash()
            return True

# '''
# 重哈希(Rehashing),
# 步骤就是重新开辟一块新的空间。本函数里的策略是扩大为已经使用的槽数目的两倍。
# 开辟了新空间以后,会把原来哈希表里不为空槽的数据重新插入到新的哈希表里,
# 插入方式和之前一样。这就是 rehashing 操作。
# '''
    def _rehash(self):
        old_table = self._table
        newsize = len(self._table) * 2
        self._table = Array(newsize,HashTable.UNUSED)#定义新的哈希表
        self.length = 0#初始长度为0
        for slot in old_table:#把原来哈希表里不为空槽的数据重新插入到新的哈希表里
            if slot is not HashTable.UNUSED and slot is not HashTable.EMPTY:
                index = self._find_slot_for_insert(slot.key)
                self._table[index]=slot
                self.length += 1
    def get(self,key,default = None):#获取 key 的值,不存在返回默认值 None
        index = self._find_key(key)
        if index is None:
            return default
        else:
            return self._table[index].value
    def remove(self,key):#删除一个 key,这里其实不是真删除,而是标记为 Empty
        index = self._find_key(key)
        if index is None:
            raise KeyError()
        value = self._table[index].value
        self.length -= 1
        self._table[index] = HashTable.EMPTY
        return value
    def __iter__(self):
        for slot in self._table:
            if slot not in (HashTable.EMPTY,HashTable.UNUSED):
                yield slot.key
'''
添加操作:
h = HashTable()
h.add('a', 0)
h.add('b', 1)
h.add('c', 2)
print('hash表的长度为:',len(h))
print(h.get('a')==0)
print(h.get('b')==1)
print(h.get('c')==2)
print(h.get('w') is None)
print('遍历输出:')
for i in h:
    print(i,':',h.get(i),end=',')

输出:
hash表的长度为: 3
True
True
True
True
遍历输出:
b : 1,c : 2,a : 0,
'''
'''
删除操作:
h = HashTable()
h.add('a', 0)
h.add('b', 1)
h.add('c', 2)
h.add('x', 10)
h.add('y', 11)
h.add('z', 12)
print('hash表的长度为:',len(h))
print('get()返回值:',h.get('a'),';','remove()返回值:',h.remove('a'))
print('get()返回值:',h.get('c'),';','remove()返回值:',h.remove('c'))
print('get()返回值:',h.get('x'),';','remove()返回值:',h.remove('x'))
print('删除操作后hash表的长度为:',len(h))
print('遍历输出:')
for i in h:
    print(i,':',h.get(i),end=',')
输出:
hash表的长度为: 6
get()返回值: 0 ; remove()返回值: 0
get()返回值: 2 ; remove()返回值: 2
get()返回值: 10 ; remove()返回值: 10
删除操作后hash表的长度为: 3
遍历输出:
y : 11,z : 12,b : 1,
'''
'''
重哈希
h = HashTable()#哈希表的默认大小为8
for i in range(20):#添加元素超过装载因子,此时会进行重哈希操作
    h.add(i, i)
for i in range(20):
    print(h.get(i)==i,end=',')

print('hash表的长度为:',len(h))

输出:
True,True,True,True,True,True,True,True,
True,True,True,True,True,True,True,True,
True,True,True,True,
hash表的长度为: 20
'''

字典:

class Array(object):
    def __init__(self,size=32,init=None):
        self._size = size
        self._items = [init] * size
    def __getitem__(self, index):
        return self._items[index]
    def __setitem__(self, index, value):
        self._items[index] = value
    def __len__(self):
        return self._size
    def clear(self,value=None):
        for i in range(len(self._items)):
            self._items[i] = value
    def __iter__(self):
        for item in self._items:
            yield item
class Slot(object):
    def __init__(self,key,value):
        self.key = key
        self.value = value

class Hashable(object):
    UNUSED = None
    EMPTY = Slot(None,None)

    def __init__(self):
        self._table = Array(8,init=Hashable.UNUSED)
        self.length = 0
    @property
    def _load_factor(self):
        return self.length/float(len(self._table))
    def __len__(self):
        return self.length
    def _hash(self,key):
        return abs(hash(key))%len(self._table)
    def _find_key(self,key):
        index = self._hash(key)
        _len = len(self._table)
        while self._table[index] is not Hashable.UNUSED:
            if self._table[index] is Hashable.EMPTY:
                index = (index * 5 + 1 )%_len
                continue
            elif self._table[index].key==key:
                return index
            else:
                index = (index * 5 + 1)%_len
        return None
    def _find_slot_for_insert(self,key):
        index = self._hash(key)
        _len = len(self._table)
        while not self._slot_can_insert(index):
            index = (index * 5 + 1)%_len
        return index
    def _slot_can_insert(self,index):
        return (self._table[index] is Hashable.EMPTY or self._table[index] is Hashable.UNUSED)
    def __contains__(self, key):
        index = self._find_key(key)
        return index is not None
    def add(self,key,value):
        if key in self:
            index = self._find_key(key)
            self._table[index].value = value
            return False
        else:
            index = self._find_slot_for_insert(key)
            self._table[index] = Slot(key,value)
            self.length += 1
            if self._load_factor>=0.8:
                self._rehash()
            return True
    def _rehash(self):
        old_table = self._table
        newsize = len(self._table)*2
        self._table = Array(newsize,Hashable.UNUSED)
        self.length = 0
        for slot in old_table:
            if slot is not Hashable.UNUSED and slot is not Hashable.EMPTY:
                index = self._find_slot_for_insert(slot.key)
                self._table[index]=slot
                self.length += 1
    def get(self,key,default=None):
        index = self._find_key(key)
        if index is None:
            return default
        else:
            return self._table[index].value
    def remove(self,key):
        index = self._find_key(key)
        if index is None:
            raise KeyError()
        value = self._table[index].value
        self.length -= 1
        self._table[index] = Hashable.EMPTY
        return value
    def __iter__(self):
        for slot in self._table:
            if slot not in (Hashable.EMPTY,Hashable.UNUSED):
                yield slot.key

class Dict(Hashable):

    def _iter_slot(self):
        for slot in self._table:
            if slot not in (Hashable.EMPTY,Hashable.UNUSED):
                yield slot
    def __setitem__(self, key, value):
        self.add(key,value)
    def __getitem__(self, key):
        if key not in self:
            raise KeyError()
        else:
            return self.get(key)
    def items(self):
        for slot in self._iter_slot():
            yield (slot.key,slot.value)
    def keys(self):
        for slot in self._iter_slot():
            yield slot.key
    def values(self):
        for slot in self._iter_slot():
            yield slot.value
# '''
# 通过指定的key值,删除字典的一个键值对
# 返回被删除的key对应的value
# '''
    def pop(self,key):
        return self.remove(key)
    def clear(self):#清空字典
        for index in range(len(self._table)):
            self._table[index] = Hashable.EMPTY
    def update(self,other):
        self_list=[]
        other_list=[]
        for key,value in self.items():
            self_list.append((key,value))
        for key,value in other.items():
            other_list.append((key,value))
        copy_list = self_list.copy()
        copy_list.extend(other_list)
        for key_value in set(copy_list):
            if key_value in other_list and key_value not in self_list:
                self.add(key_value[0],key_value[1])
            else:
                continue


'''
d = Dict()
d['a']=1
d['b']=2
d['c']=3
d['d']=4
d['e']=5
print('字典长度:',len(d))
print('遍历字典:')
for key,value in d.items():
    print((key,value))
print('key,get(key)获取键值对:')
for key in d.keys():
    print((key,d.get(key)))

输出:
字典长度: 5
遍历字典:
('d', 4)
('b', 2)
('e', 5)
('a', 1)
('c', 3)
key,get(key)获取键值对:
('d', 4)
('b', 2)
('e', 5)
('a', 1)
('c', 3)
'''

'''
d = Dict()
d['a']=1
d['b']=2
d['c']=3
d['d']=4
d['e']=5
d.pop('a')
d.pop('d')
for key,value in d.items():
    print((key,value))
输出:
('e', 5)
('b', 2)
('c', 3)
'''
'''
d = Dict()
d['a']=1
d['b']=2
d['c']=3
d['d']=4
d['e']=5
d.clear()
for key,value in d.items():
    print((key,value))
'''
'''
当d1和d2的键没有交集的时候
d1=Dict()
d2=Dict()
d1['a']=1
d1['b']=2
d1['c']=3
d2['x']=11
d2['y']=22
d2['z']=33
print('d1:')
for key,value in d1.items():
    print((key,value))
print('d2:')
for key,value in d2.items():
    print((key,value))
d1.update(d2)
print('更新后的d1:')
for key,value in d1.items():
    print((key,value))
输出:
d1:
('c', 3)
('a', 1)
('b', 2)
d2:
('y', 22)
('z', 33)
('x', 11)
更新后的d1:
('c', 3)
('y', 22)
('a', 1)
('z', 33)
('b', 2)
('x', 11)

'''
'''
当d1和d2的键有交集的时候
d1=Dict()
d2=Dict()
d1['a']=1
d1['b']=2
d1['c']=3
d2['a']=1
d2['b']=22
d2['x']=33
print('d1:')
for key,value in d1.items():
    print((key,value))
print('d2:')
for key,value in d2.items():
    print((key,value))
d1.update(d2)
print('更新后的d1:')
for key,value in d1.items():
    print((key,value))
输出:
d1:
('a', 1)
('c', 3)
('b', 2)
d2:
('a', 1)
('x', 33)
('b', 22)
更新后的d1:
('a', 1)
('c', 3)
('x', 33)
('b', 22)
'''


集合:

class Set(HashTable):
    def add(self,key):
        # 集合其实就是一个 dict,只不过我们把它的 value 设置成 1
        return super(Set,self).add(key,True)
    def __and__(self, other):
        #交集: A & B,表示同时在 A 和 B 中的元素。 python 中重载  `__and__` 实现
        new_set = Set()
        for element_a in self:
            if element_a in other:
                new_set.add(element_a)
        return new_set
    def __sub__(self, other):
        # 差集:  A - B,表示在 A 中但是不在 B 中的元素。 python 中重载 `__sub__` 实现
        new_set = Set()
        for element_a in self:
            if element_a not in other:
                new_set.add(element_a)
        return new_set
    def __or__(self, other):
        #并集: A | B,表示在 A 或者 B 中的元素,两个集合相加。python 中重载 `__or__` 实现
        new_set = Set()
        for element_a in self:
            new_set.add(element_a)
        for element_b in other:
            new_set.add(element_b)
        return new_set
    def __xor__(self, other):#对称差: A ^ B,返回在 A 或 B 但是不在 A、B 中都出现的元素。其实就是 (A|B) - (A&B), python 中重载 `__xor__` 实现
        new_set= Set()
        for element_a in self:
            if element_a not in other:
                new_set.add(element_a)
        for element_b in other:
            if element_b not in self:
                new_set.add(element_b)
        return new_set
    def pop(self):#删除集合中的第一个元素
        for element in self:
            self.remove(element)
            break
    def remove(self, key):#删除集合中指定的元素
        super(Set,self).remove(key)
'''
remove()操作
s1=Set()
s1.add(1)
s1.add(6)
s1.add(4)
s1.add(9)
s1.add(3)
s1.add(8)
print(list(s1))
s1.remove(8)
print(list(s1))
s1.remove(1)
print(list(s1))
s1.remove(4)
print(list(s1))
s1.remove(9)
print(list(s1))
输出:
[8, 1, 3, 4, 6, 9]
[1, 3, 4, 6, 9]
[3, 4, 6, 9]
[3, 6, 9]
[3, 6]
'''
'''
pop()操作
s1=Set()
s1.add(1)
s1.add(6)
s1.add(4)
s1.add(9)
s1.add(3)
s1.add(8)
print(list(s1))
s1.pop()
print(list(s1))
s1.pop()
print(list(s1))
s1.pop()
print(list(s1))
s1.pop()
print(list(s1))
s1.pop()
print(list(s1))
输出:
[8, 1, 3, 4, 6, 9]
[1, 3, 4, 6, 9]
[3, 4, 6, 9]
[4, 6, 9]
[6, 9]
[9]
'''
'''
s1=Set()
s1.add(1)
s1.add(2)
s1.add(3)
print(1 in s1)
s2=Set()
s2.add(1)
s2.add(0)
s2.add(3)
s2.add(7)
s2.add(8)
s2.add(9)
print('s1和s2的交集:')
print(list(s1 & s2))
print('s1和s2的差集:')
print(list(s1 - s2))
print('s2和s1的差集:')
print(list(s2 - s1))
print('s1和s2的并集:')
print(list(s2 | s1))
print('s1和s2的对称差:')
print(list(s1 ^ s2))
输出:
True
s1和s2的交集:
[1, 3]
s1和s2的差集:
[2]
s2和s1的差集:
[0, 9, 8, 7]
s1和s2的并集:
[0, 1, 2, 3, 7, 8, 9]
s1和s2的对称差:
[0, 9, 2, 8, 7]
'''
  • 1
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值