Python 代码实现哈夫曼编码

一、哈夫曼编码是什么?

哈夫曼编码(Huffman Coding),又称霍夫曼编码,是一种编码方式,属于可变字长编码(VLC)。Huffman 于 1952 年提出了这种编码方法:它完全依据字符出现的概率来构造异字头(即任何一个码字都不是另一个码字的前缀)且平均长度最短的码字,因此有时被称为最佳编码,一般称为 Huffman 编码。

哈夫曼编码,主要目的是根据使用频率来最大化节省字符(编码)的存储空间。

二、Python 代码实现哈夫曼编码

代码如下(示例):


from math import inf

#初始化哈夫曼结点
class Huffmannode:
    """A single Huffman-tree node, meant to live in a flat list of nodes.

    Every field starts at 0:

    * ``parent``, ``left``, ``right`` -- positions of the related nodes in
      that list; ``parent == 0`` is what the tree-building code treats as
      "not attached to a parent yet".
    * ``weight`` -- the weight (e.g. occurrence count) carried by the node.
    """

    def __init__(self):
        # Chained assignment binds the targets left to right, so the
        # attributes are created in the order parent, left, right, weight.
        self.parent = self.left = self.right = self.weight = 0

#选择最小的结点下标
def select_node(huffman):
    """Return the indices of the two lightest nodes that have no parent yet.

    Args:
        huffman: Sequence of node objects exposing ``parent`` and ``weight``.

    Returns:
        A tuple ``(first, second)``: ``first`` is the index of the lightest
        parentless node and ``second`` the index of the next lightest one.
        When weights tie, the node with the lower index wins. An index is
        ``-1`` when no matching node was found. A sequence of exactly two
        nodes short-circuits to ``(0, 1)`` without looking at the weights.
    """
    # With only two nodes there is nothing to search for.
    if len(huffman) == 2:
        return 0, 1

    lowest = runner_up = inf  # start at infinity so any real weight is smaller
    lowest_idx = runner_up_idx = -1
    for idx, node in enumerate(huffman):
        # A non-zero parent means the node was already merged into the tree.
        if node.parent != 0:
            continue
        if node.weight < lowest:
            # New minimum: the previous minimum is demoted to runner-up.
            runner_up, runner_up_idx = lowest, lowest_idx
            lowest, lowest_idx = node.weight, idx
        elif node.weight < runner_up:
            runner_up, runner_up_idx = node.weight, idx
    return lowest_idx, runner_up_idx



#编码
def Huffman_code(origin_dict):
    """Build a Huffman code table for the symbols in ``origin_dict``.

    Args:
        origin_dict: Mapping of symbol -> weight (for example an occurrence
            count).

    Returns:
        A dict with the same keys, in the same order, as ``origin_dict``,
        mapping each symbol to its code as a string of ``'0'``/``'1'``
        characters. An empty mapping yields an empty dict. A mapping with a
        single symbol yields the one-bit code ``'0'`` for it.
    """
    symbols = list(origin_dict)
    n = len(symbols)
    if n == 1:
        # A one-leaf tree has no edges, so walking it would produce an empty
        # code that cannot represent any occurrence of the symbol.
        return {symbols[0]: '0'}

    # Leaves occupy indices 0..n-1; internal nodes are appended after them.
    huffman = []
    for symbol in symbols:
        leaf = Huffmannode()
        leaf.weight = origin_dict[symbol]
        huffman.append(leaf)

    # Build the tree: repeatedly merge the two lightest parentless nodes.
    # ``i`` is the index the merged node will get once appended.
    for i in range(n, 2 * n - 1):
        f, s = select_node(huffman)
        merged = Huffmannode()
        merged.weight = huffman[f].weight + huffman[s].weight
        merged.right = f  # the lighter node goes on the right
        merged.left = s
        huffman[f].parent = huffman[s].parent = i
        huffman.append(merged)

    # Read each code by climbing from the leaf to the root: a left edge is
    # '0', a right edge is '1'. The bits come out leaf-to-root, so they are
    # reversed at the end. parent == 0 marks the root (no parent assigned).
    codeing_dict = {}
    for i, symbol in enumerate(symbols):
        bits = []
        k = i
        parent = huffman[k].parent
        while parent != 0:
            bits.append('0' if huffman[parent].left == k else '1')
            k = parent
            parent = huffman[parent].parent
        codeing_dict[symbol] = ''.join(reversed(bits))

    return codeing_dict



if __name__ == '__main__':
    # Read the text whose characters are going to be encoded.
    s = input('输入即将被编码的字符:')

    # Tally how many times each character occurs; dict.get supplies 0 the
    # first time a character is seen.
    dic = {}
    for ch in s:
        dic[ch] = dic.get(ch, 0) + 1

    code_dict = Huffman_code(dic)
    print(code_dict)

代码运行结果

# A Python implementation of Huffman coding can be broken into these steps:
#   1. Count how often each character occurs and store the counts in a dict.
#   2. Build the Huffman tree from those frequencies, using a priority queue
#      (the heapq module).
#   3. Walk the Huffman tree recursively to produce each character's code.
#   4. Pack the encoded bit string into bytes, ready to be written to a file.
#
# A simple implementation follows.

import heapq
from collections import defaultdict


class HuffmanCoding:
    """Huffman compressor/decompressor for text.

    Tree nodes are plain tuples: a leaf is ``(frequency, symbol)`` and an
    internal node is ``(frequency, left_child, right_child)``.

    ``codes`` maps each symbol to its bit string and ``reverse_mapping`` maps
    bit strings back to symbols. Both are rebuilt by every ``compress`` call,
    so ``decompress`` only works on the instance that produced the data and
    only until that instance compresses something else.
    """

    def __init__(self):
        self.codes = {}
        self.reverse_mapping = {}

    def make_frequency_dict(self, text):
        """Return a mapping of each symbol in ``text`` to its occurrence count."""
        frequency = defaultdict(int)
        for character in text:
            frequency[character] += 1
        return frequency

    def make_heap(self, frequency):
        """Return a min-heap (a list) of ``(frequency, symbol)`` leaf nodes."""
        heap = [(count, symbol) for symbol, count in frequency.items()]
        heapq.heapify(heap)
        return heap

    def merge_nodes(self, left_child, right_child):
        """Return an internal node whose frequency is the sum of its children."""
        merged_frequency = left_child[0] + right_child[0]
        return (merged_frequency, left_child, right_child)

    def make_huffman_tree(self, heap):
        """Merge the nodes in ``heap`` into one tree and return its root.

        As a side effect ``heap`` is reduced to a one-element list holding
        the root.

        Raises:
            IndexError: if ``heap`` is empty.
        """
        if not heap:
            raise IndexError("cannot build a Huffman tree from an empty heap")

        # Leaves and internal nodes are tuples of different shapes, so
        # comparing them directly on a frequency tie would compare a symbol
        # with a tuple and raise TypeError. Each entry therefore carries a
        # unique sequence number that settles ties before the node itself
        # is ever looked at.
        entries = [(node[0], order, node) for order, node in enumerate(heap)]
        heapq.heapify(entries)
        next_order = len(entries)
        while len(entries) > 1:
            left_child = heapq.heappop(entries)[2]
            right_child = heapq.heappop(entries)[2]
            merged_node = self.merge_nodes(left_child, right_child)
            heapq.heappush(entries, (merged_node[0], next_order, merged_node))
            next_order += 1

        root = entries[0][2]
        heap[:] = [root]
        return root

    def make_codes_helper(self, node, current_code):
        """Record codes for every leaf below ``node``; left is 0, right is 1."""
        if len(node) == 2:
            # A tree that is a single leaf has no edges; give that lone
            # symbol a one-bit code so it can still be encoded.
            code = current_code or "0"
            self.codes[node[1]] = code
            self.reverse_mapping[code] = node[1]
            return
        left_child, right_child = node[1], node[2]
        self.make_codes_helper(left_child, current_code + "0")
        self.make_codes_helper(right_child, current_code + "1")

    def make_codes(self, root):
        """Rebuild ``codes`` and ``reverse_mapping`` from the tree at ``root``."""
        # Start from scratch so codes from an earlier text cannot linger and
        # shadow prefixes of the new codes during decoding.
        self.codes = {}
        self.reverse_mapping = {}
        self.make_codes_helper(root, "")

    def get_encoded_text(self, text):
        """Return ``text`` as a string of '0'/'1' using the current codes.

        Raises:
            KeyError: if ``text`` contains a symbol that has no code.
        """
        return "".join(self.codes[character] for character in text)

    def pad_encoded_text(self, encoded_text):
        """Pad the bit string to a whole number of bytes.

        Between 1 and 8 zero bits are appended, and the amount is stored in
        an 8-bit header placed in front of the data.
        """
        extra_padding = 8 - len(encoded_text) % 8
        padded_info = "{0:08b}".format(extra_padding)
        return padded_info + encoded_text + "0" * extra_padding

    def get_byte_array(self, padded_encoded_text):
        """Convert a bit string into a ``bytearray``, eight bits per byte.

        Raises:
            ValueError: if the length is not a multiple of 8.
        """
        if len(padded_encoded_text) % 8 != 0:
            raise ValueError("Encoded text not padded properly")
        return bytearray(
            int(padded_encoded_text[i:i + 8], 2)
            for i in range(0, len(padded_encoded_text), 8)
        )

    def compress(self, text):
        """Compress ``text`` and return the result as a ``bytearray``.

        The code table needed by ``decompress`` is kept on the instance; it
        is not stored in the returned bytes.
        """
        frequency = self.make_frequency_dict(text)
        if frequency:
            heap = self.make_heap(frequency)
            root = self.make_huffman_tree(heap)
            self.make_codes(root)
        else:
            # Empty input: there is no tree to build and nothing to encode.
            self.codes = {}
            self.reverse_mapping = {}
        encoded_text = self.get_encoded_text(text)
        padded_encoded_text = self.pad_encoded_text(encoded_text)
        return self.get_byte_array(padded_encoded_text)

    def remove_padding(self, padded_encoded_text):
        """Strip the 8-bit header and the trailing padding it describes."""
        extra_padding = int(padded_encoded_text[:8], 2)
        payload = padded_encoded_text[8:]
        # Slice by explicit length: payload[:-0] would wrongly drop everything.
        return payload[:max(len(payload) - extra_padding, 0)]

    def decode_text(self, encoded_text):
        """Translate a bit string back into text using ``reverse_mapping``."""
        decoded = []
        current_code = ""
        for bit in encoded_text:
            current_code += bit
            # Codes are prefix-free, so the first match is the right one.
            if current_code in self.reverse_mapping:
                decoded.append(self.reverse_mapping[current_code])
                current_code = ""
        return "".join(decoded)

    def decompress(self, byte_array):
        """Restore the text that ``compress`` turned into ``byte_array``."""
        binary_string = "".join("{0:08b}".format(byte) for byte in byte_array)
        encoded_text = self.remove_padding(binary_string)
        return self.decode_text(encoded_text)


# Usage example.
if __name__ == "__main__":
    text = "hello world"
    huffman = HuffmanCoding()
    compressed = huffman.compress(text)
    decompressed = huffman.decompress(compressed)
    print("Original text:", text)
    print("Compressed text:", compressed)
    print("Decompressed text:", decompressed)

# Output: the first line shows the original text, the second the compressed
# bytearray (its exact bytes depend on how frequency ties are broken while
# the tree is built), and the third the decompressed text, which is again
# "hello world".
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值