Huffman编码-python代码实现

Huffman编码-python代码实现

统计编码:根据信源的概率分布进行可变长编码,使平均码长接近熵。
Huffman编码是一种统计编码,是消除编码冗余常用的技术。它的基本原理是按照信源符号出现概率的大小进行排序,概率大的分配短码,概率小的分配长码。

Huffman编码算法
1:将信源符号按照出现概率从大到小排序,记为 $p_1 \ge p_2 \ge \dots \ge p_{m-1} \ge p_m$
2:给最小的概率 $p_m$ 赋予符号1,给第二小的概率 $p_{m-1}$ 赋予符号0
3:计算联合概率 $p_i = p_{m-1} + p_m$,将未处理的 $m-2$ 个概率与 $p_i$ 一起重新排序
4:重复步骤1~3,直到所有的概率都被赋予了一个符号为止

附上源码:
Huffman编码实现:huffman_code.py

import math
# Node class
class Node:
    """A node of a binary Huffman tree.

    Every node stores a ``value`` plus links to its parent (``prev``) and its
    two children (``left`` / ``right``). All links start out as ``None`` and
    are filled in by whoever assembles the tree.
    """

    def __init__(self, value):
        self.value = value
        self.prev = None   # parent node, None while the node has no parent
        self.left = None   # left child, None for a leaf
        self.right = None  # right child, None for a leaf

    def is_left(self):
        """Return True if this node is the left child of its parent.

        A node without a parent (e.g. the root) is nobody's left child, so
        False is returned instead of failing on ``None.left``.
        """
        return self.prev is not None and self.prev.left is self

# Wrap every input value in its own Node (initialised by Node.__init__)
def set_value(values):
    """Return a list with one new ``Node`` per element of *values*, in order."""
    return list(map(Node, values))

# Build the Huffman tree
def create_huffman_tree(nodes):
    """Link *nodes* into a Huffman tree and return its root.

    The two nodes with the smallest ``value`` are repeatedly merged under a
    new parent whose value is their sum, until a single node is left. The
    passed-in nodes get their ``prev`` link set; the list itself is not
    modified because the merging works on a copy.

    The root's value is the sum of all input values (1 when the inputs are
    probabilities that add up to 1).
    """
    pending = list(nodes)
    while len(pending) != 1:
        # Stable sort: nodes with equal values keep their current order.
        pending.sort(key=lambda candidate: candidate.value)
        # Build bottom-up: the smallest node becomes the left child,
        # the second smallest the right child.
        lowest = pending.pop(0)
        second_lowest = pending.pop(0)
        parent = Node(lowest.value + second_lowest.value)
        parent.left, parent.right = lowest, second_lowest
        lowest.prev = second_lowest.prev = parent
        pending.append(parent)
    root = pending[0]
    root.prev = None
    return root

# Huffman encoding
def huffman_encoding(nodes, node_huf):
    """Return the code string of every node in *nodes*, in the same order.

    For each node the path to the root *node_huf* is followed through the
    ``prev`` links. Every step contributes one character, '1' when the
    current node is a left child and '0' otherwise, and is prepended so the
    finished string reads from the root down to the node.
    """
    codes = []
    for leaf in nodes:
        bits = ''
        current = leaf
        # Climb from the node towards the root.
        while current != node_huf:
            bits = ('1' if current.is_left() else '0') + bits
            current = current.prev
        codes.append(bits)
    return codes

# Huffman decoding
def huffman_decoding(code, node_huf):
    """Decode one code word and return the value of the node it leads to.

    Starting at the root *node_huf*, each character of *code* selects a
    child: '1' goes left, anything else goes right (matching the bit
    assignment used by the encoder). The walk stops as soon as a leaf is
    reached, so characters after a complete code word are ignored instead of
    stepping onto ``None``.

    If *code* ends before a leaf is reached, the value of the internal node
    reached so far is returned; an empty *code* yields the root's value.
    """
    node_tmp = node_huf
    for bit in code:
        # Leaves have no children (left is None): nothing further to descend.
        if node_tmp.left is None:
            break
        # Compare against the character '1', not the integer 1.
        node_tmp = node_tmp.left if bit == '1' else node_tmp.right
    return node_tmp.value

# Average code length
def average_code_length(values, codes):
    """Return the sum of ``values[i] * len(codes[i])`` over all codes.

    With *values* holding the symbol probabilities this is the expected
    number of code characters per symbol. The result is always a float
    (0.0 when *codes* is empty).
    """
    total = 0.0
    for index, code in enumerate(codes):
        total += values[index] * len(code)
    return total

# Theoretical optimum for the average code length
def best_code_length(values):
    """Return the entropy ``-sum(p * log2(p))`` of the probabilities in *values*.

    Zero probabilities are skipped: by the usual convention ``0 * log2(0)``
    counts as 0, whereas evaluating the logarithm of 0 would raise
    ``ValueError``. Negative entries still raise ``ValueError`` from the
    logarithm. An empty input yields 0.0.
    """
    entropy = 0.0
    for probability in values:
        if probability == 0:
            continue
        entropy -= probability * math.log2(probability)
    return entropy

Huffman编码测试:test.py

import huffman_code

# Symbol probabilities of the example source.
values = [0.40, 0.18, 0.10, 0.10, 0.07, 0.06, 0.05, 0.04]

# Leaves -> Huffman tree -> one code string per probability.
nodes = huffman_code.set_value(values)
nodes_huf = huffman_code.create_huffman_tree(nodes)
codes = huffman_code.huffman_encoding(nodes, nodes_huf)

# Achieved average code length versus the entropy of the source.
average_code_length = huffman_code.average_code_length(values, codes)
best_code_length = huffman_code.best_code_length(values)

# NOTE: a single code word can be decoded again with
# huffman_code.huffman_decoding(codes[2], nodes_huf).
print('{0} → {1}.'.format(values, codes))
print('the average code length of Huffman coding is {0}.'.format(average_code_length))
print('the best code length in theory is {0}.'.format(best_code_length))
  • 2
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
# Example Huffman coding implementation in Python (posted as a reader comment).
# It counts the character frequencies of a text, builds the Huffman tree,
# derives the code table, and encodes / decodes the text.
import heapq
from collections import Counter


# Node class
class Node:
    """A Huffman tree node.

    Leaves carry a ``symbol``; internal nodes have ``symbol`` set to None and
    two children. Nodes order by ``freq`` so they can live in a heap.
    """

    def __init__(self, freq, symbol=None, left=None, right=None):
        self.freq = freq
        self.symbol = symbol
        self.left = left
        self.right = right

    def __lt__(self, other):
        return self.freq < other.freq

    def __eq__(self, other):
        # Equality is by frequency only; anything that is not a Node
        # (including None) compares unequal.
        if not isinstance(other, Node):
            return False
        return self.freq == other.freq


# Count character frequencies
def get_char_freq(data):
    """Return a dict mapping each character of *data* to its count."""
    return dict(Counter(data))


# Build the Huffman tree
def build_huffman_tree(freq):
    """Build a Huffman tree from a ``{symbol: count}`` mapping and return its root.

    Raises:
        ValueError: if *freq* is empty, since there is nothing to build from.
    """
    if not freq:
        raise ValueError("cannot build a Huffman tree from an empty frequency table")
    nodes = [Node(count, symbol) for symbol, count in freq.items()]
    heapq.heapify(nodes)
    while len(nodes) > 1:
        left = heapq.heappop(nodes)
        right = heapq.heappop(nodes)
        # Internal nodes must not carry a symbol: the code table and the
        # decoder both treat "symbol is not None" as "this is a leaf".
        heapq.heappush(nodes, Node(left.freq + right.freq, None, left, right))
    return nodes[0]


# Walk the Huffman tree recursively and fill the code table
def traverse_tree(node, current_code, code_dict):
    """Store the code of every leaf below *node* in *code_dict*.

    *current_code* is the path taken so far ('0' for left, '1' for right).
    A tree consisting of a single leaf gets the one-bit code "0", so that
    every symbol has a non-empty code.
    """
    if node is None:
        return
    if node.symbol is not None:
        code_dict[node.symbol] = current_code or "0"
        return
    traverse_tree(node.left, current_code + "0", code_dict)
    traverse_tree(node.right, current_code + "1", code_dict)


# Encode text with the Huffman code table
def encode_text(data, code_dict):
    """Return the concatenated codes of all characters in *data*."""
    return "".join(code_dict[char] for char in data)


# Decode a Huffman bit string back into text
def decode_text(encoded_text, node):
    """Decode *encoded_text* using the tree rooted at *node*."""
    if node.symbol is not None:
        # Single-symbol tree: every bit stands for one occurrence of it.
        return node.symbol * len(encoded_text)
    decoded = []
    current_node = node
    for bit in encoded_text:
        current_node = current_node.left if bit == "0" else current_node.right
        if current_node.symbol is not None:
            decoded.append(current_node.symbol)
            current_node = node  # restart at the root for the next symbol
    return "".join(decoded)


# Demo
data = "hello world"
freq = get_char_freq(data)
huffman_tree = build_huffman_tree(freq)
code_dict = {}
traverse_tree(huffman_tree, "", code_dict)
encoded_text = encode_text(data, code_dict)
decoded_text = decode_text(encoded_text, huffman_tree)
print("Original text: ", data)
print("Encoded text: ", encoded_text)
print("Decoded text: ", decoded_text)
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值