一、问题:
请使用哈夫曼编码方法对给定的字符串“AABBCCDDEEABCDDCDBAEEAAA”进行编码,使发送的编码总长度最小,且便于译码。
二、过程:
import heapq
import collections
class Node:
    """A node of a Huffman tree.

    A leaf carries its symbol in ``char``. An internal node is created with
    ``char`` set to ``None`` and has its children attached through ``left``
    and ``right`` after construction.
    """

    def __init__(self, char, freq):
        self.char = char  # symbol for a leaf, None for an internal node
        self.freq = freq  # weight used to order nodes in the priority queue
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Compare by frequency so ``heapq`` pops the lightest node first."""
        return self.freq < other.freq

    def __repr__(self):
        """Return a short debugging representation of the node."""
        return f"{type(self).__name__}(char={self.char!r}, freq={self.freq!r})"
def build_frequency_table(text):
    """Count how many times each symbol occurs in *text*.

    Args:
        text: iterable of hashable symbols, typically a string.

    Returns:
        A ``collections.Counter`` mapping each distinct symbol to its number
        of occurrences. Empty input yields an empty counter.
    """
    return collections.Counter(text)
def build_huffman_tree(frequencies):
    """Build a Huffman tree from a symbol -> frequency mapping.

    One leaf is created per symbol. The two lowest-frequency nodes are then
    repeatedly popped and merged under a new internal node whose frequency is
    their sum, until a single node remains.

    Args:
        frequencies: mapping of symbol to occurrence count.

    Returns:
        The root ``Node`` of the tree, or ``None`` when *frequencies* is
        empty.
    """
    priority_queue = [Node(char, freq) for char, freq in frequencies.items()]
    if not priority_queue:
        # Nothing to encode; indexing the empty heap below would raise
        # IndexError.
        return None
    heapq.heapify(priority_queue)
    while len(priority_queue) > 1:
        # The first node popped becomes the left ("0") child, the second the
        # right ("1") child.
        left = heapq.heappop(priority_queue)
        right = heapq.heappop(priority_queue)
        merged = Node(None, left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(priority_queue, merged)
    return priority_queue[0]
def build_huffman_codes(root, prefix="", codebook=None):
    """Derive the bit string assigned to every symbol in a Huffman tree.

    The tree is walked depth-first, appending ``"0"`` when descending to the
    left child and ``"1"`` when descending to the right child. The path that
    leads to a leaf becomes that leaf's code.

    Args:
        root: node exposing ``char``, ``left`` and ``right``; ``None`` stands
            for an empty tree.
        prefix: bits accumulated on the way down to *root*.
        codebook: dict to fill in. A fresh dict is created when omitted.

    Returns:
        The dict mapping each symbol to its code string (empty for an empty
        tree).
    """
    if codebook is None:
        # A ``{}`` default is created once and shared by every call, which
        # would leak codes between unrelated trees.
        codebook = {}
    if root is None:
        return codebook
    if root.char is not None:
        # A tree made of a single leaf has an empty path; give that symbol a
        # one-bit code so the encoded output is not an empty string.
        codebook[root.char] = prefix or "0"
        return codebook
    build_huffman_codes(root.left, prefix + "0", codebook)
    build_huffman_codes(root.right, prefix + "1", codebook)
    return codebook
def huffman_encoding(text):
    """Huffman-encode *text*.

    Args:
        text: the string to encode.

    Returns:
        A ``(encoded_text, huffman_codes)`` tuple: the concatenated bit
        string for *text* and the dict mapping each symbol to its code.
        Empty input yields ``("", {})``.
    """
    if not text:
        # No symbols means no tree can be built.
        return "", {}
    frequencies = build_frequency_table(text)
    root = build_huffman_tree(frequencies)
    # Pass a fresh dict explicitly instead of relying on the callee's default,
    # so the codebook only ever holds symbols from this text.
    huffman_codes = build_huffman_codes(root, "", {})
    # With a single distinct symbol the tree is one leaf and its path is
    # empty; fall back to a one-bit code so the output stays decodable.
    huffman_codes = {char: code or "0" for char, code in huffman_codes.items()}
    encoded_text = "".join(huffman_codes[char] for char in text)
    return encoded_text, huffman_codes
# Demo: encode the sample string, then print the input, the code table and
# the resulting bit string.
text = "AABBCCDDEEABCDDCDBAEEAAA"
encoded_text, huffman_codes = huffman_encoding(text)
for label, value in (
    ("原始文本:", text),
    ("哈夫曼编码:", huffman_codes),
    ("编码后的文本:", encoded_text),
):
    print(label, value)