代码实现:
import heapq

import numpy as np
#
class BinaryTreeNode(object):
    """Plain binary-tree node holding a payload and two child links."""

    def __init__(self, value):
        """Create a node with no children.

        :param value: payload stored on the node.
        """
        self.left = None  # left subtree
        self.right = None  # right subtree
        self.value = value  # the value of word
#
class HuffmanTreeNode(BinaryTreeNode):
    """Node type shared by the leaves and the internal nodes of a Huffman tree."""

    def __init__(self, value, possibility):
        """Create a childless node with an empty Huffman code.

        :param value: the word for a leaf node, the mid vector for an
            internal (tree) node.
        :param possibility: the frequency of the word in the corpus.
        """
        super(HuffmanTreeNode, self).__init__(value)
        self.possibility = possibility
        self.Huffman = ""  # store the huffman code
#
class HuffmanTree(object):
    """Huffman tree over a vocabulary, built from per-word possibilities.

    Attributes:
        vec_len: length of the zero vector stored as the value of every
            internal node.
        root: root node of the tree, or None when the vocabulary is empty.
    """

    def __init__(self, word_dict, vec_len=500):
        """Create one leaf per vocabulary entry and build the tree.

        :param word_dict: mapping whose values are dicts providing at least
            the keys ``'word'`` and ``'possibility'``.
        :param vec_len: the length of the word vector kept in internal nodes.
        """
        self.vec_len = vec_len  # the length of word vector
        self.root = None
        # One HuffmanTreeNode per word in the dictionary, for further processing.
        node_list = [
            HuffmanTreeNode(entry['word'], entry['possibility'])
            for entry in word_dict.values()
        ]
        self.build_tree(node_list)
        # NOTE: Huffman codes are not assigned here; call
        # generate_huffman_code(self.root, word_dict) explicitly when needed.

    def merge(self, node1, node2):
        """Join two nodes under a new internal node and return that node.

        The new node's possibility is the sum of both children and its value
        is a zero array of shape ``(1, vec_len)``.  The child with the larger
        possibility goes to the left; on a tie ``node1`` goes to the left.

        :param node1: first node to merge.
        :param node2: second node to merge.
        :return: the newly created parent node.
        """
        top_pos = node1.possibility + node2.possibility  # add the possibilities
        top_node = HuffmanTreeNode(np.zeros([1, self.vec_len]), top_pos)
        if node1.possibility >= node2.possibility:
            # the larger one sits on the left
            top_node.left, top_node.right = node1, node2
        else:
            top_node.left, top_node.right = node2, node1
        return top_node

    def build_tree(self, node_list):
        """Merge the nodes bottom-up into one tree and store it in ``self.root``.

        The two nodes with the lowest possibility are merged repeatedly until
        a single node remains.  Ties go to the earlier position, where merged
        nodes (newest first) come before the original nodes in list order.

        :param node_list: list of nodes to combine.  It is modified in place:
            afterwards it holds only the root (or stays empty).
        """
        if not node_list:
            # Empty vocabulary: there is nothing to build.
            self.root = None
            return

        # Heap entries are (possibility, order, node).  ``order`` is unique,
        # so the nodes themselves are never compared, and it encodes the
        # tie-break described above: original nodes keep their list index,
        # merged nodes get -1, -2, ... so the newest merged node sorts first.
        heap = [
            (node.possibility, order, node)
            for order, node in enumerate(node_list)
        ]
        heapq.heapify(heap)
        next_order = -1
        while len(heap) > 1:
            smallest = heapq.heappop(heap)[2]
            second_smallest = heapq.heappop(heap)[2]
            # merge the two nodes into a new intermediate node
            top_node = self.merge(smallest, second_smallest)
            heapq.heappush(heap, (top_node.possibility, next_order, top_node))
            next_order -= 1

        self.root = heap[0][2]
        # Callers may rely on the list having been reduced to the root.
        node_list[:] = [self.root]

    def generate_huffman_code(self, node, word_dict):
        """Assign Huffman codes to all nodes and record the leaf codes.

        Starting at ``self.root``, each left child receives its parent's code
        plus ``"1"`` and each right child its parent's code plus ``"0"``.  For
        every leaf the code is stored in ``word_dict[leaf.value]['Huffman']``.

        :param node: not used; the traversal always starts at ``self.root``.
            Kept so existing callers keep working.
        :param word_dict: mapping keyed by word whose values are dicts; it is
            updated in place.
        :return: the same ``word_dict`` object.
        """
        if self.root is None:
            # Empty tree: no codes to assign.
            return word_dict

        stack = [self.root]
        while stack:
            node = stack.pop()
            # Go along the left branch down to a leaf, remembering each right
            # sibling so that it is visited later.
            while node.left is not None or node.right is not None:
                code = node.Huffman
                node.left.Huffman = code + "1"  # the larger side gets 1
                node.right.Huffman = code + "0"
                stack.append(node.right)
                node = node.left
            word_dict[node.value]['Huffman'] = node.Huffman
        return word_dict
#