# coding: utf-8
#Tree-Node Type
class Node(object):
    """A binary-tree node used to build a Huffman tree.

    Attributes:
        left: left child node, or None.
        right: right child node, or None.
        father: parent node, or None while the node has no parent.
        freq: the weight (frequency) carried by this node.
    """

    def __init__(self, freq):
        self.left = None
        self.right = None
        self.father = None
        self.freq = freq

    def __repr__(self):
        # Debug-friendly representation, e.g. Node(5).
        return 'Node({0.freq!r})'.format(self)

    def isLeft(self):
        """Return True when this node is the left child of its father.

        A node without a father is nobody's left child, so False is
        returned instead of failing with AttributeError on None.
        """
        father = self.father
        # Identity, not equality: we want to know whether the father's left
        # slot holds this very object.
        return father is not None and father.left is self
#create nodes 创建叶子节点
def createNodes(freqs):
    """Return a list with one new Node per entry of *freqs*, in input order."""
    return list(map(Node, freqs))
#create Huffman-Tree 创建Huffman树
def createHuffmanTree(nodes):
    """Build a Huffman tree over *nodes* and return its root.

    The two lowest-frequency nodes are repeatedly merged under a new father
    node whose frequency is their sum, until a single node remains.

    The *nodes* list itself is not modified, but the node objects are linked
    in place through their ``left``/``right``/``father`` attributes.

    Returns:
        The root node, or None when *nodes* is empty. A single input node
        is returned as the root unchanged (with ``father`` reset to None).
    """
    import heapq  # local import: the file has no module-level import section

    if not nodes:
        return None

    # Heap entries are (freq, serial, node). The serial number records
    # insertion order, so nodes with equal frequency are taken in the order
    # they entered the queue -- the same tie-breaking a stable sort followed
    # by an append gives -- and node objects themselves are never compared.
    heap = [(node.freq, serial, node) for serial, node in enumerate(nodes)]
    heapq.heapify(heap)
    serial = len(heap)

    while len(heap) > 1:
        node_left = heapq.heappop(heap)[2]
        node_right = heapq.heappop(heap)[2]
        node_father = Node(node_left.freq + node_right.freq)
        node_father.left = node_left
        node_father.right = node_right
        node_left.father = node_father
        node_right.father = node_father
        heapq.heappush(heap, (node_father.freq, serial, node_father))
        serial += 1

    root = heap[0][2]
    root.father = None  # the root has no father
    return root
#Huffman编码
def huffmanEncoding(nodes, root):
    """Return the Huffman code string of every node in *nodes*.

    Each code is found by walking from the node up to *root* through the
    ``father`` links: a step taken from a left child contributes '0', any
    other step contributes '1'. The result has one code per entry of
    *nodes*, in the same order; a node that is *root* itself gets ''.
    """
    codes = []
    for leaf in nodes:
        # Bits are gathered leaf-to-root, i.e. last bit first.
        bits = []
        current = leaf
        while current != root:
            bits.append('0' if current.isLeft() else '1')
            current = current.father
        bits.reverse()
        codes.append(''.join(bits))
    return codes
if __name__ == '__main__':
    # Demo: build Huffman codes for a fixed alphabet and print them ordered
    # by frequency.
    chars = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N']
    freqs = [10, 4, 2, 5, 3, 4, 2, 6, 4, 4, 3, 7, 9, 6]
    # Materialise the pairs: on Python 3 zip() returns a one-shot iterator,
    # and the pairs are needed twice below.
    chars_freqs = list(zip(chars, freqs))
    nodes = createNodes([item[1] for item in chars_freqs])
    root = createHuffmanTree(nodes)
    codes = huffmanEncoding(nodes, root)
    # sorted() accepts any iterable and, like list.sort(), is stable, so
    # characters with equal frequency keep their original relative order.
    chars_codes = sorted(zip(chars_freqs, codes), key=lambda item: item[0][1])
    for item in chars_codes:
        # A single parenthesised argument prints identically on Python 2 and 3.
        print('Character:%s freq:%-2d encoding: %s' % (item[0][0], item[0][1], item[1]))
Character:C freq:2 encoding: 10100
Character:G freq:2 encoding: 10101
Character:E freq:3 encoding: 0000
Character:K freq:3 encoding: 0001
Character:B freq:4 encoding: 0100
Character:F freq:4 encoding: 0101
Character:I freq:4 encoding: 0110
Character:J freq:4 encoding: 0111
Character:D freq:5 encoding: 1011
Character:H freq:6 encoding: 1110
Character:N freq:6 encoding: 1111
Character:L freq:7 encoding: 001
Character:M freq:9 encoding: 100
Character:A freq:10 encoding: 110
注:
1)__repr__修改instance的string representation,方便调试,见python cookbook 8.1节
2)代码中的两处排序(构建Huffman树时对队列的排序、输出结果前对结果的排序)都是根据提供的key参数对列表进行排序
3)构建Node类,体会OO编程
4)其他实现:Rosettacode