Huffman Tree(哈夫曼树)的python实现

Huffman Tree(哈夫曼树)的python实现

创建Huffman Tree 的基础node

class node:
	"""A single node of a Huffman tree.

	Python has no constructor overloading: when several ``__init__``
	methods are defined in one class body, only the last one is kept.
	A single constructor with default arguments therefore covers the
	``node()``, ``node(name)`` and ``node(name, count)`` call forms.
	"""

	def __init__(self, name=None, count=None):
		"""Create a node with no children and an empty code.

		Args:
			name: The symbol this node represents (or a label for an
				internal node). Defaults to None.
			count: Number of occurrences of the symbol. Defaults to None.
		"""
		self.name = name
		# Child nodes; both stay None until the node is linked into a tree.
		self.left = None
		self.right = None
		# Bit string ("0"/"1" characters) assigned once the tree is built.
		self.code = ""
		self.count = count
node包含以下属性:
name: 该node对应的symbol
left/right: 左右的子node
code: huffman算法实现后的symbol的code
count: 当前symbol出现的次数

定义Huffman算法

def nodesHuffman(nodes, names):
	"""Build a Huffman tree from ``nodes`` and assign a code to every node.

	``nodes`` is modified in place: zero-count entries are removed, the
	two lowest-count entries are repeatedly merged under a new parent
	named "P0", "P1", ... until one root remains, and ``setCode`` then
	re-inserts the nodes whose name is in ``names`` after the root.

	Args:
		nodes: List of node objects exposing ``name``, ``count``,
			``left``, ``right`` and ``code``.
		names: Collection of the (hashable) symbol names; used to tell
			symbol nodes apart from generated parent nodes.

	Returns:
		A two-element list ``[root, codeDic]`` where ``codeDic`` maps a
		node name to its bit string.

	Raises:
		ValueError: If no node with a non-zero count is left.
	"""
	# Drop symbols that never occur: keeping them in the tree would only
	# make the other codes longer and the compression worse.
	nodes[:] = [candidate for candidate in nodes if candidate.count != 0]
	if not nodes:
		raise ValueError("nodesHuffman needs at least one node with a non-zero count")

	num = 0  # suffix of the next generated parent name: P0, P1, P2, ...
	while len(nodes) > 1:
		# Find the indices of the two nodes with the smallest counts.
		# Ties keep the earlier index, which fixes the shape of the tree.
		if nodes[1].count >= nodes[0].count:
			nodemin1 = 0
			nodemin2 = 1
		else:
			nodemin1 = 1
			nodemin2 = 0
		for i in range(2, len(nodes)):
			if nodes[i].count < nodes[nodemin2].count:
				if nodes[i].count < nodes[nodemin1].count:
					nodemin2 = nodemin1
					nodemin1 = i
				else:
					nodemin2 = i

		# Create the parent; its count is the sum of both children.
		countNew = nodes[nodemin2].count + nodes[nodemin1].count
		nameNew = "P" + str(num)
		num += 1
		nodeNew = node(nameNew, countNew)
		nodeNew.left = nodes[nodemin1]
		nodeNew.right = nodes[nodemin2]

		# Remove both children from the work list, higher index first so
		# the lower index is still valid for the second pop.
		nodes.pop(max(nodemin1, nodemin2))
		nodes.pop(min(nodemin1, nodemin2))
		# Queue the new parent for further merging.
		nodes.insert(0, nodeNew)

	root = nodes[0]
	codeDic = {}
	if root.left is None and root.right is None:
		# Only one symbol: the tree has no branches, so no code would be
		# assigned by walking it. Give the symbol an explicit 1-bit code.
		root.code = "0"
		codeDic[root.name] = root.code
	else:
		# A set makes the per-node membership test constant time.
		setCode(root, nodes, set(names), codeDic)

	return [root, codeDic]

定义赋予code的函数

def setCode(node, nodes, names, codeDic):
	"""Recursively assign bit-string codes to all descendants of ``node``.

	The left child receives the parent's code followed by "0", the right
	child the parent's code followed by "1". Each child's code is stored
	on the child and in ``codeDic`` under the child's name. A child whose
	name is in ``names`` is additionally inserted into ``nodes`` at
	index 1. The left subtree is completed before the right child is
	handled.

	Args:
		node: Node whose children are to be labelled; its own ``code``
			is used as the prefix.
		nodes: List that receives the named children (mutated in place).
		names: Collection of names that mark a child for insertion.
		codeDic: Dictionary filled with ``name -> code`` (mutated in place).
	"""
	for child, bit in ((node.left, "0"), (node.right, "1")):
		if child is None:
			continue
		# Named (symbol) nodes are placed right after the first list entry.
		if child.name in names:
			nodes.insert(1, child)
		child.code = node.code + bit
		codeDic[child.name] = child.code
		# Descend before moving on to the sibling.
		setCode(child, nodes, names, codeDic)

使用(基于.wav压缩)

import struct;
import tkinter.filedialog;
# Ask the user for the input file.
fname = tkinter.filedialog.askopenfilename()
if not fname:
	# The dialog returns an empty value when the user cancels.
	raise SystemExit("no input file selected")

# Read the whole input; the context manager closes the file afterwards.
with open(fname, "rb") as file:
	s = file.read()
slength = len(s)

# The first 44 bytes are skipped as the WAV header and the rest is read as
# 16-bit samples. NOTE(review): this assumes a canonical 44-byte header and
# 16-bit PCM data - confirm for the files you feed in.
# WAV data is little-endian, so "<h" is used instead of the native "h".
# A trailing odd byte cannot form a sample and is ignored.
payload = s[44:]
sampleCount = len(payload) // 2
samples = struct.unpack("<%dh" % sampleCount, payload[:sampleCount * 2])

# Count how often every symbol (sample value) occurs.
count = {}
for temp in samples:
	count[temp] = count.get(temp, 0) + 1

# Build one node per symbol. list() is required because a dict view
# cannot be indexed on Python 3.
keys = list(count.keys())
keylen = len(keys)
nodes = [node(key, count[key]) for key in keys]

# Run the Huffman algorithm (skipped when there are no samples at all).
if keys:
	[nodeOrigin, codeDic] = nodesHuffman(nodes, keys)
else:
	nodeOrigin, codeDic = None, {}
if keylen == 1 and keys[0] not in codeDic:
	# A tree with one symbol has no branches to derive a code from, so
	# make sure that symbol still has a 1-bit code.
	codeDic[keys[0]] = "0"

# Encode every sample; join() avoids repeated string concatenation.
tempNow = "".join(codeDic[temp] for temp in samples)

# Pad with "0" up to the next multiple of 8 bits so the stream can be
# written as whole bytes (struct format "B" is an unsigned byte, 0-255).
tempNow += "0" * (-len(tempNow) % 8)

# Write the compressed bytes. Only the packed bits are written: neither
# the header nor the code table is stored in the output file.
with open("test.Huffman", "wb") as file2:
	file2.write(bytes(int(tempNow[i:i + 8], 2) for i in range(0, len(tempNow), 8)))

总结

Huffman算法比较适合部分symbol以较高频率重复出现的情况。setCode是一个递归(recursive)函数, 需要逐个访问树中的每个node, 所以给symbol赋予code比较耗费时间。

链接: https://github.com/Musou-WS/Huffman-LZW.

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值