哈夫曼树（最优二叉树）（c/c++）

最新推荐文章于 2023-05-11 16:59:00 发布

消逝者

最新推荐文章于 2023-05-11 16:59:00 发布

阅读量2.1k

点赞数 4

分类专栏：数据结构与算法文章标签：二叉树数据结构

本文链接：https://blog.csdn.net/Little_ant_/article/details/104246904

版权

数据结构与算法专栏收录该内容

47 篇文章 5 订阅

订阅专栏

哈夫曼编码

halfman! halfman! 半人万岁！（来自权力的游戏 Tyrion Lannister）
huffman coding哈夫曼编码的核心是构造哈夫曼树─即最优二叉树，带权路径长度最小的二叉树。它会使出现概率高的字符使用较短的编码，反之出现概率低的则使用较长的编码，这便使编码之后的字符串的平均期望长度降低，可用于数据的无损耗压缩。

哈夫曼树

引入

如果对多个字符进行哈夫曼编码，如 a（7），b（5），c（2），d（4）四个字符进行编码，其中括号中为该字符平均出现次数，称为权重，就要将四个字符结点作为叶子结点构造一棵二叉树，当这棵二叉树的带权路径长度最小时，最优二叉树就形成了。
结点的路径长度：该结点到根结点之间的分支数目
结点的带权路径长度：结点的路径长度与权重的乘积
树的带权路径长度：所有结点的带权路径长度之和
little ant
上图为一棵最优二叉树，结点的权值越大，越靠近根结点。该哈夫曼树的带权路径长度WPL为：7+5x2+2x3+4x3=35
它的哈夫曼编码为：

a: 0    b: 10    c: 110    d: 111

算法实现

哈夫曼树结点中的parent指针指向父节点的下标编号。

typedef struct htNode
{
	char data;//结点中的信息
	int weight;//权重
	int parent, lc, rc;//分别指向父结点，左右孩子结点
}htNode,*HuffmanTree;

哈夫曼编码保存在二维字符数组中，二维数组的每一行对应一个字符的编码，如下：

typedef char** huffmanCode;

实现哈夫曼编码的函数如下：其中存储哈夫曼结点的ht和存储哈夫曼编码的hc均为引用，调用的select（）函数找出一些结点中权重最小的两个结点来创建合适的新节点，并且有目的使最终算法实现的哈夫曼树左边结点权重总是小于右边结点的。

void huffmanCoding(HuffmanTree& ht, huffmanCode& hc,
	int* weigh, char* character, int n)
	//weigh数组保存字符的权值，character数组保存字符，n表示字符个数
{
	if (n < 2)
		return;
	int m = 2 * n - 1;//8个字符一共需要建立15个结点
	ht = new htNode[m];
	int i, s1, s2;
	for (i = 0; i < n; i++)//对叶子结点进行初始化
	{
		ht[i].weight = weigh[i];
		ht[i].data = character[i];
		ht[i].lc = ht[i].rc = ht[i].parent = 0;
	}
	while (i < m)//进行非叶子结点构造，建成哈夫曼树
	{
		select(ht, i - 1, s1, s2);//每次挑选当前结点i之前最小的parent为0的两个结点，
				//且s1表示的权值总是小于s2的，所以哈夫曼树是权值小的在左边，权值大的在右边。
		ht[s1].parent = i;
		ht[s2].parent = i;
		ht[i].lc = s1;
		ht[i].rc = s2;
		ht[i].parent = 0;//i在此作为父结点，将其parent总是要置为0的
		ht[i].weight = ht[s1].weight + ht[s2].weight;
		i++;
	}
	//哈夫曼树构造完毕
	hc = new char*[n];
	char* cd = new char[n];//备用字符数组，从后往前输入数据
	cd[n - 1] = '\0';
	for (i = 0; i <n; i++)
	{
		int start = n - 1;
		int c, f;
		for (c = i, f = ht[i].parent;
			f != 0; c = f, f = ht[f].parent)//从叶子结点到根结点计算哈夫曼编码
		{
			if (ht[f].lc == c)//左0右1
				cd[--start] = '0';
			else
				cd[--start] = '1';
		}
		hc[i] = new char[n - start];//为第i个字符创建存储空间
		c = 0;
		while (start < n)
			hc[i][c++] = cd[start++];//将cd中的字符复制到该字符的哈夫曼编码存储结构中
	}
	delete[] cd;
}

译码过程实现

void deCoding(HuffmanTree ht, int m, char* buff)//buff中为0，1组成的字符串
{
	int p = m-1;
	while (*buff != '\0'&&p != -1)//直到buff中字符被读取完毕，程序结束。
	{
		if (*buff == '0')
			p = ht[p].lc;
		else
			p = ht[p].rc;
		++buff;
		if (!ht[p].lc&&!ht[p].rc)//叶子结点输出
		{
			std::cout << ht[p].data << ' ';
			p = m-1;//又从根结点开始查找
		}
	}
}

完整示例

这边列出了一个完整的范例，是对给定的8个字符进行编码，输入字符存储在data数组中，相应的权值存储在weigh数组中，编码之后进行译码。代码如下：其中包含编码输出结果和译码结果

#include<iostream>
#define N 8
typedef struct htNode
{
	char data;//结点中的信息
	int weight;//权重
	int parent, lc, rc;//分别指向父结点，左右孩子结点
}htNode,*HuffmanTree;
typedef char** huffmanCode;
void huffmanCoding(HuffmanTree& ht, huffmanCode& hc, 
	int* weigh, char* character, int n);//构建哈夫曼树及哈夫曼编码
void select(HuffmanTree& a, int b, int& s1, int& s2);//查找下标b之前的两个最小值
void deCoding(HuffmanTree ht, int m, char* buff);//哈夫曼编码的译码过程
int main()
{
	using namespace std;
	char data[N] = { 'a', 'b', 'c', 'd', 'e', 'f','g','h' };
	int weigh[N] = { 10, 5, 2, 15, 32, 26, 19, 9 };
	HuffmanTree ht;
	huffmanCode hc;
	huffmanCoding(ht, hc, weigh, data, N);

	cout << "哈夫曼编码：" << endl;
	int count;
	for (int i = 0; i < N; i++)
	{
		count = 0;
		cout << data[i] <<": ";
		while (hc[i][count] != '\0')
			cout << hc[i][count++] << ' ';
		cout << endl;
	}

	cout << "哈夫曼译码： " << endl;
	//char* code = "0110001";//选用f，e，d的哈夫曼编码
	const char* c = "0110001";//适用于vs2017版本
	int len = strlen(c);
	char* code = new char[len+1];
	code[len] = '\0';
	for (int i = 0; i < len; ++i)
		code[i] = c[i];
		
	deCoding(ht, 2 * N - 1, code);
	cout << endl;
	return 0;
}
void huffmanCoding(HuffmanTree& ht, huffmanCode& hc,
	int* weigh, char* character, int n)
	//weigh数组保存字符的权值，character数组保存字符，n表示字符个数
{
	if (n < 2)
		return;
	int m = 2 * n - 1;//8个字符一共需要建立15个结点
	ht = new htNode[m];
	int i, s1, s2;
	for (i = 0; i < n; i++)//对叶子结点进行初始化
	{
		ht[i].weight = weigh[i];
		ht[i].data = character[i];
		ht[i].lc = ht[i].rc = ht[i].parent = 0;
	}
	while (i < m)//进行非叶子结点构造，建成哈夫曼树
	{
		select(ht, i - 1, s1, s2);//每次挑选当前结点i之前最小的parent为0的两个结点，
				//且s1表示的权值总是小于s2的，所以哈夫曼树是权值小的在左边，权值大的在右边。
		ht[s1].parent = i;
		ht[s2].parent = i;
		ht[i].lc = s1;
		ht[i].rc = s2;
		ht[i].parent = 0;//i在此作为父结点，将其parent总是要置为0的
		ht[i].weight = ht[s1].weight + ht[s2].weight;
		i++;
	}
	//哈夫曼树构造完毕
	hc = new char*[n];
	char* cd = new char[n];//备用字符数组，从后往前输入数据
	cd[n - 1] = '\0';
	for (i = 0; i <n; i++)
	{
		int start = n - 1;
		int c, f;
		for (c = i, f = ht[i].parent;
			f != 0; c = f, f = ht[f].parent)//从叶子结点到根结点计算哈夫曼编码
		{
			if (ht[f].lc == c)//左0右1
				cd[--start] = '0';
			else
				cd[--start] = '1';
		}
		hc[i] = new char[n - start];//为第i个字符创建存储空间
		c = 0;
		while (start < n)
			hc[i][c++] = cd[start++];//将cd中的字符复制到该字符的哈夫曼编码存储结构中
	}
	delete[] cd;
}
void select(HuffmanTree& a, int b, int& s1, int& s2)
{
	int i, temp1=INT_MAX, temp2=INT_MAX;
	for (i = 0; i <= b; i++)
	{
		if (a[i].parent == 0)
		{
			if (temp1 > a[i].weight)
			{
				temp1 = a[i].weight;
				s1 = i;
			}
		}
	}
		for (i = 0; i <= b; i++)
	{
		if (a[i].parent == 0 && i != s1)
		{
			if (temp2 > a[i].weight)
			{
				temp2 = a[i].weight;
				s2 = i;
			}
		}
	}
}
void deCoding(HuffmanTree ht, int m, char* buff)//buff中为0，1组成的字符串
{
	int p = m-1;
	while (*buff != '\0'&&p != -1)//直到buff中字符被读取完毕，程序结束。
	{
		if (*buff == '0')
			p = ht[p].lc;
		else
			p = ht[p].rc;
		++buff;
		if (!ht[p].lc&&!ht[p].rc)//叶子结点输出
		{
			std::cout << ht[p].data << ' ';
			p = m-1;//又从根结点开始查找
		}
	}
}

算法中所建成的哈夫曼树如图：
little ant
哎呀，忘了在图上标注权值了@_@
输出结果为：
在这里插入图片描述

消逝者

关注

4
点赞
踩
18

收藏

觉得还不错? 一键收藏
4
评论
哈夫曼树（最优二叉树）（c/c++）

哈夫曼编码halfman! halfman! 半人万岁！（权力的游戏）huffman coding哈夫曼编码的核心是构造哈夫曼树─即最优二叉树，带权路径长度最小的二叉树。它会使出现概率高的字符使用较短的编码，反之出现概率低的则使用较长的编码，这便使编码之后的字符串的平均期望长度降低，可用于数据的无损耗压缩。哈夫曼树引入如果对多个字符进行哈夫曼编码，如 a（7），b（5），c（2），d（4...
复制链接

扫一扫

专栏目录