哈弗曼编码、哈弗曼树

最新推荐文章于 2022-03-03 21:40:23 发布

H4ppyD0g

最新推荐文章于 2022-03-03 21:40:23 发布

阅读量261

点赞数 1

分类专栏：算法基础

本文链接：https://blog.csdn.net/weixin_42172261/article/details/95095368

版权

算法基础专栏收录该内容

165 篇文章 6 订阅

订阅专栏

哈弗曼编码是一种依据字符使用频率建立的编码：给使用频率低的字符分配较长的01编码，给使用频率高的字符分配较短的01编码，从而构造出带权路径长度最小的二叉树，最大限度地节省编码所占用的存储空间。这棵树称为哈弗曼树，也称为最优二叉树。
这样可以保证任何一个字符的编码都不会是其他字符编码的前缀，也就是说，从根节点出发沿着编码读到某一个叶子节点时，就可以确定这个字符已经读完了；下一个字符的编码要从根节点重新开始读。原因是经过这样的编码后，每个字符都位于哈弗曼树的叶子节点(也就是路径的终点)，不可能再从这个叶子节点向下延伸(因为是叶子节点)。

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#define INF 0x3f3f3f3f
using namespace std;

// One node of the Huffman tree. Nodes live in a 1-based array and refer to
// each other by index; index 0 means "no node".
struct HuffmanTreeNode {
	char character;	// symbol stored in a leaf (internal nodes hold ' ')
	int weight;	// frequency of the symbol, or sum of the children's weights
	int parent;	// index of the parent node, 0 while the node is still a root
	int lchild;	// index of the left child, 0 if none
	int rchild;	// index of the right child, 0 if none
};

// The tree is handled as a pointer to the first element of the node array.
typedef HuffmanTreeNode *HuffmanTree;

// Maps each symbol to its code, a string of '0' and '1' characters.
typedef std::map<char, std::string> Encode;

// Find the two lightest root nodes (parent == 0) among T[1..n].
//
// T    : 1-based node array. Slot 0 is used as a sentinel: its weight is
//        overwritten with INF on every call.
// n    : highest index to examine.
// min1 : receives the index of the lightest root; the lowest index wins ties.
// min2 : receives the index of the lightest remaining root.
//
// An output stays 0 when no candidate exists for it (fewer than two roots,
// or every remaining weight is >= INF). No node's parent field is modified,
// so the function is safe to call without any follow-up repair by the caller.
void FindMin(HuffmanTree T, int n, int &min1, int &min2){
	min1 = 0, min2 = 0;
	T[0].weight = INF;	// sentinel so that index 0 compares as "heavier than anything"
	for (int i=1; i<=n; i++){
		if (T[i].parent != 0)
			continue;	// already merged into a subtree
		if (T[i].weight < T[min1].weight){
			// New lightest root: the previous lightest becomes the runner-up.
			min2 = min1;
			min1 = i;
		} else if (T[i].weight < T[min2].weight){
			min2 = i;
		}
	}
}

// Build a Huffman tree for n symbols read from standard input.
//
// T : out-parameter; receives a malloc'ed array of 2*n nodes. Slot 0 is a
//     sentinel, slots 1..n are the leaves in input order, and slots
//     n+1..2*n-1 are the internal nodes, so the root ends up at index 2*n-1.
//     The caller owns the array and releases it with free().
// n : number of symbols; must be at least 1.
//
// Each input record is "<character> <weight>". On invalid n, allocation
// failure or malformed input the function prints "err" and terminates the
// process with exit(-1).
void CreadHuffmanTree(HuffmanTree *T, int n){
	if (n < 1){
		printf("err\n");
		exit(-1);
	}

	// Compute the size in size_t so that 2*n cannot overflow int.
	*T = static_cast<HuffmanTreeNode *>(
		malloc(sizeof(HuffmanTreeNode) * 2 * static_cast<size_t>(n)));
	if (!*T){	// check the allocation itself, not the out-parameter
		printf("err\n");
		exit(-1);
	}

	printf("enter the character and the weight:\n");
	for (int i=1; i<=n; i++){
		char s[5];
		int w;
		// %4s bounds the token to the buffer (4 characters + terminator).
		if (scanf("%4s%d", s, &w) != 2){
			printf("err\n");
			exit(-1);
		}
		(*T)[i].character = s[0];
		(*T)[i].weight = w;
		(*T)[i].parent = 0;
		// Leaves must have no children: PrintTree stops on a zero child index.
		(*T)[i].lchild = 0, (*T)[i].rchild = 0;
	}

	// Repeatedly merge the two lightest roots into a new internal node.
	for (int now=n+1; now<=(2*n-1); now++){
		int min1, min2;
		FindMin(*T, now-1, min1, min2);
		(*T)[now].weight = (*T)[min1].weight + (*T)[min2].weight;
		(*T)[now].lchild = min1;
		(*T)[now].rchild = min2;
		(*T)[now].parent = 0;
		(*T)[now].character = ' ';
		(*T)[min1].parent = now;
		(*T)[min2].parent = now;
	}
}

// Print the subtree rooted at index `root` in pre-order: the node itself,
// then its left subtree, then its right subtree. Each line shows the node's
// character and weight. A child index of 0 means "no child".
void PrintTree(HuffmanTree T, int root){
	const HuffmanTreeNode &node = T[root];
	printf("%c\t\t%d\n", node.character, node.weight);

	const int children[2] = { node.lchild, node.rchild };
	for (int k = 0; k < 2; ++k){
		if (children[k] != 0){
			PrintTree(T, children[k]);
		}
	}
}

// Compute the Huffman code of every leaf T[1..n], print the table and store
// it in `encode`.
//
// T      : 1-based node array whose parent/lchild/rchild links are filled in.
// n      : number of leaves.
// encode : receives character -> code; an existing entry for the same
//          character is overwritten.
//
// A code is found by walking from the leaf up to the root (parent == 0),
// recording '0' for a left edge and '1' for a right edge, then reversing the
// recorded bits. The code is held in a std::string, so its length is not
// limited by a fixed-size buffer.
void HuffmanEncode(HuffmanTree T, int n, Encode &encode){
	printf("regulation of the encode:\n");

	for (int i=1; i<=n; i++){
		std::string code;	// bits collected leaf-to-root
		int cur = i;
		for (int up = T[cur].parent; up != 0; up = T[cur].parent){
			if (T[up].lchild == cur)
				code.push_back('0');
			else if (T[up].rchild == cur)
				code.push_back('1');
			cur = up;
		}
		// The walk produced the bits backwards; flip them to root-to-leaf order.
		std::reverse(code.begin(), code.end());

		printf("%c\t\t%s\n", T[i].character, code.c_str());
		encode[T[i].character] = code;
	}
}

//哈夫曼解码
void HuffmanDecode(Encode encode, char *to_decode){
	int len = strlen(to_decode);
	char tmp[64];
	int pos = 0;
	for (int i=0; i<len; i++){
		tmp[pos++] = to_decode[i];
		tmp[pos] = '\0';
		map<char, string>::iterator iter = encode.begin();
		for (; iter!=encode.end(); iter++){
			if (iter->second == tmp){
				printf("%c", iter->first);
				pos = 0;
			}
		}
	}
} 
// Demo driver: reads the symbol count, builds the Huffman tree from the
// symbols entered on stdin, prints the tree and the code table, then decodes
// a fixed sample bit string.
// Returns 0 on success, -1 if the symbol count is missing or less than 1.
int main(){
	Encode encode;
	HuffmanTree T = NULL;
	int n;
	printf("enter the count:\n");
	// A missing or non-positive count would make the root index 2*n-1 invalid.
	if (scanf("%d", &n) != 1 || n < 1){
		printf("err\n");
		return -1;
	}

	CreadHuffmanTree(&T, n);

	printf("\n\n\n");
	printf("char and weight\n");
	PrintTree(T, 2*n-1);	// the root is the last node created

	printf("\n\n\n\n");
	HuffmanEncode(T, n, encode);

	printf("\n\n\n\n");
	char to_decode[64]="1100110111110010100";
	HuffmanDecode(encode, to_decode);

	free(T);	// CreadHuffmanTree allocates the node array with malloc
	return 0;
}

/*
input
6
a 45
b 13
c 12
d 16
e 9
f 5
*/

H4ppyD0g

关注

1
点赞
踩
4

收藏

觉得还不错? 一键收藏
打赏
0
评论
哈弗曼编码、哈弗曼树

哈弗曼编码是一种依据字符使用频率建立的编码：给使用频率低的字符分配较长的01编码，给使用频率高的字符分配较短的01编码，从而构造出带权路径长度最小的二叉树，最大限度地节省编码所占用的存储空间。这棵树称为哈弗曼树，也称为最优二叉树。这样可以保证任何一个字符的编码都不会是其他字符编码的前缀，也就是说，读到某一个叶子节点时，就可以确定这个字符已经读完了；下一个字符的编码要从根节点重新开始读。原因是经过...
复制链接

扫一扫