哈夫曼编码是一种依据字符使用频率建立的编码:给使用频率低的字符分配较长的01编码,给使用频率高的字符分配较短的01编码,从而构造出带权路径长度最小的二叉树,最大限度地节省编码所占的存储空间。这棵树称为哈夫曼树,也称为最优二叉树。
这样可以保证任何一个字符的编码都不会是其他字符编码的前缀,也就是说,解码时一旦读到某个叶子节点,就可以确定当前字符已经读完,下一个字符的编码要从根节点重新读起。原因是经过这样的编码后,每个字符都位于哈夫曼树的叶子节点(也就是到最后了),不可能再从这个叶子节点向下延伸(因为是叶子节点)。
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#define INF 0x3f3f3f3f
using namespace std;
// One slot of the array-backed Huffman tree. Links between nodes are array
// indices rather than pointers; an index of 0 means "no such node".
struct HuffmanTreeNode {
    char character;  // symbol carried by a leaf (internal nodes are given ' ')
    int weight;      // frequency of the symbol, or the sum of both children
    int parent;      // index of the parent node; 0 while the node has none
    int lchild;      // index of the left child; 0 if absent
    int rchild;      // index of the right child; 0 if absent
};

// A tree is handled as a pointer to its first slot.
typedef HuffmanTreeNode *HuffmanTree;

// Code table: symbol -> its bit string written with the characters '0'/'1'.
typedef std::map<char, std::string> Encode;
//找两个weight最小的节点
void FindMin(HuffmanTree T, int n, int &min1, int &min2){
min1 = 0, min2 = 0;
T[0].weight = INF;
for (int i=1; i<=n; i++){
if (T[i].parent==0 && T[i].weight<T[min1].weight){
min1 = i;
}
}
T[min1].parent = -1;
for (int i=1; i<=n; i++){
if (T[i].parent==0 && T[i].weight<T[min2].weight){
min2 = i;
}
}
}
// Builds a Huffman tree for n symbols read from standard input.
//   T - out-parameter; receives a malloc'd array of 2*n slots. Slots 1..n are
//       the leaves in input order, slots n+1..2n-1 are internal nodes, and
//       slot 2n-1 is the root. Slot 0 is scratch space used by FindMin.
//       The caller owns the array and releases it with free().
//   n - number of symbols; must be at least 1.
// Each input record is "<symbol> <weight>"; only the first character of the
// symbol token is kept. Prints "err" and exits with status -1 on a bad
// argument, allocation failure, or malformed input.
void CreadHuffmanTree(HuffmanTree *T, int n){
    if (!T || n < 1){
        printf("err\n");
        exit(-1);
    }
    // Widen before multiplying so that 2*n cannot overflow int.
    const size_t slots = 2 * static_cast<size_t>(n);
    *T = static_cast<HuffmanTreeNode *>(malloc(sizeof(HuffmanTreeNode) * slots));
    if (!*T){  // test the allocation itself, not the out-parameter
        printf("err\n");
        exit(-1);
    }
    printf("enter the character and the weight:\n");
    for (int i = 1; i <= n; i++){
        char s[5];
        int w;
        // %4s keeps the token inside s (4 characters + terminator).
        if (scanf("%4s%d", s, &w) != 2){
            printf("err\n");
            exit(-1);
        }
        (*T)[i].character = s[0];
        (*T)[i].weight = w;
        (*T)[i].parent = 0;
        // Leaves must carry zero child links: PrintTree stops descending at 0.
        (*T)[i].lchild = 0;
        (*T)[i].rchild = 0;
    }
    // n-1 merges: each new node adopts the two lightest parentless nodes.
    for (int now = n + 1; now <= (2 * n - 1); now++){
        int min1, min2;
        FindMin(*T, now - 1, min1, min2);
        (*T)[now].weight = (*T)[min1].weight + (*T)[min2].weight;
        (*T)[now].lchild = min1;
        (*T)[now].rchild = min2;
        (*T)[now].parent = 0;
        (*T)[now].character = ' ';
        // Also replaces the temporary -1 marker FindMin left on min1.
        (*T)[min1].parent = now;
        (*T)[min2].parent = now;
    }
}
// Prints the subtree rooted at index `root` in pre-order: the node itself
// ("<character>\t\t<weight>"), then its left subtree, then its right subtree.
// A child index of 0 marks a missing child and ends the descent.
void PrintTree(HuffmanTree T, int root){
    const HuffmanTreeNode &node = T[root];
    printf("%c\t\t%d\n", node.character, node.weight);

    const int children[2] = { node.lchild, node.rchild };
    for (int side = 0; side < 2; ++side){
        if (children[side] != 0)
            PrintTree(T, children[side]);
    }
}
// Derives the code of every leaf T[1..n], prints the table, and stores it in
// `encode` (symbol -> bit string).
//   T      - tree built by CreadHuffmanTree
//   n      - number of leaves
//   encode - output table; an existing entry for the same symbol is overwritten
// A code is collected by walking from the leaf up to the root (the node whose
// parent is 0): '0' when the current node is its parent's left child, '1' when
// it is the right child. The walk yields the bits leaf-to-root, so they are
// reversed at the end. The code length is not limited.
void HuffmanEncode(HuffmanTree T, int n, Encode &encode){
    printf("regulation of the encode:\n");
    for (int i = 1; i <= n; i++){
        std::string code;
        int cur = i;
        for (int up = T[cur].parent; up != 0; up = T[cur].parent){
            if (T[up].lchild == cur)
                code.push_back('0');
            else if (T[up].rchild == cur)
                code.push_back('1');
            cur = up;
        }
        std::reverse(code.begin(), code.end());
        // A leaf that is itself the root (single-symbol alphabet) has no path;
        // give it a one-bit code so that its occurrences can still be decoded.
        if (code.empty())
            code = "0";
        printf("%c\t\t%s\n", T[i].character, code.c_str());
        encode[T[i].character] = code;
    }
}
// Decodes a string of '0'/'1' characters and prints the recovered symbols.
//   encode    - code table (symbol -> bit string)
//   to_decode - NUL-terminated bit string; a null pointer is treated as empty
// Bits are accumulated until they equal one of the codes; that symbol is then
// printed and accumulation restarts. Trailing bits that never complete a code
// are dropped silently. Input length and code length are not limited.
void HuffmanDecode(Encode encode, char *to_decode){
    if (!to_decode)
        return;

    // Invert the table once so each bit costs a lookup instead of a full scan.
    // insert() keeps the first symbol if two symbols happen to share a code.
    std::map<std::string, char> symbol_of;
    for (Encode::const_iterator it = encode.begin(); it != encode.end(); ++it)
        symbol_of.insert(std::make_pair(it->second, it->first));

    std::string pending;  // bits read since the last decoded symbol
    for (const char *bit = to_decode; *bit != '\0'; ++bit){
        pending.push_back(*bit);
        std::map<std::string, char>::const_iterator hit = symbol_of.find(pending);
        if (hit != symbol_of.end()){
            printf("%c", hit->second);
            pending.clear();
        }
    }
}
// Demo driver: reads the symbol count, builds the tree from stdin, prints the
// tree and the code table, then decodes a fixed sample bit string.
// Returns -1 if the count is missing or smaller than 1.
int main(){
    Encode encode;
    HuffmanTree T = NULL;
    int n;
    printf("enter the count:\n");
    // Without at least one symbol there is no root at index 2*n-1 to print.
    if (scanf("%d", &n) != 1 || n < 1){
        printf("err\n");
        return -1;
    }
    CreadHuffmanTree(&T, n);
    printf("\n\n\n");
    printf("char and weight\n");
    PrintTree(T, 2*n-1);  // the last slot filled by the builder is the root
    printf("\n\n\n\n");
    HuffmanEncode(T, n, encode);
    printf("\n\n\n\n");
    char to_decode[64]="1100110111110010100";
    HuffmanDecode(encode, to_decode);
    free(T);  // the tree was allocated with malloc in CreadHuffmanTree
    return 0;
}
/*
input
6
a 45
b 13
c 12
d 16
e 9
f 5
*/