哈夫曼编码可以用于文件的无损压缩。
哈夫曼编码所生成的编码是前缀码,即没有任何码字是其他码字的前缀(此句出自《算法导论》)。之所以需要前缀码,是因为它具有无歧义的性质。
构造前缀编码的过程相当于构造一棵满二叉树(FULL BINARY TREE),注意不是平衡二叉树(BBT),而哈夫曼树又是最优二叉树。
如果已知叶子节点数为 NL,那么节点总数为 NT=NL*2-1。
当已经统计好内容的频率后,每一次向上构造树干都需要用到当时频率(或者说权重)最小和次小的两个结点。
要找出最小和次小的结点,可以在所有还没有父结点的结点之间逐一比较,从而找出权重最小和次小的2个结点,
也可以用优先队列(最小堆,BUILD_MIN_HEAP)来实现。
先没头没脑地说这么多,以后再细细补充。
以下是代码(代码采用的是在还没有父结点的结点之间逐一比较来找出最小和次小的方法):
#include<stdio.h>
#define LEAF 4 // Number of leaves, i.e. the number of distinct symbols counted; hard-coded as a shortcut (assumed known in advance)
#define MAX 1000 // Large sentinel value, intended to exceed any node weight
/* One node of the Huffman tree; nodes live in an array and refer to each other by index. */
typedef struct huffmanNode{
    int weight;   /* symbol frequency for a leaf, sum of the children's weights otherwise */
    int parent;   /* index of the parent node, -1 while the node has none */
    int right;    /* index of the right child, -1 if there is none */
    int left;     /* index of the left child, -1 if there is none */
    char c;       /* symbol carried by a leaf; 0 for internal nodes */
}HN;
/* Code word of one leaf, stored right-aligned in a fixed-size bit array. */
typedef struct huffmancode{
    int start;       /* index in bit[] of the first bit of the code */
    int bit[LEAF];   /* code bits; the code occupies bit[start] .. bit[LEAF-1] */
}HC;
/* Tree storage: LEAF leaves followed by the LEAF-1 internal nodes. */
HN hn[2 * LEAF - 1];
/* Code table: hc[i] is the code word of leaf hn[i]. */
HC hc[LEAF];
void initNode(int a[]){
int i = 0;
for (i = 0; i < LEAF * 2 - 1; i++){
if (i<LEAF){
hn[i].weight = a[i];
hn[i].c = 65+i;
}
else {
hn[i].weight = 0;
hn[i].c = 0;
}
hn[i].parent = -1;
hn[i].left = -1;
hn[i].right = -1;
}
}
/*
 * Build the Huffman tree inside the global hn[] array.
 *
 * Leaves are expected in hn[0..LEAF-1] with parent == -1. Each pass of the
 * outer loop creates one internal node hn[i] whose children are the two
 * parentless nodes of smallest weight among hn[0..i-1]; the lighter one
 * becomes the left child. The last node created, hn[2*LEAF-2], is left
 * without a parent and is therefore the root.
 */
void createHuffmanTree(){
    for (int i = LEAF; i < LEAF * 2 - 1; i++){
        int lchild = -1, rchild = -1; /* indices of the smallest / second smallest node */
        int min1 = 0, min2 = 0;       /* their weights; meaningful only once the index is set */

        for (int j = 0; j < i; j++){
            if (hn[j].parent != -1){
                continue; /* already attached to the tree */
            }
            int w = hn[j].weight;
            if (lchild == -1 || w < min1){
                /* New minimum: the previous minimum becomes the runner-up,
                 * index and weight together. */
                rchild = lchild;
                min2 = min1;
                lchild = j;
                min1 = w;
            }
            else if (rchild == -1 || w < min2){
                rchild = j;
                min2 = w;
            }
        }

        if (lchild == -1 || rchild == -1){
            /* Fewer than two parentless nodes: nothing left to merge, and
             * indexing hn[-1] must be avoided. */
            return;
        }

        hn[lchild].parent = i;
        hn[rchild].parent = i;
        hn[i].left = lchild;
        hn[i].right = rchild;
        hn[i].weight = min1 + min2;
    }
}
/*
 * Compare two node indices.
 *
 * lc: index to compare against (callers pass a node's left-child index)
 * c:  index being tested
 *
 * Returns 0 when both indices are equal, 1 otherwise.
 */
int checkNode(int lc, int c){
    return (lc != c) ? 1 : 0;
}
/*
 * Derive the code word of every leaf and store it in the global hc[] table.
 *
 * For leaf i the path is followed from the leaf up to the root. At each step
 * one bit is written, 0 if the current node is the left child of its parent
 * and 1 otherwise. Bits are written from the end of hc[i].bit towards the
 * front, so the finished code reads root-to-leaf in
 * hc[i].bit[hc[i].start .. LEAF-1].
 */
void huffmanEncoding(){
    for (int i = 0; i < LEAF; i++){
        int start = LEAF - 1;
        int c = i; /* node currently being left; moves up one level per step */

        /* The bit depends on the current child c, not on the original leaf i.
         * "start >= 0" keeps writes inside bit[] even if the tree is malformed. */
        for (int f = hn[c].parent; f != -1 && start >= 0; c = f, f = hn[f].parent){
            hc[i].bit[start--] = checkNode(hn[f].left, c);
        }
        hc[i].start = start + 1;
    }
}
void toPrint(){
for (int i = 0; i < LEAF; i++){
printf("%c :", hn[i].c);
for (int j = hc[i].start; j < LEAF; j++) printf("%d", hc[i].bit[j]);
printf("\n");
}
}
/*
 * Decode a string of '0'/'1' characters using the tree in hn[] and print
 * each decoded symbol followed by a space.
 *
 * buffer: text to decode; decoding stops at the first character that is
 *         neither '0' nor '1' (including the terminating '\0').
 * j:      index of the node to start from; callers pass the root, 2*LEAF-2.
 *
 * '0' follows the left child and '1' the right child. When a leaf is reached
 * its symbol is printed and decoding restarts from the root.
 */
void huffmanDecoding(char* buffer,int j ){
    const int root = 2 * LEAF - 2;

    if (buffer == NULL){
        return;
    }

    /* Iterative walk: stack use does not grow with input length, and a void
     * function never returns an expression. */
    while (j >= 0 && j <= root){
        if (hn[j].left == -1 && hn[j].right == -1){
            printf("%c ", hn[j].c);
            if (j == root){
                /* The root itself is a leaf: no bit is ever consumed, so
                 * restarting here would loop forever. */
                return;
            }
            j = root;
            continue;
        }

        if (*buffer == '0'){
            j = hn[j].left;
        }
        else if (*buffer == '1'){
            j = hn[j].right;
        }
        else {
            return; /* end of input or a character that is not a code bit */
        }
        buffer++;
    }
}
/*
 * Demo driver: builds the tree for a fixed frequency table, prints the code
 * of every symbol, then reads one line of '0'/'1' characters from standard
 * input and decodes it.
 *
 * Returns 0 on success, 1 if no input line could be read.
 */
int main(){
    char buffer[30];
    int a[LEAF] = { 7, 5, 2, 4 }; /* frequency table, assumed to be in alphabetical order of the symbols */

    initNode(a);
    createHuffmanTree();
    huffmanEncoding();
    toPrint();

    printf("enter the code to decrypt");
    fflush(stdout); /* the prompt has no newline, so flush it before blocking on input */

    /* fgets never writes past the buffer, and its result says whether anything
     * was read. A trailing '\n' may remain in the buffer; the decoder stops at
     * the first character that is not '0' or '1', so it is harmless. */
    if (fgets(buffer, sizeof buffer, stdin) == NULL){
        return 1;
    }

    huffmanDecoding(buffer, 2 * LEAF - 2);
    return 0;
}
例子输出: