哈夫曼树的概念:
给定 N 个权值作为 N 个叶子结点,构造一棵二叉树;若该树的带权路径长度(WPL,即各叶子结点的权值与其到根结点的路径长度之积的总和)达到最小,则称这样的二叉树为最优二叉树,也称为哈夫曼树(Huffman Tree)。哈夫曼树是带权路径长度最短的树,权值较大的结点离根较近。
哈夫曼树的结构定义
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Sentinel weight: intended to be larger than every node weight in the tree. */
#define MAX 10000
/*
 * One node of a Huffman tree kept in an array.
 * Links are array indices rather than pointers; the value 0 stands for
 * "no such node", which is why array slot 0 is left unused.
 */
typedef struct HTNode {
    int weight;  /* weight carried by this node */
    int parent;  /* index of the parent node, 0 while the node has none */
    int lch;     /* index of the left child, 0 if absent */
    int rch;     /* index of the right child, 0 if absent */
} HTNode;
/* A Huffman tree is handled as a pointer to a dynamically allocated array of
 * HTNode; elements are addressed from index 1, slot 0 is not used. */
typedef struct HTNode* HuffmanTree;
/* Code table: an array of NUL-terminated strings of '0'/'1' characters, one
 * per leaf, also addressed from index 1. */
typedef char** HuffmanCode;
构造哈夫曼树
/*
 * Select - pick the two parentless nodes with the smallest weights.
 *
 * Scans HT[1..length] and considers only nodes whose parent field is 0,
 * i.e. nodes that have not yet been merged under another node.
 *
 * HT:     node array addressed from index 1; HT[0] is never read.
 * length: highest index to examine.
 * e1:     receives the index of the lightest candidate, or 0 if there is none.
 * e2:     receives the index of the second-lightest candidate, or 0 if fewer
 *         than two candidates exist.
 *
 * When weights are equal the node with the lower index wins, because only a
 * strictly smaller weight displaces a current minimum.
 *
 * The running minima are tracked by index (0 = "nothing found yet") instead
 * of being compared against a fixed sentinel weight, so nodes are selected
 * correctly no matter how large their weights are.
 */
void Select(HuffmanTree HT, int length, int* e1, int* e2)
{
    int first = 0;   /* index of the smallest weight seen so far */
    int second = 0;  /* index of the second-smallest weight seen so far */

    for (int i = 1; i <= length; i++) {
        if (HT[i].parent != 0) {
            continue;  /* already merged into a subtree */
        }
        if (first == 0 || HT[i].weight < HT[first].weight) {
            /* New minimum: the previous minimum becomes the runner-up. */
            second = first;
            first = i;
        } else if (second == 0 || HT[i].weight < HT[second].weight) {
            second = i;
        }
    }

    *e1 = first;
    *e2 = second;
}
/*
 * CreateHuffmanTree - read n leaf weights from stdin and build a Huffman tree.
 *
 * HT: out-parameter. On success *HT points to a malloc'd array of 2n nodes:
 *     slot 0 is unused, slots 1..n are the leaves in input order, slots
 *     n+1..2n-1 are the internal nodes and slot 2n-1 is the root. The caller
 *     releases the array with free().
 * n:  number of leaves. When n <= 1 the function returns immediately and
 *     leaves *HT untouched.
 *
 * If the allocation fails, a weight cannot be read, or two parentless nodes
 * cannot be found for a merge, any allocated memory is released and *HT is
 * set to NULL.
 */
void CreateHuffmanTree(HuffmanTree* HT, int n)
{
    if (n <= 1) return;

    /* n leaves produce n - 1 internal nodes: 2n - 1 nodes in total. */
    int m = 2 * n - 1;

    /* One extra element because slot 0 is not used. */
    HuffmanTree tree = (HuffmanTree)malloc(((size_t)m + 1) * sizeof(HTNode));
    *HT = tree;
    if (tree == NULL) return;

    /* Start every node (including the unused slot 0) in a known state. */
    for (int i = 0; i <= m; i++) {
        tree[i].weight = 0;
        tree[i].parent = 0;
        tree[i].lch = 0;
        tree[i].rch = 0;
    }

    /* Read the leaf weights; stop on malformed or missing input. */
    for (int i = 1; i <= n; i++) {
        if (scanf("%d", &tree[i].weight) != 1) {
            free(tree);
            *HT = NULL;
            return;
        }
    }

    /* Repeatedly merge the two lightest parentless nodes into new node i. */
    for (int i = n + 1; i <= m; i++) {
        int s1 = 0;
        int s2 = 0;
        Select(tree, i - 1, &s1, &s2);

        /* Select reports 0 when it finds no candidate; never merge slot 0. */
        if (s1 == 0 || s2 == 0) {
            free(tree);
            *HT = NULL;
            return;
        }

        tree[s1].parent = i;
        tree[s2].parent = i;
        tree[i].lch = s1;
        tree[i].rch = s2;
        tree[i].weight = tree[s1].weight + tree[s2].weight;
    }
}
哈夫曼编码的算法实现
/*
 * CreateHuffmanCode - derive the code of every leaf by walking from the leaf
 * up to the root.
 *
 * HT: Huffman tree array addressed from index 1; leaves occupy 1..n.
 * HC: out-parameter. On success *HC points to a malloc'd table of n + 1
 *     entries: entry 0 is NULL and entries 1..n are malloc'd NUL-terminated
 *     strings of '0' (left branch) and '1' (right branch). The caller frees
 *     each string and then the table.
 * n:  number of leaves.
 *
 * *HC is set to NULL, with nothing left allocated, when HT is NULL, n <= 0,
 * an allocation fails, or a leaf's parent chain is longer than the n - 1
 * steps the scratch buffer can hold.
 */
void CreateHuffmanCode(HuffmanTree HT, HuffmanCode*HC, int n)
{
    *HC = NULL;
    if (HT == NULL || n <= 0) return;

    /* Like the tree, the table leaves slot 0 unused. */
    char** table = (char**)malloc(((size_t)n + 1) * sizeof(char*));
    /* Scratch buffer: a code has at most n - 1 symbols plus the terminator. */
    char* cd = (char*)malloc((size_t)n);
    if (table == NULL || cd == NULL) {
        free(table);
        free(cd);
        return;
    }

    table[0] = NULL;
    cd[n - 1] = '\0';

    for (int i = 1; i <= n; i++) {
        int start = n - 1;  /* the code is built backwards from the end */
        int current = i;    /* node whose branch is being recorded */
        int fits = 1;

        for (int f = HT[i].parent; f != 0; f = HT[f].parent) {
            if (start == 0) {
                fits = 0;   /* chain too long for the buffer: stop */
                break;
            }
            cd[--start] = (current == HT[f].lch) ? '0' : '1';
            current = f;
        }

        /* Symbols occupy cd[start .. n-2]; n - start bytes with the '\0'. */
        char* code = fits ? (char*)malloc((size_t)(n - start)) : NULL;
        if (code == NULL) {
            for (int k = 1; k < i; k++) {
                free(table[k]);
            }
            free(table);
            free(cd);
            return;
        }

        /* The destination is sized exactly, so the copy cannot overrun. */
        strcpy(code, &cd[start]);
        table[i] = code;
    }

    free(cd);
    *HC = table;
}