哈夫曼
构造哈夫曼树的原则:
- 权值越大的叶节点越靠近根节点
- 权值越小的叶节点越远离根节点
哈夫曼树的构造过程
- 用给定的 n 个权值 {w1,w2,…,wn} 构造 n 棵只含根节点的二叉树(第 i 棵树根节点的权值为 wi),由它们构成森林 F={T1,T2,…,Tn}
- 在F中选取根节点权值最小和次小的两棵二叉树作为左右子树,构造一棵新的二叉树,新二叉树根节点的权值为其左右子树根节点的权值之和。
- 在集合F中删除作为左右子树的两棵二叉树,并将新建立的二叉树加入到集合F中。
- 重复以上两步,直到F中只剩下一棵二叉树,这棵二叉树就是哈夫曼树。
哈夫曼编码
规定哈夫曼树中的左分支代表0,右分支代表1,则从根节点到每一个叶节点所经过的分支对应的0和1组成的序列便为该节点对应字符的编码。这样的编码称为哈夫曼编码。
哈夫曼编码的特点 :权值越大的字符编码越短,反之越长。
哈夫曼树和哈夫曼编码的存储实现
/* One node of the Huffman tree; nodes live in an array and refer to each other by index. */
typedef struct
{
    unsigned int weight;  /* weight carried by this node */
    unsigned int parent;  /* index of the parent node */
    unsigned int lchild;  /* index of the left child */
    unsigned int rchild;  /* index of the right child */
} HTNode, *HuffmanTree;

/* Code table: a pointer to an array of character strings. */
typedef char **HuffmanCode;
/*
 * Build a Huffman tree for n weights and derive the code string of every leaf.
 *
 * HT - out: array of 2n-1 nodes stored at indices 1..2n-1 (slot 0 is unused and
 *      the value 0 in parent/lchild/rchild means "none"). Leaves are HT[1..n],
 *      in the same order as w.
 * HC - out: array of n+1 char pointers; HC[i] (1 <= i <= n) is the
 *      NUL-terminated code of the i-th weight, HC[0] is NULL. With a single
 *      weight the leaf is also the root, so its code is the empty string.
 * w  - the n weights.
 * n  - number of weights; nothing is done (HT and HC untouched) when n < 1.
 *
 * HT, HC and every HC[i] are allocated with malloc and belong to the caller.
 * If any allocation fails, everything allocated here is freed and both HT and
 * HC are set to NULL.
 */
void HuffmanCoding(HuffmanTree& HT, HuffmanCode& HC, unsigned int* w, int n)
{
    int m, i, s1, s2, start;
    int c, f;
    char* cd;

    if (n < 1) return;
    m = 2 * n - 1; /* total number of nodes in the tree */

    HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
    HC = (HuffmanCode)malloc((n + 1) * sizeof(char*));
    cd = (char*)malloc(n * sizeof(char)); /* scratch buffer: a code has at most n-1 symbols plus the terminator */
    if (HT == NULL || HC == NULL || cd == NULL)
    {
        free(cd);
        free(HC);
        free(HT);
        HT = NULL;
        HC = NULL;
        return;
    }
    HC[0] = NULL;

    /* Leaves take the given weights; internal nodes start out empty. */
    for (i = 1; i <= m; ++i)
    {
        HT[i].weight = (i <= n) ? w[i - 1] : 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }

    /* Repeatedly join the two lightest parentless nodes under a new node i. */
    for (i = n + 1; i <= m; ++i)
    {
        Select(HT, i - 1, &s1, &s2);
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }

    /* Walk from each leaf up to the root, filling the scratch buffer back to front. */
    cd[n - 1] = '\0';
    for (i = 1; i <= n; ++i)
    {
        start = n - 1;
        for (c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent)
        {
            /* left branch contributes '0', right branch '1' */
            cd[--start] = (HT[f].lchild == (unsigned int)c) ? '0' : '1';
        }

        /* Allocate once per leaf, after the whole path is known. */
        HC[i] = (char*)malloc((n - start) * sizeof(char));
        if (HC[i] == NULL)
        {
            while (--i >= 1) free(HC[i]);
            free(cd);
            free(HC);
            free(HT);
            HT = NULL;
            HC = NULL;
            return;
        }
        strcpy(HC[i], &cd[start]);
    }
    free(cd); /* release the scratch buffer */
}
//--------- Traverse the Huffman tree without a stack or recursion to obtain the codes
// NOTE(review): this is a fragment meant to replace the leaf-to-root section of
// HuffmanCoding. It assumes p and cdlen are ints and cd is a scratch buffer of at
// least n chars at the point of insertion - confirm before use.
HC = (HuffmanCode)malloc((n + 1) * sizeof(char*));
p = m; cdlen = 0; // start at the root (node m) with an empty code
for (i = 1; i <= m; ++i) HT[i].weight = 0; // weight is reused as the visit-state flag of each node
while (p) {
    if (HT[p].weight == 0) { // state 0: first arrival, try to go left
        HT[p].weight = 1;
        if (HT[p].lchild != 0) { p = HT[p].lchild; cd[cdlen++] = '0'; }
        else if (HT[p].rchild == 0) { // no children: a leaf, store its code
            HC[p] = (char*)malloc((cdlen + 1) * sizeof(char));
            cd[cdlen] = '\0'; strcpy(HC[p], cd);
        }
    }
    else if (HT[p].weight == 1) { // state 1: left side done, try to go right
        HT[p].weight = 2;
        if (HT[p].rchild != 0) { p = HT[p].rchild; cd[cdlen++] = '1'; }
    }
    else { // state 2: both sides done, step back to the parent and drop one symbol
        HT[p].weight = 0; p = HT[p].parent; --cdlen;
    }
}
完整代码
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MAX 9999
#define FALSE 0
#define OK 1
#define INFEASIBLE -1
#define OVERFLOW -2
typedef int Status;
/* One node of the Huffman tree; nodes live in an array and refer to each other by index. */
typedef struct
{
    unsigned int weight;  /* weight carried by this node */
    unsigned int parent;  /* index of the parent node */
    unsigned int lchild;  /* index of the left child */
    unsigned int rchild;  /* index of the right child */
} HTNode, *HuffmanTree;

/* Code table: a pointer to an array of character strings. */
typedef char **HuffmanCode;
/*
 * Find the two lightest nodes among HT[1..n] that have no parent yet.
 *
 * HT - node array, scanned from index 1 to n inclusive.
 * n  - highest index to examine.
 * s1 - out: index of the node with the smallest weight.
 * s2 - out: index of the node with the second smallest weight.
 *
 * On equal weights the node met first keeps the better rank. Both outputs start
 * at 1, so when fewer than two parentless nodes exist the unfilled output(s)
 * remain 1. Always returns OK.
 *
 * "Found" flags are tracked instead of comparing against a fixed upper bound,
 * so arbitrarily large weights are handled correctly.
 */
Status Select(HuffmanTree HT, int n, int* s1, int* s2)
{
    unsigned int min1 = 0, min2 = 0; /* smallest and second smallest weight seen so far */
    int have_first = 0, have_second = 0; /* whether min1 / min2 hold a real weight yet */

    *s1 = *s2 = 1;
    for (int i = 1; i <= n; i++)
    {
        if (HT[i].parent != 0)
            continue; /* already merged into a larger tree */

        unsigned int weight = HT[i].weight;
        if (!have_first || weight < min1)
        {
            /* new minimum: the previous minimum drops to second place */
            min2 = min1;
            have_second = have_first;
            *s2 = *s1;
            min1 = weight;
            have_first = 1;
            *s1 = i;
        }
        else if (!have_second || weight < min2)
        {
            min2 = weight;
            have_second = 1;
            *s2 = i;
        }
    }
    return OK;
}
/*
 * Build a Huffman tree for n weights and derive the code string of every leaf.
 *
 * HT - out: array of 2n-1 nodes stored at indices 1..2n-1 (slot 0 is unused and
 *      the value 0 in parent/lchild/rchild means "none"). Leaves are HT[1..n],
 *      in the same order as w.
 * HC - out: array of n+1 char pointers; HC[i] (1 <= i <= n) is the
 *      NUL-terminated code of the i-th weight, HC[0] is NULL. With a single
 *      weight the leaf is also the root, so its code is the empty string.
 * w  - the n weights.
 * n  - number of weights; nothing is done (HT and HC untouched) when n < 1.
 *
 * HT, HC and every HC[i] are allocated with malloc and belong to the caller.
 * If any allocation fails, everything allocated here is freed and both HT and
 * HC are set to NULL.
 */
void HuffmanCoding(HuffmanTree& HT, HuffmanCode& HC, unsigned int* w, int n)
{
    int m, i, s1, s2, start;
    int c, f;
    char* cd;

    if (n < 1) return;
    m = 2 * n - 1; /* total number of nodes in the tree */

    HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
    HC = (HuffmanCode)malloc((n + 1) * sizeof(char*));
    cd = (char*)malloc(n * sizeof(char)); /* scratch buffer: a code has at most n-1 symbols plus the terminator */
    if (HT == NULL || HC == NULL || cd == NULL)
    {
        free(cd);
        free(HC);
        free(HT);
        HT = NULL;
        HC = NULL;
        return;
    }
    HC[0] = NULL;

    /* Leaves take the given weights; internal nodes start out empty. */
    for (i = 1; i <= m; ++i)
    {
        HT[i].weight = (i <= n) ? w[i - 1] : 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }

    /* Repeatedly join the two lightest parentless nodes under a new node i. */
    for (i = n + 1; i <= m; ++i)
    {
        Select(HT, i - 1, &s1, &s2);
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }

    /* Walk from each leaf up to the root, filling the scratch buffer back to front. */
    cd[n - 1] = '\0';
    for (i = 1; i <= n; ++i)
    {
        start = n - 1;
        for (c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent)
        {
            /* left branch contributes '0', right branch '1' */
            cd[--start] = (HT[f].lchild == (unsigned int)c) ? '0' : '1';
        }

        /* Allocate once per leaf, after the whole path is known. */
        HC[i] = (char*)malloc((n - start) * sizeof(char));
        if (HC[i] == NULL)
        {
            while (--i >= 1) free(HC[i]);
            free(cd);
            free(HC);
            free(HT);
            HT = NULL;
            HC = NULL;
            return;
        }
        strcpy(HC[i], &cd[start]);
    }
    free(cd); /* release the scratch buffer */
}
/*
 * Print every weight together with its code string to standard output.
 *
 * HC - code table; entries HC[1..n] are read.
 * w  - the weights; entries w[0..n-1] are read.
 * n  - number of weight/code pairs to print.
 *
 * Always returns OK.
 */
Status PrintHuffmanCode(HuffmanCode HC, unsigned int* w, int n)
{
    printf("Huffman code : \n");
    for (int i = 1; i <= n; i++)
    {
        /* w is indexed from 0 while HC is indexed from 1; %u matches the unsigned weight */
        printf("%u code = %s\n", w[i - 1], HC[i]);
    }
    return OK;
}
/* Demo: build the Huffman codes for a fixed set of weights, print them, then release the tables. */
int main()
{
    unsigned int w[] = { 2,15,30,8,10,5,12,18 };
    const int n = (int)(sizeof(w) / sizeof(w[0])); /* derive the count from the array itself */
    HuffmanCode HC = NULL;
    HuffmanTree HT = NULL;

    HuffmanCoding(HT, HC, w, n);
    if (HT == NULL || HC == NULL)
        return 1; /* nothing to print if the tables were not produced */

    PrintHuffmanCode(HC, w, n);

    /* The code strings, the code table and the tree are heap-allocated; free them. */
    for (int i = 1; i <= n; i++)
        free(HC[i]);
    free(HC);
    free(HT);
    return 0;
}