/* 赫夫曼编码Huffman,数据压缩编码
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WEIGHT 100000 /* 定义最大权值 */
/*
 * One node of a Huffman tree kept in a dynamically allocated array.
 * Links between nodes are array indices rather than pointers; the code in
 * this file treats index 0 as "no node" (a parent of 0 marks a root).
 * HuffmanTree is a pointer to the first element of such an array.
 */
typedef struct _HTNode
{
    unsigned int weight;  /* frequency / weight / occurrence count */
    unsigned int parent;  /* index of the parent node, 0 if none */
    unsigned int lchild;  /* index of the left child, 0 if none */
    unsigned int rchild;  /* index of the right child, 0 if none */
} HTNode, *HuffmanTree;
/* Huffman code table: a dynamically allocated array of C strings. */
typedef char **HuffmanCode;
/*
 * Among HT[1..n], pick the two nodes that have no parent (parent == 0) and
 * the smallest weights.
 *
 *   HT  node array; slot 0 is never read by this function
 *   n   highest index to examine
 *   s1  receives the index of the lightest parentless node, 0 if there is none
 *   s2  receives the index of the second lightest one, 0 if fewer than two exist
 *
 * Comparisons are strict, so among equal weights the lower index wins.
 * The function does nothing when HT, s1 or s2 is NULL.
 *
 * NOTE(review): the name is the same as POSIX select(); if a system header
 * pulled in by this file declares it, the definitions conflict - confirm the
 * build configuration.
 */
void select(HuffmanTree HT, int n, int *s1, int *s2)
{
    int min = 0;        /* 0 means "no candidate found yet" */
    int second_min = 0; /* 0 means "no second candidate found yet" */
    int i;

    if (HT == NULL || s1 == NULL || s2 == NULL)
    {
        return;
    }
    for (i = 1; i <= n; i++)
    {
        if (HT[i].parent != 0)
        {
            continue; /* already merged into a subtree */
        }
        /* Test for "no candidate" explicitly instead of comparing against a
         * sentinel weight in slot 0, so arbitrarily large weights work. */
        if (min == 0 || HT[i].weight < HT[min].weight)
        {
            second_min = min;
            min = i;
        }
        else if (second_min == 0 || HT[i].weight < HT[second_min].weight)
        {
            second_min = i;
        }
    }
    *s1 = min;
    *s2 = second_min;
}
/****************************************************************************************
* Function: HuffmanCoding
* Purpose : Build a Huffman tree and derive the Huffman code of each of the n symbols.
* Inputs  : w   weights of the n symbols, w[0..n-1] (expected to be > 0)
*           n   number of symbols, must be at least 2
* Outputs : HT  receives a malloc'ed array of 2n nodes: slot 0 is not a tree node,
*               leaves are 1..n, internal nodes are n+1..2n-1, the root is 2n-1
*           HC  receives a malloc'ed table of n+1 pointers; (*HC)[i] for 1 <= i <= n
*               is a malloc'ed NUL-terminated string of '0'/'1', (*HC)[0] is NULL
* Returns : nothing.
*           If n <= 1 or any pointer argument is NULL, the function returns without
*           touching *HT or *HC.
*           If an allocation fails or no two roots can be selected, everything
*           allocated here is freed and both *HT and *HC are set to NULL.
*           On success the caller owns the tree, the table and every code string.
****************************************************************************************/
void HuffmanCoding(HuffmanTree *HT, HuffmanCode *HC, int *w, int n)
{
    int m;
    int i;
    int s1 = 0, s2 = 0;
    int start;
    unsigned int c, f;
    HuffmanTree tree = NULL;
    HuffmanCode codes = NULL;
    char *cd = NULL;

    if (n <= 1)
    {
        return;
    }
    if (HT == NULL || HC == NULL || w == NULL)
    {
        return;
    }

    m = 2 * n - 1;
    tree = malloc(((size_t)m + 1) * sizeof *tree); /* slot 0 is not a tree node */
    if (tree == NULL)
    {
        goto fail;
    }

    /* Slot 0 carries a large weight for select() variants that compare
     * against it; its links are zeroed so it never holds garbage. */
    tree[0].weight = MAX_WEIGHT + 1;
    tree[0].parent = 0;
    tree[0].lchild = 0;
    tree[0].rchild = 0;

    /* Leaves take the caller's weights, internal nodes start empty. */
    for (i = 1; i <= m; i++)
    {
        tree[i].weight = (i <= n) ? (unsigned int)w[i - 1] : 0;
        tree[i].parent = 0;
        tree[i].lchild = 0;
        tree[i].rchild = 0;
    }

    /* Repeatedly merge the two lightest roots into a new internal node. */
    for (i = n + 1; i <= m; i++)
    {
        s1 = 0;
        s2 = 0;
        select(tree, i - 1, &s1, &s2);
        if (s1 <= 0 || s2 <= 0 || s1 == s2)
        {
            goto fail; /* selection failed; do not write through slot 0 */
        }
        tree[s1].parent = (unsigned int)i;
        tree[s2].parent = (unsigned int)i;
        tree[i].lchild = (unsigned int)s1;
        tree[i].rchild = (unsigned int)s2;
        tree[i].weight = tree[s1].weight + tree[s2].weight;
    }

    /* Derive each code by walking from the leaf up to the root. */
    codes = malloc(((size_t)n + 1) * sizeof *codes);
    cd = malloc((size_t)n); /* a code has at most n-1 bits plus the terminator */
    if (codes == NULL || cd == NULL)
    {
        goto fail;
    }
    for (i = 0; i <= n; i++)
    {
        codes[i] = NULL;
    }
    cd[n - 1] = '\0';
    for (i = 1; i <= n; i++)
    {
        start = n - 1; /* bits are written right to left, ending before the NUL */
        for (c = (unsigned int)i, f = tree[i].parent; f != 0; c = f, f = tree[f].parent)
        {
            cd[--start] = (tree[f].lchild == c) ? '0' : '1';
        }
        /* Allocate once per symbol, after the whole path has been walked. */
        codes[i] = malloc((size_t)(n - start));
        if (codes[i] == NULL)
        {
            goto fail;
        }
        strcpy(codes[i], &cd[start]);
    }

    free(cd);
    *HT = tree;
    *HC = codes;
    return;

fail:
    free(cd);
    if (codes != NULL)
    {
        for (i = 1; i <= n; i++)
        {
            free(codes[i]);
        }
        free(codes);
    }
    free(tree);
    *HT = NULL;
    *HC = NULL;
}
/****************************************************************************************
* Function: GetHuffmanCoding
* Purpose : Derive the Huffman code of each of the n symbols by walking an existing
*           Huffman tree from the root, without recursion.
* Inputs  : HT  tree with leaves in 1..n and root at index 2n-1
*           n   number of symbols
* Outputs : HC  receives a malloc'ed table of n+1 pointers; (*HC)[p] for each leaf p
*               is a malloc'ed NUL-terminated string of '0'/'1', (*HC)[0] is NULL
* Returns : nothing.
*           If HT, *HT or HC is NULL, or n <= 0, the function returns without
*           touching anything.
*           If an allocation fails, everything allocated here is freed and *HC is
*           set to NULL.
* Note    : The weight field of nodes 1..2n-1 is overwritten and used as a visit
*           counter (0 = not visited, 1 = left side done, 2 = right side done).
*           All of these weights are 0 when the function returns.
****************************************************************************************/
void GetHuffmanCoding(HuffmanTree *HT, HuffmanCode *HC, int n)
{
    int m;
    int p;
    int cdlen = 0;
    int i;
    int failed = 0;
    HuffmanTree tree;
    HuffmanCode codes;
    char *cd;

    if (HT == NULL || HC == NULL || *HT == NULL || n <= 0)
    {
        return;
    }

    m = 2 * n - 1;
    tree = *HT;
    codes = malloc(((size_t)n + 1) * sizeof *codes);
    cd = malloc((size_t)n); /* current root-to-node path plus the terminator */
    if (codes == NULL || cd == NULL)
    {
        free(codes);
        free(cd);
        *HC = NULL;
        return;
    }
    for (i = 0; i <= n; i++)
    {
        codes[i] = NULL;
    }
    cd[n - 1] = '\0';

    /* Reset the visit counters before walking. */
    for (i = 1; i <= m; i++)
    {
        tree[i].weight = 0;
    }

    p = m; /* start at the root */
    while (p != 0)
    {
        if (tree[p].weight == 0)
        {
            /* First arrival: descend left, or record the code at a leaf. */
            tree[p].weight = 1;
            if (tree[p].lchild != 0)
            {
                p = (int)tree[p].lchild;
                cd[cdlen++] = '0';
            }
            else if (tree[p].rchild == 0)
            {
                codes[p] = malloc((size_t)cdlen + 1);
                if (codes[p] == NULL)
                {
                    failed = 1;
                    break;
                }
                cd[cdlen] = '\0';
                strcpy(codes[p], cd);
            }
        }
        else if (tree[p].weight == 1)
        {
            /* Second arrival: descend right if there is a right child. */
            tree[p].weight = 2;
            if (tree[p].rchild != 0)
            {
                p = (int)tree[p].rchild;
                cd[cdlen++] = '1';
            }
        }
        else
        {
            /* Third arrival: both sides done, step back up to the parent. */
            tree[p].weight = 0;
            p = (int)tree[p].parent;
            cdlen--;
        }
    }

    free(cd); /* scratch buffer is no longer needed */

    if (failed)
    {
        for (i = 1; i <= n; i++)
        {
            free(codes[i]);
        }
        free(codes);
        /* Leave the counters in the same state as after a complete walk. */
        for (i = 1; i <= m; i++)
        {
            tree[i].weight = 0;
        }
        *HC = NULL;
        return;
    }
    *HC = codes;
}
/*
 * Demo driver: builds a Huffman tree for 16 weights, prints the codes obtained
 * by the leaf-to-root method together with their total length, then prints the
 * codes obtained by the root-to-leaf traversal.
 * Returns 0 on success and EXIT_FAILURE if either step produced no result.
 */
int main(void)
{
    int w[16] = {1,2,3,4,5,6,7,8,9,10};
    int n = 16;
    int i = 0;
    int sumlen = 0;
    int status = 0;
    HuffmanTree HT = NULL;
    HuffmanCode HC = NULL;
    HuffmanCode HC1 = NULL;

    /* w[0] keeps its initializer (1); w[1..n-1] become 1..n-1.
     * The loop stops at n-1 because w[n] is past the end of the array. */
    for (i = 1; i < n; i++)
    {
        w[i] = i;
    }

    HuffmanCoding(&HT, &HC, w, n);
    if (HT == NULL || HC == NULL)
    {
        fprintf(stderr, "HuffmanCoding failed\n");
        return EXIT_FAILURE;
    }
    printf("HuffmanCode:\n");
    for (i = 1; i <= n; i++)
    {
        printf("%s\n", HC[i]);
        sumlen += (int)strlen(HC[i]);
    }
    printf("sumlen = %d\n", sumlen);

    GetHuffmanCoding(&HT, &HC1, n);
    if (HC1 != NULL)
    {
        printf("HuffmanCode:\n");
        for (i = 1; i <= n; i++)
        {
            printf("%s\n", HC1[i]);
        }
    }
    else
    {
        fprintf(stderr, "GetHuffmanCoding failed\n");
        status = EXIT_FAILURE;
    }

    /* Release the code strings (entries 1..n), both tables and the tree. */
    for (i = 1; i <= n; i++)
    {
        free(HC[i]);
        if (HC1 != NULL)
        {
            free(HC1[i]);
        }
    }
    free(HC);
    free(HC1);
    free(HT);

    if (status == 0)
    {
        puts("Bye!");
    }
    return status;
}