/*
 * 计算哈夫曼树的WPL值
 * 根据给定的n个权值(非负值),计算所构造哈夫曼树的WPL值。
 * 基本要求:
 * (1)根据给定的数据,建立哈夫曼树;
 * (2)输出每个叶子结点的带权路径长度;
 * (3)输出哈夫曼树的WPL值。
 * 测试数据要求: 输入的n个权值之和应为100,且不允许有负值。
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Number of leaf symbols: five lowercase English letters in this example.
// The example could be extended to every printable character; as written,
// input strings may contain lowercase letters only.
// NOTE(review): this is a one-letter lowercase macro, so the preprocessor
// replaces every identifier spelled `n` in the rest of the file.
#define n 5
// Huffman tree types.
// A node of the tree. The whole tree lives in one array and the links between
// nodes are array positions rather than pointers; the functions in this file
// treat position 0 as "no link".
typedef struct {
    int weight;   // weight of the node
    int parent;   // position of the parent node
    int lchild;   // position of the left child
    int rchild;   // position of the right child
} HTNode;

// Pointer to a single node.
typedef HTNode *PHTNode;

// A tree, i.e. a pointer to the first element of the node array.
typedef HTNode *HTree;
// Code table: an array of C strings, one code string per leaf.
// The functions in this file index it from 1 and never fill slot 0.
typedef char **HCode;
// Helper: among nodes 1..k that do not have a parent yet, pick the two with
// the smallest weights.
//
//   HT     - node array (slot 0 unused); parent != 0 marks a node that has
//            already been merged into the tree and is skipped.
//   k      - highest index to examine.
//   s1, s2 - outputs: indices of the two chosen nodes, ordered so that
//            s1 < s2, which keeps the generated codes more regular.
//
// Ties are resolved in favour of the lower index. If only one parentless node
// exists, s1 is that node and s2 is 0; if none exists both are 0.
void Select(HTree HT, int k, int &s1, int &s2)
{
    // Track candidates by index (0 = none yet) instead of comparing against a
    // magic "very large" weight, so every weight value is handled and the
    // outputs never depend on what the caller's variables held on entry.
    int lightest = 0;
    int second = 0;

    for (int i = 1; i <= k; i++) {
        if (HT[i].parent) {
            continue;   // already attached to a parent
        }
        if (lightest == 0 || HT[i].weight < HT[lightest].weight) {
            second = lightest;
            lightest = i;
        } else if (second == 0 || HT[i].weight < HT[second].weight) {
            second = i;
        }
    }

    // Report the pair in index order, but only when there really is a pair;
    // otherwise the "missing" 0 must stay in s2.
    if (second != 0 && lightest > second) {
        int tmp = lightest;
        lightest = second;
        second = tmp;
    }
    s1 = lightest;
    s2 = second;
}
// Builds the Huffman tree for the n leaf weights.
//
//   HT - output: newly allocated array of 2*n nodes (slot 0 is unused, leaves
//        occupy 1..n, internal nodes n+1..2*n-1, the root is node 2*n-1).
//        The caller owns the array and releases it with free().
//        Set to NULL if the allocation fails.
//   w  - leaf weights, read from w[1]..w[n]; w[0] is not read.
void ConstructHuffmanTree(HTree &HT, int *w)
{
    const int m = 2 * n - 1;   // total number of nodes

    // calloc zeroes every field, so all parent/child links start out as
    // "none" and the internal nodes start with weight 0.
    HT = (HTree)calloc((size_t)(m + 1), sizeof(HTNode));
    if (HT == NULL) {
        return;
    }

    for (int i = 1; i <= n; i++) {
        HT[i].weight = w[i];
    }

    // Repeatedly merge the two lightest parentless nodes into a new node i.
    for (int i = n + 1; i <= m; i++) {
        int s1 = 0;
        int s2 = 0;
        Select(HT, i - 1, s1, s2);
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }
}
// Fills the code table by descending recursively from node p towards the
// leaves.
//
//   HT    - the Huffman tree.
//   HC    - code table; HC[p] receives a freshly malloc'ed string for every
//           node with p <= n that is reached.
//   p     - node to process (the caller starts at the root).
//   cd    - scratch buffer holding the code of the path walked so far.
//   cdlen - number of valid characters currently in cd.
void GetCodeTable(HTree HT, HCode HC, int p, char *cd, int cdlen)
{
    const int reachedLeaf = (p <= n);

    if (reachedLeaf) {
        // Terminate the path collected so far and store a private copy of it.
        cd[cdlen] = '\0';
        HC[p] = (char *)malloc((cdlen + 1) * sizeof(char));
        strcpy(HC[p], cd);
    }

    // Going left appends '0' to the code.
    const int left = HT[p].lchild;
    if (left != 0) {
        cd[cdlen] = '0';
        GetCodeTable(HT, HC, left, cd, cdlen + 1);
    }

    // Going right appends '1' to the code.
    const int right = HT[p].rchild;
    if (right != 0) {
        cd[cdlen] = '1';
        GetCodeTable(HT, HC, right, cd, cdlen + 1);
    }
}
// Builds the code table by walking from every leaf up to the root (the
// textbook algorithm).
//
//   HT - the Huffman tree; it is only read.
//   HC - output: newly allocated table with n+1 slots. HC[1]..HC[n] each hold
//        a malloc'ed code string, HC[0] is NULL. The caller frees the strings
//        and the table. If any allocation fails, everything allocated here is
//        released and HC is set to NULL.
void GetCodeTable1(HTree HT, HCode &HC)
{
    // calloc so that unused/unfilled slots are NULL and safe to free().
    HC = (HCode)calloc(n + 1, sizeof(char *));
    if (HC == NULL) {
        return;
    }

    // A code has at most n-1 symbols, so n bytes hold the longest code plus
    // its terminator.
    char *cd = (char *)malloc(n * sizeof(char));
    if (cd == NULL) {
        free(HC);
        HC = NULL;
        return;
    }
    cd[n - 1] = '\0';

    int failed = 0;
    for (int i = 1; i <= n && !failed; i++) {
        // The code is produced last symbol first, so fill cd from its end.
        int start = n - 1;
        int c = i;
        for (int f = HT[i].parent; f != 0; f = HT[f].parent) {
            cd[--start] = (c == HT[f].lchild) ? '0' : '1';
            c = f;
        }

        HC[i] = (char *)malloc((n - start) * sizeof(char));
        if (HC[i] == NULL) {
            failed = 1;
        } else {
            strcpy(HC[i], cd + start);
        }
    }
    free(cd);

    if (failed) {
        for (int i = 1; i <= n; i++) {
            free(HC[i]);
        }
        free(HC);
        HC = NULL;
    }
}
// Builds the code table by walking from the root down to the leaves without
// recursion (the textbook algorithm); a typical way of simulating the
// recursion stack with per-node state.
//
//   HT - the Huffman tree. WARNING: the weight field of every node is reused
//        as a visit marker (0 = nothing visited, 1 = left subtree visited,
//        2 = both visited), so the original weights are destroyed.
//   HC - output: newly allocated table with n+1 slots; every childless node p
//        that is reached gets a malloc'ed code string in HC[p], other slots
//        are NULL. The caller frees the strings and the table. If any
//        allocation fails, everything allocated here is released and HC is
//        set to NULL.
void GetCodeTable2(HTree HT, HCode &HC)
{
    const int m = 2 * n - 1;   // index of the root

    // calloc so that unfilled slots are NULL and safe to free().
    HC = (HCode)calloc(n + 1, sizeof(char *));
    if (HC == NULL) {
        return;
    }
    char *cd = (char *)malloc(n * sizeof(char));
    if (cd == NULL) {
        free(HC);
        HC = NULL;
        return;
    }

    // Borrow weight as the visit marker.
    for (int i = 1; i <= m; i++) {
        HT[i].weight = 0;
    }

    int p = m;
    int cdlen = 0;
    int failed = 0;
    while (p) {
        // Neither subtree visited yet: go left.
        if (HT[p].weight == 0) {
            HT[p].weight = 1;   // left subtree is now being visited
            // Not a leaf yet: descend and record the branch taken.
            if (HT[p].lchild) {
                p = HT[p].lchild;
                cd[cdlen++] = '0';
                continue;
            }
            // No children at all: this is a leaf, so register its code.
            // (Registering in the right-subtree step below would work too.)
            if (!HT[p].rchild) {
                HC[p] = (char *)malloc((cdlen + 1) * sizeof(char));
                if (HC[p] == NULL) {
                    failed = 1;
                    break;
                }
                cd[cdlen] = '\0';
                strcpy(HC[p], cd);
            }
            continue;
        }
        // Left subtree done, right subtree not yet: go right.
        if (HT[p].weight == 1) {
            HT[p].weight = 2;   // both subtrees are now accounted for
            if (HT[p].rchild) {
                p = HT[p].rchild;
                cd[cdlen++] = '1';
                continue;
            }
            continue;
        }
        // Both subtrees done: reset the marker, drop the last code symbol and
        // climb back to the parent.
        HT[p].weight = 0;
        --cdlen;
        p = HT[p].parent;
    }

    // The scratch buffer is private to this function.
    free(cd);

    if (failed) {
        for (int i = 1; i <= n; i++) {
            free(HC[i]);
        }
        free(HC);
        HC = NULL;
    }
}
// Prints the code table: for each of the n leaves, the path length (length of
// its code), the letter it stands for ('a' for leaf 1, 'b' for leaf 2, ...)
// and the code itself.
//
//   HC - code table; HC[1]..HC[n] must be valid strings.
void PrintCodeTable(HCode HC)
{
    printf("\nThe Code Table:\n");
    for (int i = 1; i <= n; i++) {
        // strlen() returns size_t; convert explicitly because %d expects int.
        printf("路径长度:%d %c : %s\n", (int)strlen(HC[i]), 'a' + i - 1, HC[i]);
    }
}
// Encodes a string with the code table.
//
//   HC   - code table; HC[1]..HC[n] are the codes of 'a', 'b', ...
//   str  - NUL-terminated input; every character must be one of the n letters
//          covered by the table.
//   code - output: *code receives a newly malloc'ed string holding the
//          concatenated codes (at least 1024 bytes are always reserved, as
//          before). The caller frees it. *code is set to NULL if str contains
//          a character outside the table or if the allocation fails.
void Encode(HCode HC, char *str, char **code)
{
    *code = NULL;

    // First pass: validate the input and measure the output, so that the
    // buffer can never overflow and no character indexes outside the table.
    size_t total = 0;
    for (size_t pos = 0; str[pos]; pos++) {
        const int idx = str[pos] - 'a' + 1;
        if (idx < 1 || idx > n) {
            return;
        }
        total += strlen(HC[idx]);
    }

    // Keep the historical minimum capacity for callers that relied on it.
    size_t capacity = total + 1;
    if (capacity < 1024) {
        capacity = 1024;
    }
    char *out = (char *)malloc(capacity);
    if (out == NULL) {
        return;
    }

    // Second pass: append each code at the running tail instead of letting
    // strcat rescan the whole buffer every time.
    char *tail = out;
    for (size_t pos = 0; str[pos]; pos++) {
        const char *piece = HC[str[pos] - 'a' + 1];
        const size_t len = strlen(piece);
        memcpy(tail, piece, len);
        tail += len;
    }
    *tail = '\0';

    *code = out;
}
// Computes the weighted path length (WPL) of the Huffman tree: the sum over
// all n leaves of (length of the leaf's code) * (weight of the leaf).
//
//   HC - code table; HC[1]..HC[n] are the leaf codes.
//   w  - leaf weights, read from w[1]..w[n].
int WPL(HCode HC, int w[])
{
    int total = 0;
    int leaf = 1;

    while (leaf <= n) {
        // The length of a leaf's code equals its depth in the tree.
        total += strlen(HC[leaf]) * w[leaf];
        ++leaf;
    }
    return total;
}
// Demo driver: builds the Huffman tree for five fixed weights, prints the
// code table and the weighted path length, then releases all memory.
int main()
{
    // Weights (frequency counts) of the five letters; index 0 is a
    // placeholder because the tree code counts from 1.
    int w[] = { 0,5,20,25,5,45};

    HTree HT = NULL;
    ConstructHuffmanTree(HT, w);   // build the Huffman tree
    if (HT == NULL) {
        fprintf(stderr, "failed to build the Huffman tree\n");
        return EXIT_FAILURE;
    }

    // Derive the code table from the tree. calloc leaves every slot NULL
    // (including the unused slot 0), so the clean-up below is always safe.
    HCode HC = (HCode)calloc(n + 1, sizeof(char *));
    char *cd = (char *)malloc(n * sizeof(char));
    if (HC == NULL || cd == NULL) {
        fprintf(stderr, "out of memory\n");
        free(cd);
        free(HC);
        free(HT);
        return EXIT_FAILURE;
    }
    GetCodeTable(HT, HC, 2 * n - 1, cd, 0);

    PrintCodeTable(HC);            // print the code table
    printf("WPL:%d\n",WPL(HC,w));

    for (int i = 1; i <= n; i++) {
        free(HC[i]);
    }
    free(HC);
    free(cd);
    free(HT);
    return 0;
}