05-树9 Huffman Codes (30分)
In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] … c[N] f[N]
where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
我们确立哈夫曼编码的时候,我们首先确定这个编码一定是最优编码,即总长度WPL最小,其次这个编码没有歧义,前缀码没有二义性,数据仅存于叶节点中。
所以,我们的核心算法首先是计算最优编码长度。
我们需要用最小堆来建立哈夫曼树:每次从最小堆中取出两个权重最小的元素,然后再把这两个元素的权重和压回最小堆中,如此往复,直到堆中只剩一个元素。然后我们还需要计算这棵哈夫曼树的总权重(带权路径长度WPL),递归去求,也就是左子树的权重加上右子树的权重。
然后就是检查长度是否正确,建树的过程中是否满足前缀码要求。
编码长度在最坏情况下是N-1(哈夫曼树退化成一条链时,最深的叶子位于第N-1层)。
如果某个编码的长度超过N-1,那么这份提交一定不是最优编码,读完全部输入后输出“No”。
在测试前缀码是否符合要求的时候,我们需要建立相应的树。
对于1,我们就建立右子树;对于0,就建立左子树,新建的都是空节点(权重为0)。读到编码的最后一位时,给最后一个节点赋予权重。
到这里的时候我们还是满足条件。
但是我们来看下一个的时候会怎么样?
到这里的时候我们会发现,在经历第二个0的时候,我们碰到了带权重的结点。所以这就不满足前缀码的条件了。
当到这一个的时候,我们就会发现,到最后一个1的时候,不是叶子结点,所以也不符合条件。
我们来看一下代码
首先是前面的一些定义
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Capacity of the symbol tables and of the heap array (heap slot 0 is
   reserved for a sentinel node). */
#define Maxsize 64

typedef struct TreeNode *Huffman;

char ch[Maxsize];   /* symbols, in the order they were read */
int N;              /* number of distinct symbols */
int w[Maxsize];     /* w[i] is the frequency read together with ch[i] */
int TotalCodes;     /* weighted path length of the Huffman tree built in main */

/* Binary tree node: used for the Huffman tree and for the per-submission
   code trie. */
struct TreeNode
{
    int weight;
    struct TreeNode *left;
    struct TreeNode *right;
};

typedef struct HeapNode *MinHeap;

/* Array-backed binary min-heap of tree nodes, ordered by node weight. */
struct HeapNode
{
    struct TreeNode *data[Maxsize];
    int size;   /* number of stored elements; they occupy data[1..size] */
};
要实现的函数:
MinHeap CreateHeap();//create an empty min-heap
Huffman CreateHuffman();//create a single tree node
void Insert(MinHeap H,Huffman x);//insert an element into the min-heap
Huffman DeleteMin(MinHeap H);//remove and return the minimum element of the heap
Huffman BuildHuffman(MinHeap H);//build the Huffman tree from the heap contents
int WPL(Huffman root, int depth);//compute the weighted path length of a tree
int judge();//read one submission and decide whether it is correct
主函数入口:
int main()
{
int M;
Huffman tmp,root;
scanf("%d",&N);
MinHeap H=CreateHeap();
for(int i=0;i<N;i++)
{
getchar();//吸收缓冲区字符
scanf("%c %d",&ch[i],&w[i]);//ch数组存相应的编码符号,w数组存对应的频率
tmp=CreateHuffman();
tmp->weight=w[i];
Insert(H,tmp);//往堆中插入频率
}
root=BuildHuffman(H);//建造哈夫曼树
TotalCodes=WPL(root,0);//计算总权重
scanf("%d",&M);
for(int i=0;i<M;i++)//判断
{
if(judge())
printf("Yes\n");
else
printf("No\n");
}
return 0;
}
建造最小堆
MinHeap CreateHeap()
{
MinHeap H;
H=(MinHeap)malloc(sizeof(struct HeapNode));
H->size=0;
H->data[0]=(Huffman)malloc(sizeof(struct TreeNode));
H->data[0]->left=H->data[0]->right=NULL;
H->data[0]->weight=-1;
return H;
}
建立哈夫曼结点
Huffman CreateHuffman()
{
Huffman H;
H=(Huffman)malloc(sizeof(struct TreeNode));
H->left=H->right=NULL;
H->weight=0;
return H;
}
往最小堆中插入元素
/*
 * Inserts node x into min-heap H, keyed by x->weight.
 *
 * A hole is opened at the new last slot and moved up while the parent is
 * heavier than x; there is no explicit root test, the comparison against
 * data[0] ends the loop.
 */
void Insert(MinHeap H, Huffman x)
{
    int slot;

    H->size += 1;
    for (slot = H->size; H->data[slot / 2]->weight > x->weight; slot /= 2)
        H->data[slot] = H->data[slot / 2];   /* pull the heavier parent down */
    H->data[slot] = x;
}
从最小堆中删除最小的元素
/*
 * Removes and returns the node stored at the root (slot 1) of min-heap H.
 *
 * The last element is taken out of the array and sifted down from the root:
 * at each level the lighter child moves up until the last element is no
 * heavier than the lighter child, or the bottom is reached.
 */
Huffman DeleteMin(MinHeap H)
{
    Huffman smallest = H->data[1];
    Huffman last = H->data[H->size];
    int hole = 1;

    H->size -= 1;
    while (hole * 2 <= H->size)
    {
        int next = hole * 2;

        /* Prefer the right child when it exists and is strictly lighter. */
        if (next < H->size && H->data[next + 1]->weight < H->data[next]->weight)
            next += 1;
        if (H->data[next]->weight >= last->weight)
            break;
        H->data[hole] = H->data[next];
        hole = next;
    }
    H->data[hole] = last;
    return smallest;
}
建造哈夫曼树
/*
 * Builds a Huffman tree from the nodes currently stored in H and returns its
 * root.
 *
 * Performs (initial size - 1) merges: each one removes the two lightest
 * nodes, hangs them under a fresh parent whose weight is their sum, and puts
 * the parent back into the heap. The one node left afterwards is the root.
 */
Huffman BuildHuffman(MinHeap H)
{
    int merges = H->size - 1;

    while (merges-- > 0)
    {
        Huffman parent = CreateHuffman();

        parent->left = DeleteMin(H);
        parent->right = DeleteMin(H);
        parent->weight = parent->left->weight + parent->right->weight;
        Insert(H, parent);
    }
    return DeleteMin(H);
}
计算权重
/*
 * Returns the weighted path length of the tree rooted at root, where root
 * itself sits at the given depth: the sum over all leaves of
 * (leaf depth * leaf weight).
 */
int WPL(Huffman root, int depth)
{
    int is_leaf = (root->left == NULL) && (root->right == NULL);

    if (!is_leaf)
    {
        /* Internal node: both subtrees are one level deeper. */
        int left_cost = WPL(root->left, depth + 1);
        int right_cost = WPL(root->right, depth + 1);
        return left_cost + right_cost;
    }
    return root->weight * depth;
}
判断函数
/* Releases every node of the tree rooted at T; T may be NULL. */
static void FreeCodeTree(Huffman T)
{
    if (T == NULL)
        return;
    FreeCodeTree(T->left);
    FreeCodeTree(T->right);
    free(T);
}

/*
 * Reads one submission (N lines of "symbol code") from stdin and checks it.
 *
 * A submission is accepted when all of the following hold:
 *   - every symbol is one of the N symbols stored in ch[];
 *   - every code consists of '0'/'1' only and is shorter than N characters;
 *   - no code equals, or is a prefix of, another code (checked by inserting
 *     the codes into a binary trie: '0' goes left, '1' goes right);
 *   - the sum of (code length * frequency) equals TotalCodes.
 *
 * All N lines are consumed even after the submission has been rejected, so
 * that the next call starts at the next submission.
 *
 * Returns 1 if the submission is correct, 0 otherwise.
 */
int judge()
{
    char codes[Maxsize];    /* at most 63 code characters plus the terminator */
    char ch1;
    int length = 0, flag = 1;
    Huffman T = CreateHuffman();

    if (T == NULL)
        flag = 0;           /* cannot build the trie; still drain the input */

    for (int i = 0; i < N; i++)
    {
        /* The field width keeps an over-long code from overrunning codes[]. */
        if (scanf(" %c %63s", &ch1, codes) != 2)
        {
            flag = 0;
            break;
        }
        if (!flag)
            continue;       /* already rejected: only skip the remaining lines */

        size_t len = strlen(codes);
        if (len >= (size_t)N)
        {
            flag = 0;       /* code length N-1 is the longest this check allows */
            continue;
        }

        /* Bounded lookup of the symbol, so an unknown one cannot run off ch[]. */
        int idx = 0;
        while (idx < N && ch[idx] != ch1)
            idx++;
        if (idx == N)
        {
            flag = 0;
            continue;
        }

        /* Walk/extend the trie. In this trie a non-zero weight marks a node
           where an earlier code ended, independent of the symbol frequency. */
        Huffman p = T;
        for (size_t j = 0; j < len; j++)
        {
            Huffman *next;

            if (codes[j] == '0')
                next = &p->left;
            else if (codes[j] == '1')
                next = &p->right;
            else
            {
                flag = 0;   /* not a binary digit */
                break;
            }
            if (*next == NULL)
                *next = CreateHuffman();
            if (*next == NULL)
            {
                flag = 0;   /* allocation failed */
                break;
            }
            p = *next;
            if (p->weight)
            {
                flag = 0;   /* an earlier code ends here: it is a prefix of (or equal to) this one */
                break;
            }
        }
        if (!flag)
            continue;

        if (p->left || p->right)
        {
            flag = 0;       /* this code is a prefix of an earlier code */
            continue;
        }
        p->weight = 1;
        length += (int)len * w[idx];
    }

    FreeCodeTree(T);
    if (length != TotalCodes)
        flag = 0;
    return flag;
}
总代码
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Capacity of the symbol tables and of the heap array (heap slot 0 is
   reserved for a sentinel node). */
#define Maxsize 64

typedef struct TreeNode *Huffman;

char ch[Maxsize];   /* symbols, in the order they were read */
int N;              /* number of distinct symbols */
int w[Maxsize];     /* w[i] is the frequency read together with ch[i] */
int TotalCodes;     /* weighted path length of the Huffman tree built in main */

/* Binary tree node: used for the Huffman tree and for the per-submission
   code trie. */
struct TreeNode
{
    int weight;
    struct TreeNode *left;
    struct TreeNode *right;
};

typedef struct HeapNode *MinHeap;

/* Array-backed binary min-heap of tree nodes, ordered by node weight. */
struct HeapNode
{
    struct TreeNode *data[Maxsize];
    int size;   /* number of stored elements; they occupy data[1..size] */
};
MinHeap CreateHeap();//create an empty min-heap
Huffman CreateHuffman();//create a single tree node
void Insert(MinHeap H,Huffman x);//insert an element into the min-heap
Huffman DeleteMin(MinHeap H);//remove and return the minimum element of the heap
Huffman BuildHuffman(MinHeap H);//build the Huffman tree from the heap contents
int WPL(Huffman root, int depth);//compute the weighted path length of a tree
int judge();//read one submission and decide whether it is correct
int main()
{
int M;
Huffman tmp,root;
scanf("%d",&N);
MinHeap H=CreateHeap();
for(int i=0;i<N;i++)
{
getchar();
scanf("%c %d",&ch[i],&w[i]);
tmp=CreateHuffman();
tmp->weight=w[i];
Insert(H,tmp);
}
root=BuildHuffman(H);
TotalCodes=WPL(root,0);
scanf("%d",&M);
for(int i=0;i<M;i++)
{
if(judge())
printf("Yes\n");
else
printf("No\n");
}
return 0;
}
MinHeap CreateHeap()
{
MinHeap H;
H=(MinHeap)malloc(sizeof(struct HeapNode));
H->size=0;
H->data[0]=(Huffman)malloc(sizeof(struct TreeNode));
H->data[0]->left=H->data[0]->right=NULL;
H->data[0]->weight=-1;
return H;
}
Huffman CreateHuffman()
{
Huffman H;
H=(Huffman)malloc(sizeof(struct TreeNode));
H->left=H->right=NULL;
H->weight=0;
return H;
}
/*
 * Inserts node x into min-heap H, keyed by x->weight.
 *
 * A hole is opened at the new last slot and moved up while the parent is
 * heavier than x; there is no explicit root test, the comparison against
 * data[0] ends the loop.
 */
void Insert(MinHeap H, Huffman x)
{
    int slot;

    H->size += 1;
    for (slot = H->size; H->data[slot / 2]->weight > x->weight; slot /= 2)
        H->data[slot] = H->data[slot / 2];   /* pull the heavier parent down */
    H->data[slot] = x;
}
/*
 * Removes and returns the node stored at the root (slot 1) of min-heap H.
 *
 * The last element is taken out of the array and sifted down from the root:
 * at each level the lighter child moves up until the last element is no
 * heavier than the lighter child, or the bottom is reached.
 */
Huffman DeleteMin(MinHeap H)
{
    Huffman smallest = H->data[1];
    Huffman last = H->data[H->size];
    int hole = 1;

    H->size -= 1;
    while (hole * 2 <= H->size)
    {
        int next = hole * 2;

        /* Prefer the right child when it exists and is strictly lighter. */
        if (next < H->size && H->data[next + 1]->weight < H->data[next]->weight)
            next += 1;
        if (H->data[next]->weight >= last->weight)
            break;
        H->data[hole] = H->data[next];
        hole = next;
    }
    H->data[hole] = last;
    return smallest;
}
/*
 * Builds a Huffman tree from the nodes currently stored in H and returns its
 * root.
 *
 * Performs (initial size - 1) merges: each one removes the two lightest
 * nodes, hangs them under a fresh parent whose weight is their sum, and puts
 * the parent back into the heap. The one node left afterwards is the root.
 */
Huffman BuildHuffman(MinHeap H)
{
    int merges = H->size - 1;

    while (merges-- > 0)
    {
        Huffman parent = CreateHuffman();

        parent->left = DeleteMin(H);
        parent->right = DeleteMin(H);
        parent->weight = parent->left->weight + parent->right->weight;
        Insert(H, parent);
    }
    return DeleteMin(H);
}
/*
 * Returns the weighted path length of the tree rooted at root, where root
 * itself sits at the given depth: the sum over all leaves of
 * (leaf depth * leaf weight).
 */
int WPL(Huffman root, int depth)
{
    int is_leaf = (root->left == NULL) && (root->right == NULL);

    if (!is_leaf)
    {
        /* Internal node: both subtrees are one level deeper. */
        int left_cost = WPL(root->left, depth + 1);
        int right_cost = WPL(root->right, depth + 1);
        return left_cost + right_cost;
    }
    return root->weight * depth;
}
/* Releases every node of the tree rooted at T; T may be NULL. */
static void FreeCodeTree(Huffman T)
{
    if (T == NULL)
        return;
    FreeCodeTree(T->left);
    FreeCodeTree(T->right);
    free(T);
}

/*
 * Reads one submission (N lines of "symbol code") from stdin and checks it.
 *
 * A submission is accepted when all of the following hold:
 *   - every symbol is one of the N symbols stored in ch[];
 *   - every code consists of '0'/'1' only and is shorter than N characters;
 *   - no code equals, or is a prefix of, another code (checked by inserting
 *     the codes into a binary trie: '0' goes left, '1' goes right);
 *   - the sum of (code length * frequency) equals TotalCodes.
 *
 * All N lines are consumed even after the submission has been rejected, so
 * that the next call starts at the next submission.
 *
 * Returns 1 if the submission is correct, 0 otherwise.
 */
int judge()
{
    char codes[Maxsize];    /* at most 63 code characters plus the terminator */
    char ch1;
    int length = 0, flag = 1;
    Huffman T = CreateHuffman();

    if (T == NULL)
        flag = 0;           /* cannot build the trie; still drain the input */

    for (int i = 0; i < N; i++)
    {
        /* The field width keeps an over-long code from overrunning codes[]. */
        if (scanf(" %c %63s", &ch1, codes) != 2)
        {
            flag = 0;
            break;
        }
        if (!flag)
            continue;       /* already rejected: only skip the remaining lines */

        size_t len = strlen(codes);
        if (len >= (size_t)N)
        {
            flag = 0;       /* code length N-1 is the longest this check allows */
            continue;
        }

        /* Bounded lookup of the symbol, so an unknown one cannot run off ch[]. */
        int idx = 0;
        while (idx < N && ch[idx] != ch1)
            idx++;
        if (idx == N)
        {
            flag = 0;
            continue;
        }

        /* Walk/extend the trie. In this trie a non-zero weight marks a node
           where an earlier code ended, independent of the symbol frequency. */
        Huffman p = T;
        for (size_t j = 0; j < len; j++)
        {
            Huffman *next;

            if (codes[j] == '0')
                next = &p->left;
            else if (codes[j] == '1')
                next = &p->right;
            else
            {
                flag = 0;   /* not a binary digit */
                break;
            }
            if (*next == NULL)
                *next = CreateHuffman();
            if (*next == NULL)
            {
                flag = 0;   /* allocation failed */
                break;
            }
            p = *next;
            if (p->weight)
            {
                flag = 0;   /* an earlier code ends here: it is a prefix of (or equal to) this one */
                break;
            }
        }
        if (!flag)
            continue;

        if (p->left || p->right)
        {
            flag = 0;       /* this code is a prefix of an earlier code */
            continue;
        }
        p->weight = 1;
        length += (int)len * w[idx];
    }

    FreeCodeTree(T);
    if (length != TotalCodes)
        flag = 0;
    return flag;
}
希望大家能够好好体会哈夫曼树的代码,这是一个难点,希望大家能够多花点时间研究研究,取得进步!