In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}; both compress the string into 14 bits. Another set of codes can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
Output Specification:
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
结尾无空行
Sample Output:
Yes
Yes
No
No
结尾无空行
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
/* Binary tree node; HuffmanTree is a pointer to one of these. */
struct HfmTNode {
    int Weight;              /* weight carried by this node */
    struct HfmTNode *Left;   /* left child, NULL when absent */
    struct HfmTNode *Right;  /* right child, NULL when absent */
};
typedef struct HfmTNode *HuffmanTree;
/* The heap stores pointers to tree nodes, not the nodes themselves. */
typedef HuffmanTree ElementType;

/* Array-backed binary heap. */
struct HNode {
    ElementType *Data;  /* backing array of elements */
    int Size;           /* number of elements currently in the heap */
    int Capacity;       /* maximum number of elements the heap can hold */
};
typedef struct HNode *Heap;
typedef Heap MinHeap;
MinHeap CreateMinHeap(int capacity)
{
Heap H=(Heap)malloc(sizeof(struct HNode));
H->Data=(ElementType *)malloc((capacity+1)*sizeof(ElementType));
H->Size=0;
H->Capacity=capacity;
//这题不设置哨兵,因为要把HuffmanTree的节点HfmTNode的地址作为指针插入最小堆
return H;
}
/*
 * Inserts the tree node `hfmT` into min-heap `H`, ordered by Weight.
 *
 * Returns true on success. Returns false, leaving the heap untouched,
 * when `H` or `hfmT` is NULL or the heap already holds Capacity elements
 * (writing another one would run past the Data array).
 */
bool Insert(MinHeap H,HuffmanTree hfmT)
{
    if (H == NULL || hfmT == NULL || H->Size >= H->Capacity) {
        return false;
    }

    /* Open a hole at the new last slot and bubble it up past every
     * ancestor that is heavier than the new element. Slot 1 is the root. */
    int hole = ++H->Size;
    while (hole > 1 && H->Data[hole / 2]->Weight > hfmT->Weight) {
        H->Data[hole] = H->Data[hole / 2];
        hole /= 2;
    }
    H->Data[hole] = hfmT;
    return true;
}
/*
 * Removes and returns the element with the smallest Weight from `H`.
 *
 * Returns NULL when `H` is NULL or the heap is empty; previously an empty
 * heap made Size negative and returned an uninitialised slot.
 */
ElementType DeleteMin(MinHeap H)
{
    if (H == NULL || H->Size <= 0) {
        return NULL;
    }

    ElementType smallest = H->Data[1];      /* the root holds the minimum */
    ElementType last = H->Data[H->Size--];  /* element that must be re-seated */

    /* Sift the hole left at the root downwards, always following the
     * lighter child, until `last` is no heavier than that child. */
    int hole = 1;
    while (hole * 2 <= H->Size) {
        int child = hole * 2;
        if (child + 1 <= H->Size &&
            H->Data[child + 1]->Weight < H->Data[child]->Weight) {
            child++; /* a right child exists and is lighter */
        }
        if (last->Weight < H->Data[child]->Weight) {
            break;
        }
        H->Data[hole] = H->Data[child];
        hole = child;
    }
    H->Data[hole] = last;
    return smallest;
}
/*
 * Weighted path length of the subtree rooted at `hfmT`, where `hfmT`
 * itself sits at depth `depth`: the sum over all leaves of
 * (leaf depth * leaf Weight).
 *
 * A node counts as a leaf only when both children are NULL; every other
 * node is expected to have both children present.
 */
int WPL(HuffmanTree hfmT,int depth)
{
    const bool isLeaf = !hfmT->Left && !hfmT->Right;

    if (!isLeaf) {
        int leftCost = WPL(hfmT->Left, depth + 1);
        int rightCost = WPL(hfmT->Right, depth + 1);
        return leftCost + rightCost;
    }
    return hfmT->Weight * depth;
}
/* One input symbol paired with its frequency. */
struct info {
    char character;  /* the symbol */
    int weight;      /* frequency of the symbol */
};

/* Symbol table: allocated and filled by main, read by Check. */
struct info *array;
bool Check(int N,int WeightedPathLength){
bool flag=true;//就算判断完了也要读完后续的
int i,wpl=0;
char ch;
char str[N];//编码长度最多N-1
HuffmanTree root=(HuffmanTree)malloc(sizeof(struct HfmTNode));root->Left=root->Right=NULL;
root->Weight=-1;//weight>=0的表示有权重的节点,即我们要编码的字符
HuffmanTree tree=root;
for(i=0;i<N;i++){
tree=root;
while( (ch=getchar())!='\n' ) continue;
ch=getchar();
scanf("%s",str);
if(strlen(str)>N-1) flag=false;
wpl+=array[i].weight*strlen(str);
for(int j=0;str[j]!='\0';j++){
if(str[j]=='0'){
if(tree->Left==NULL){
HuffmanTree NewNode=(HuffmanTree)malloc(sizeof(struct HfmTNode));NewNode->Left=NewNode->Right=NULL;
NewNode->Weight=-1;
tree->Left=NewNode;
tree=tree->Left;
}else{
if(tree->Left->Weight>=0 || j==strlen(str)-1){
flag=false;break;
}
else{
tree=tree->Left;
}
}
}else{
if(tree->Right==NULL){
HuffmanTree NewNode=(HuffmanTree)malloc(sizeof(struct HfmTNode));NewNode->Left=NewNode->Right=NULL;
NewNode->Weight=-1;
tree->Right=NewNode;
tree=tree->Right;
}else{
if(tree->Right->Weight>=0 || j==strlen(str)-1){
flag=false;break;
}
else{
tree=tree->Right;
}
}
}
if(j==strlen(str)-1) tree->Weight=array[i].weight;
}
}
if(wpl==WeightedPathLength && flag==true) return true;
else return false;
}
int main()
{
int N,i,v;
char ch;
HuffmanTree hfmT;
int WeightedPathLength;
scanf("%d",&N);
array=(struct info *)malloc(N*sizeof(struct info));
MinHeap heap=CreateMinHeap(63);
for(i=0;i<N;i++){
getchar();
scanf("%c %d",&ch,&v);
array[i].character=ch;array[i].weight=v;
hfmT=(HuffmanTree)malloc(sizeof(struct HfmTNode));hfmT->Left=hfmT->Right=NULL;
hfmT->Weight=v;
Insert(heap,hfmT);
}
for(i=0;i<N-1;i++){
hfmT=(HuffmanTree)malloc(sizeof(struct HfmTNode));
hfmT->Left=DeleteMin(heap);
hfmT->Right=DeleteMin(heap);
hfmT->Weight=hfmT->Left->Weight+hfmT->Right->Weight;
Insert(heap,hfmT);
}
WeightedPathLength=WPL(hfmT,0);
int M;
scanf("%d",&M);
for(i=0;i<M;i++){
if(Check(N,WeightedPathLength)) puts("Yes");
else puts("No");
}
return 0;
}