In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}; both encodings compress the string into 14 bits. Another set of codes can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] … c[N] f[N]
where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 ‘0’s and ‘1’s.
Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * A node of the Huffman tree.
 * Leaves have both child pointers set to NULL; internal nodes carry the sum
 * of their children's weights.
 */
typedef struct TreeNode *HuffmanTree;
struct TreeNode {
    int weight;          /* frequency of a leaf, or combined weight of a subtree */
    HuffmanTree left;    /* left child, NULL for a leaf */
    HuffmanTree right;   /* right child, NULL for a leaf */
};
/*
 * Array-backed binary min-heap of tree nodes, ordered by node weight.
 * Nodes are stored by value; index 0 is reserved for a sentinel and the
 * live elements occupy indices 1..Size.
 */
typedef struct HeapStruct *MinHeap;
struct HeapStruct {
    struct TreeNode *Elements;  /* node storage, elements are tree nodes held by value */
    int Size;                   /* number of elements currently in the heap */
    int Capacity;               /* maximum number of elements the heap can hold */
};
/* Builds a min-heap of MaxSize leaf nodes; leaf i gets weight[CH[i]]. */
MinHeap BuildMinHeap(int weight[], int MaxSize, char CH[]);
/* Allocates an empty min-heap able to hold MaxSize nodes. */
MinHeap MinHeap_Create(int MaxSize);
/* Copies node *T into heap H, keeping the min-heap order by weight. */
void Insert(MinHeap H, HuffmanTree T);
/* Merges the nodes of H into a single Huffman tree and returns its root. */
HuffmanTree Huffman(MinHeap H);
/* Removes the minimum-weight node from H and returns a heap-allocated copy. */
HuffmanTree DeleteMin(MinHeap H);
/* Weighted path length of the tree rooted at T, whose root is at depth Depth. */
int CalWpl(HuffmanTree T, int Depth);
/* Returns 1 if any of the first n codes in s is a prefix of another, else 0. */
int HasPreFix(char s[][200], int n);
/* Returns 1 if one of the strings a, b is a prefix of the other, else 0. */
int IsPreFix(char a[], char b[]);
/*
 * Entry point.
 *
 * Reads N characters with their frequencies, derives the optimal weighted
 * path length (WPL) from a Huffman tree, then judges each submission:
 * "Yes" is printed only when the submission's total encoded length equals
 * the optimal WPL and no code is a prefix of another; otherwise "No".
 *
 * Returns 0 on success and 1 when the input is malformed or memory for the
 * heap/tree could not be obtained.
 */
int main()
{
    int n, i;
    /* Frequency table indexed by character code.  Zero-initialised so that a
     * character that was never declared contributes nothing instead of
     * reading an indeterminate value. */
    int freq[256] = {0};
    char CH[100];
    int num[100];

    /* n is the number of distinct characters; it must fit CH[] and num[]. */
    if(scanf("%d", &n) != 1 || n < 1 || n > 100){
        return 1;
    }
    for(i = 0; i < n; i++){
        if(scanf(" %c %d", &CH[i], &num[i]) != 2){
            return 1;
        }
        /* Index by unsigned char: plain char may be signed. */
        freq[(unsigned char)CH[i]] = num[i];
    }

    /* Build the min-heap of leaves. */
    MinHeap H = BuildMinHeap(freq, n, CH);
    if(H == NULL){
        return 1;
    }
    /* Build the Huffman tree. */
    HuffmanTree T = Huffman(H);
    if(T == NULL){
        return 1;
    }
    /* Optimal weighted path length that every correct submission must match. */
    int wpl = CalWpl(T, 0);

    int k;       /* number of submissions */
    int ThisWpl; /* total encoded length of the current submission */
    if(scanf("%d", &k) != 1){
        return 1;
    }
    while(k-- > 0){
        char ch[256];
        char str[256][200];
        ThisWpl = 0;
        for(i = 0; i < n; i++){
            /* %199s bounds the read to the 200-byte row of str. */
            if(scanf(" %c %199s", &ch[i], str[i]) != 2){
                return 1;
            }
            ThisWpl += freq[(unsigned char)ch[i]] * (int)strlen(str[i]);
        }
        if(wpl == ThisWpl && !HasPreFix(str, n)){
            printf("Yes\n");
        }else{
            printf("No\n");
        }
    }
    return 0;
}
//创建一个最小堆
MinHeap BuildMinHeap(int weight[], int MaxSize, char CH[])
{
int i;
//创建一个空的最小堆
MinHeap H = MinHeap_Create(MaxSize);
HuffmanTree Temp = (HuffmanTree)malloc(sizeof(struct TreeNode));
for(i = 0; i < MaxSize; i++){
Temp->weight = weight[CH[i]];
Temp->left = NULL;
Temp->right = NULL;
Insert(H, Temp);
}
free(Temp);
return H;
}
//创建一个空的最小堆
MinHeap MinHeap_Create(int MaxSize)
{
MinHeap H = (MinHeap)malloc(sizeof(struct HeapStruct));
H->Elements = (HuffmanTree)malloc((MaxSize + 1)*sizeof(struct TreeNode));
H->Size = 0;
H->Capacity = MaxSize;
H->Elements[0].weight = -1;
return H;
}
/*
 * Inserts a copy of node *T into the min-heap H, preserving heap order.
 *
 * If the heap is already full a message is printed and nothing is inserted.
 * The node is copied by value; the caller keeps ownership of T.
 */
void Insert(MinHeap H, HuffmanTree T)
{
    int i;
    if(H->Size == H->Capacity)
    {
        printf("最小堆已满");
        return;
    }
    i = ++H->Size;
    /* Sift up.  The explicit i > 1 bound stops at the root even when the new
     * weight is smaller than the sentinel stored in slot 0; without it the
     * loop would stay at i == 0 forever. */
    for(; i > 1 && H->Elements[i/2].weight > T->weight; i /= 2){
        H->Elements[i] = H->Elements[i/2];
    }
    H->Elements[i] = *T;
}
/*
 * Builds a Huffman tree from the nodes stored in the min-heap H.
 *
 * Repeatedly removes the two lightest nodes, joins them under a new parent
 * whose weight is their sum, and puts the parent back into the heap.  With
 * Size nodes this takes Size - 1 merges, after which the single remaining
 * node is the root.
 *
 * Returns the root (heap-allocated by DeleteMin), or NULL if H is empty or a
 * node could not be extracted.  H is left empty.
 */
HuffmanTree Huffman(MinHeap H)
{
    int i;
    int merges;
    /* Insert() stores a copy of the node, so the parent can live on the
     * stack; allocating it dynamically would leak one node per merge. */
    struct TreeNode parent;

    if(H->Size == 0){
        return NULL;
    }
    merges = H->Size - 1;
    for(i = 0; i < merges; i++){
        parent.left = DeleteMin(H);
        parent.right = DeleteMin(H);
        if(parent.left == NULL || parent.right == NULL){
            return NULL;
        }
        parent.weight = parent.left->weight + parent.right->weight;
        Insert(H, &parent);
    }
    return DeleteMin(H);
}
/*
 * Removes the minimum-weight node (the root) from the min-heap H.
 *
 * Returns a newly allocated copy of the removed node, which the caller owns,
 * or NULL if the heap is empty (a message is printed) or allocation fails.
 */
HuffmanTree DeleteMin(MinHeap H)
{
    int parent, child;
    HuffmanTree MinItem;
    struct TreeNode last;   /* last element, to be re-seated from the root down */

    if(H->Size == 0){
        printf("最小堆已空");
        return NULL;
    }
    MinItem = malloc(sizeof *MinItem);
    if(MinItem == NULL){
        return NULL;
    }
    *MinItem = H->Elements[1];
    last = H->Elements[H->Size--];
    /* Sift down: move the smaller child up until `last` fits. */
    for(parent = 1; parent*2 <= H->Size; parent = child){
        child = parent*2;
        /* Pick the right child when it exists and is lighter. */
        if(child != H->Size && (H->Elements[child].weight > H->Elements[child+1].weight)){
            child++;
        }
        if(last.weight <= H->Elements[child].weight){
            break;
        }
        H->Elements[parent] = H->Elements[child];
    }
    H->Elements[parent] = last;
    return MinItem;
}
/*
 * Computes the weighted path length of the tree rooted at T.
 *
 * T      root of the (sub)tree; NULL is treated as an empty tree
 * Depth  depth of T within the whole tree (0 for the overall root)
 *
 * Returns the sum over all leaves of (leaf depth * leaf weight).
 */
int CalWpl(HuffmanTree T, int Depth)
{
    /* An empty subtree contributes nothing; this also covers a node that has
     * only one child. */
    if(T == NULL){
        return 0;
    }
    if(T->left == NULL && T->right == NULL){
        return Depth * T->weight;
    }
    return CalWpl(T->left, Depth + 1) + CalWpl(T->right, Depth + 1);
}
/*
 * Checks the first n codes in s pairwise.
 * Returns 1 as soon as IsPreFix reports a prefix relation for some pair,
 * and 0 when no pair is reported.
 */
int HasPreFix(char s[][200], int n)
{
    int first = 0;

    while(first < n){
        int second = first + 1;
        while(second < n){
            if(IsPreFix(s[first], s[second]) != 0){
                return 1;
            }
            second++;
        }
        first++;
    }
    return 0;
}
/*
 * Tests whether one of the two strings is a prefix of the other.
 *
 * Returns 1 when a is a prefix of b, b is a prefix of a, or the two strings
 * are equal; returns 0 otherwise.
 */
int IsPreFix(char a[], char b[])
{
    /* Advance while both strings still have characters and they agree.  The
     * test must be on the characters (*a, *b), not on the pointers, so the
     * scan stops at a terminator and never runs past the end of a string. */
    while(*a != '\0' && *b != '\0' && *a == *b){
        a++;
        b++;
    }
    /* Reaching the end of either string means it is a prefix of the other. */
    if(*a == '\0' || *b == '\0'){
        return 1;
    }
    return 0;
}