In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
Output Specification:
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
这道题写了很久,把每个步骤完全弄明白实在是不简单。看了一些其他人的博客,最后参考Roland_WuZF的思路和方法http://blog.csdn.net/roland_wuzf/article/details/49474841。自己写的时候也是改了又改,最后改成这样我也是醉了。还是要把步骤画出来,这样看得更清楚一点。
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Longest code a submission may contain (see the problem statement).
 * The "%63s" conversion in main() must be kept in sync with this value. */
#define MAX_CODE_LEN 63

/* A (character, frequency) pair as returned by the heap. */
typedef struct node {
    char c;
    int f;
} Node;

/* Binary min-heap stored in two parallel 1-based arrays:
 * data[i] is the weight and ch[i] the character of slot i.
 * Slot 0 holds a sentinel that is smaller than every real weight. */
struct HNode {
    int *data;
    char *ch;
    int size;
};
typedef struct HNode *Heap;

/* A character and its frequency as given in the input. */
typedef struct primitive {
    char c;
    int f;
} Primit;

/* One line of a student submission: a character and its bit string. */
typedef struct code {
    char c;
    char *ch;
} Code;

/* malloc wrapper: terminates the program with a message when out of memory. */
static void *xmalloc(size_t bytes)
{
    void *p = malloc(bytes);
    if (p == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    return p;
}

/* Create an empty min-heap able to hold Maxsize elements. */
Heap create(int Maxsize)
{
    Heap H = xmalloc(sizeof *H);
    H->data = xmalloc(((size_t)Maxsize + 1) * sizeof *H->data);
    H->ch = xmalloc(((size_t)Maxsize + 1) * sizeof *H->ch);
    H->size = 0;
    H->data[0] = INT_MIN; /* sentinel: stops the sift-up loop at the root */
    H->ch[0] = 0;
    return H;
}

/* Insert (c, f) into the heap. The caller must not exceed the capacity
 * passed to create(). */
void Insert(Heap H, char c, int f)
{
    int i = ++H->size;
    for (; H->data[i / 2] > f; i /= 2) {
        /* Move the parent down; keep the character with its weight. */
        H->data[i] = H->data[i / 2];
        H->ch[i] = H->ch[i / 2];
    }
    H->data[i] = f;
    H->ch[i] = c;
}

/* Remove and return the minimum element. The heap must be non-empty. */
Node del(Heap H)
{
    Node min;
    min.c = H->ch[1];
    min.f = H->data[1];

    /* Take the last element and sift it down from the root. */
    int last_f = H->data[H->size];
    char last_c = H->ch[H->size];
    H->size--;

    int parent = 1;
    while (parent * 2 <= H->size) {
        int child = parent * 2;
        if (child != H->size && H->data[child] > H->data[child + 1]) {
            child++; /* pick the smaller of the two children */
        }
        if (last_f < H->data[child]) {
            break;
        }
        H->data[parent] = H->data[child];
        H->ch[parent] = H->ch[child];
        parent = child;
    }
    H->data[parent] = last_f;
    H->ch[parent] = last_c;
    return min;
}

/* Compute the weighted path length of an optimal (Huffman) code for the
 * weights currently in H. The heap is consumed: one element remains.
 *
 * The optimal cost equals the sum of the weights of all internal nodes of
 * the Huffman tree: each leaf weight is added once for every merge on its
 * path to the root, i.e. once per bit of its code. */
int WPL(Heap H)
{
    int wpl = 0;
    while (H->size > 1) {
        Node left = del(H);
        Node right = del(H);
        int merged = left.f + right.f;
        wpl += merged;
        Insert(H, '*', merged);
    }
    return wpl;
}

/* Look up character c among the N entries q[0..N-1].
 * Returns its frequency, or -1 when c is not one of the given characters. */
int Find(const Primit *q, char c, int N)
{
    for (int i = 0; i < N; i++) {
        if (q[i].c == c) {
            return q[i].f;
        }
    }
    return -1;
}

/* Return non-zero when one of the two strings is a prefix of the other
 * (equal strings count as a prefix of each other). */
int check(const char a[], const char b[])
{
    size_t la = strlen(a);
    size_t lb = strlen(b);
    size_t shorter = la < lb ? la : lb;
    return strncmp(a, b, shorter) == 0;
}

/* Read the character frequencies and M student submissions from stdin and
 * print "Yes" for every submission that is an optimal prefix code, "No"
 * otherwise. Returns EXIT_FAILURE when the input is malformed. */
int main(void)
{
    int status = EXIT_FAILURE;
    Primit *P = NULL;
    Heap H = NULL;
    Code *stcode = NULL;
    int N = 0;
    int M = 0;
    int wpl = 0;

    if (scanf("%d", &N) != 1 || N < 1) {
        goto done;
    }

    P = xmalloc((size_t)N * sizeof *P);
    H = create(N);
    for (int i = 0; i < N; i++) {
        /* The leading blank skips any whitespace, including CR/LF. */
        if (scanf(" %c %d", &P[i].c, &P[i].f) != 2) {
            goto done;
        }
        Insert(H, P[i].c, P[i].f);
    }
    wpl = WPL(H);

    if (scanf("%d", &M) != 1) {
        goto done;
    }

    /* One code buffer per character, allocated once and reused for every
     * submission. */
    stcode = xmalloc((size_t)N * sizeof *stcode);
    for (int i = 0; i < N; i++) {
        stcode[i].ch = xmalloc(MAX_CODE_LEN + 1);
    }

    while (M-- > 0) {
        int unknown_char = 0; /* reset for every submission */
        int stwpl = 0;

        for (int i = 0; i < N; i++) {
            if (scanf(" %c %63s", &stcode[i].c, stcode[i].ch) != 2) {
                goto done;
            }
            int freq = Find(P, stcode[i].c, N);
            if (freq == -1) {
                unknown_char = 1; /* character not in the frequency table */
            } else {
                /* The code length is the depth of the leaf in the tree. */
                stwpl += freq * (int)strlen(stcode[i].ch);
            }
        }

        /* A valid code must be prefix-free: no code may be a prefix of
         * another one. */
        int prefix_clash = 0;
        for (int i = 0; i < N && !prefix_clash; i++) {
            for (int j = i + 1; j < N && !prefix_clash; j++) {
                prefix_clash = check(stcode[i].ch, stcode[j].ch);
            }
        }

        if (!unknown_char && stwpl == wpl && !prefix_clash) {
            puts("Yes");
        } else {
            puts("No");
        }
    }
    status = EXIT_SUCCESS;

done:
    if (stcode != NULL) {
        for (int i = 0; i < N; i++) {
            free(stcode[i].ch);
        }
    }
    free(stcode);
    if (H != NULL) {
        free(H->data);
        free(H->ch);
    }
    free(H);
    free(P);
    return status;
}