In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
Output Specification:
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <malloc.h>
#include <string.h>
// Result constants used by the functions declared with the `status` return type.
#define OK 1
#define ERROR 0
#define TRUE 1
#define FALSE 0
#define INFEASIBLE -1
//#define OVERFLOW -2
using namespace std;
// Plain int alias; functions return OK/ERROR, TRUE/FALSE, or a computed integer through it.
typedef int status;
// One node of the Huffman tree. Nodes live in an array and refer to each
// other by array index; 0 in a link field means "no such node".
typedef struct
{
    int weight;  // frequency of a leaf, or the sum of the two children's weights
    int parent;  // index of the parent node, 0 while the node has none
    int lchild;  // index of the left child, 0 when absent
    int rchild;  // index of the right child, 0 when absent
} HTNode;

// A Huffman tree is handled as a pointer to its node array.
typedef HTNode *HuffmanTree;
// One binary code word together with its length.
typedef struct HuffmanCode HuffmanCode;
struct HuffmanCode
{
    int length;     // number of characters stored in code
    char code[65];  // NUL-terminated string holding the binary code
};
// Reads n (character, frequency) pairs from standard input into w[1..n].
status CreatWeight(int *w, int n);
// Allocates HT and builds the Huffman tree for the n weights w[1..n].
status CreatHuffmanTree(HuffmanTree &HT, int n, int * w);
status Select(HuffmanTree &HT, int n, int &s1, int &s2);// picks the two parentless nodes with the smallest weights
// Returns the weighted path length over the n leaves of HT.
status TreeWLength(HuffmanTree HT, int n);
// Reads one submission from standard input and reports whether it is accepted.
status IsTrue(int sum, int n, HuffmanTree HT);
// TRUE when the submission's total weighted code length equals sum.
status isShortLength(int n, HuffmanCode *cd, HuffmanTree HT, int sum);
// Checks the n submitted codes for prefix conflicts.
status isPreCode(int n, HuffmanCode *cd);
int main()
{
int n;
cin >> n;
//把权值读入数组
int *w = new int[n + 1];
CreatWeight(w, n);
//构建霍夫曼树
HuffmanTree HT;
CreatHuffmanTree(HT, n, w);
//求最短路径,并赋值给sum
int sum;
sum = TreeWLength(HT, n);
//判断各个同学的例子
int m;
cin >> m;
for (int i = 0; i < m; i++)
{
int flag = 0;
flag = IsTrue(sum, n, HT);
if (flag) cout << "Yes" << endl;
else cout << "No" << endl;
}
return 0;
}
// Consumes n "<character> <frequency>" pairs from standard input.
// The character is read and discarded; the frequency of the k-th pair is
// stored in w[k] for k = 1..n (w[0] is not touched).
// Always returns 0.
status CreatWeight(int *w, int n)
{
    int idx = 1;
    while (idx <= n)
    {
        char symbol;  // read only to advance past the character
        cin >> symbol;
        cin >> w[idx];
        ++idx;
    }
    return 0;
}
// Builds a Huffman tree for n weights.
//   HT - receives a newly allocated array of 2n nodes; slot 0 is unused,
//        slots 1..n are the leaves and slots n+1..2n-1 the merged nodes.
//   n  - number of weights; must be greater than 1.
//   w  - weights, read from w[1..n].
// Returns ERROR (leaving HT untouched) when n <= 1, otherwise OK.
status CreatHuffmanTree(HuffmanTree &HT, int n, int * w)
{
    if (n <= 1)
        return ERROR;

    const int total = 2 * n - 1;
    HT = new HTNode[total + 1];

    // Leaves take their weight from w; merged nodes start at weight 0.
    // Every node starts with no parent and no children.
    for (int k = 1; k <= total; ++k)
    {
        HT[k].weight = (k <= n) ? w[k] : 0;
        HT[k].parent = 0;
        HT[k].lchild = 0;
        HT[k].rchild = 0;
    }

    // Repeatedly join the two lightest parentless nodes under the next free slot.
    for (int next = n + 1; next <= total; ++next)
    {
        int s1, s2;  // filled in by Select
        Select(HT, next - 1, s1, s2);

        HTNode &merged = HT[next];
        merged.lchild = s1;
        merged.rchild = s2;
        merged.weight = HT[s1].weight + HT[s2].weight;

        HT[s1].parent = next;
        HT[s2].parent = next;
    }
    return OK;
}
// Finds the two lightest nodes among HT[1..n] that have no parent yet.
//   HT - node array; only entries with parent == 0 are candidates.
//   n  - highest index to examine.
//   s1 - receives the index of the lightest candidate (0 if there is none).
//   s2 - receives the index of the second lightest candidate (0 if fewer
//        than two candidates exist).
// Ties keep the lower index first. Always returns OK.
status Select(HuffmanTree &HT, int n, int &s1, int &s2)
{
    // Start from "nothing chosen" so the outputs are always defined and the
    // caller's uninitialised values are never read. Index 0 holds no node.
    s1 = 0;
    s2 = 0;
    for (int i = 1; i <= n; i++)
    {
        if (HT[i].parent != 0)
            continue;  // already merged into a subtree

        if (s1 == 0 || HT[i].weight < HT[s1].weight)
        {
            // New minimum: the previous minimum becomes the runner-up.
            s2 = s1;
            s1 = i;
        }
        else if (s2 == 0 || HT[i].weight < HT[s2].weight)
        {
            s2 = i;
        }
    }
    return OK;
}
// Computes the weighted path length of the tree: for each leaf 1..n, the
// number of parent links up to the root multiplied by the leaf's weight,
// summed over all leaves.
status TreeWLength(HuffmanTree HT, int n)
{
    int total = 0;
    for (int leaf = 1; leaf <= n; ++leaf)
    {
        // Climb parent links until a node without a parent is reached.
        int depth = 0;
        for (int node = leaf; HT[node].parent != 0; node = HT[node].parent)
            ++depth;

        total += depth * HT[leaf].weight;
    }
    return total;
}
// Reads one submission (n lines of "<character> <code>") from standard input
// and judges it.
//   sum - optimal total encoded length to compare against.
//   n   - number of symbols in the submission.
//   HT  - Huffman tree whose leaves 1..n carry the symbol weights.
// Returns TRUE when the submission passes both isShortLength and isPreCode,
// FALSE otherwise.
status IsTrue(int sum, int n, HuffmanTree HT)
{
    // Value-initialised so every code is an empty string even if a read fails.
    HuffmanCode *cd = new HuffmanCode[n + 1]();

    for (int i = 1; i <= n; i++)
    {
        char temp_ch;  // the symbol itself is not needed, only its position
        cin >> temp_ch;
        // Cap the extraction at the buffer size so an over-long token cannot
        // write past the end of code[].
        cin.width(sizeof cd[i].code);
        cin >> cd[i].code;
        cd[i].length = (int)strlen(cd[i].code);
    }

    int flag = FALSE;
    if (isShortLength(n, cd, HT, sum) && isPreCode(n, cd))
        flag = TRUE;

    // The codes are only needed for this one verdict.
    delete[] cd;
    return flag;
}
// Checks whether the submission reaches the target total length.
// Adds up cd[i].length * HT[i].weight for i = 1..n and returns TRUE when the
// result equals sum, FALSE otherwise.
status isShortLength(int n, HuffmanCode *cd, HuffmanTree HT, int sum)
{
    int encoded = 0;
    int idx = 1;
    while (idx <= n)
    {
        encoded += HT[idx].weight * cd[idx].length;
        ++idx;
    }
    return (encoded == sum) ? TRUE : FALSE;
}
// Checks that cd[1..n] form a prefix code.
// Returns FALSE as soon as any code is a prefix of (or equal to) another
// code, regardless of which of the two comes first; returns TRUE otherwise.
status isPreCode(int n, HuffmanCode *cd)
{
    for (int i = 1; i < n; ++i)
    {
        const size_t len_i = strlen(cd[i].code);
        for (int j = i + 1; j <= n; ++j)
        {
            const size_t len_j = strlen(cd[j].code);
            // Two codes conflict exactly when they agree on the whole of the
            // shorter one, so compare only that many characters.
            const size_t shorter = (len_i < len_j) ? len_i : len_j;
            if (strncmp(cd[i].code, cd[j].code, shorter) == 0)
                return FALSE;
        }
    }
    return TRUE;
}