In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
本题分两步:第一步判断编码的总权重是否等于最小总权重;第二步判断是否存在某个编码是另一个编码的前缀,即解码时是否会产生歧义
为了求出最小总权重,我们需要构造一棵Huffman树,并据此计算出最小总权重
// A node of the Huffman tree. `Huffman` is the pointer alias used to pass
// nodes (and whole trees, via their root) around.
struct HuffNode;
typedef HuffNode* Huffman;
struct HuffNode
{
    int weight;     // weight carried by this node
    Huffman left;   // left child
    Huffman right;  // right child
};
而为了构造一棵Huffman树,我们还需要一个最小堆
// Array-backed heap of Huffman-node pointers. `Heap` is the pointer alias
// through which the heap is handled.
struct HeapNode;
typedef HeapNode* Heap;
struct HeapNode
{
    Huffman* elements;  // backing array of node pointers
    int size;           // number of entries currently stored
    int capacity;       // maximum number of entries the array is meant to hold
};
堆的操作定义如下
// Allocates an empty min-heap sized for building a Huffman tree over n symbols.
//
// The backing array has 2*n slots: slot 0 holds a sentinel node of weight 0 and
// slots 1..capacity (capacity == 2*n - 1) hold the live entries. For n <= 0 a
// single slot is still allocated so the sentinel always has somewhere to live.
//
// Returns the new heap; the caller owns it (nothing in this block frees it).
Heap CreateHeap(int n)
{
    const int slots = n > 0 ? 2 * n : 1;

    Heap H = (Heap)malloc(sizeof(HeapNode));
    H->elements = (Huffman*)malloc(slots * sizeof(Huffman));
    H->capacity = slots - 1;
    H->size = 0;

    // The sentinel is a tree node, so it must be sized as a HuffNode. Sizing it
    // as a HeapNode under-allocates on 64-bit targets and the writes below
    // would run past the end of the allocation.
    Huffman sentinel = (Huffman)malloc(sizeof(HuffNode));
    sentinel->weight = 0;
    sentinel->left = NULL;
    sentinel->right = NULL;
    H->elements[0] = sentinel;

    return H;
}
// Sifts the entry at index `node` down until neither child has a smaller
// weight, restoring the min-heap order of the subtree rooted at `node`.
// Entries are assumed to live in slots 1..H->size.
void PreDown(Heap H, int node)
{
    Huffman moving = H->elements[node];
    int hole = node;

    for (;;)
    {
        int smallest = hole * 2;
        if (smallest > H->size)
            break;  // the hole has no children

        // Prefer the right child only when it exists and is strictly lighter.
        const int sibling = smallest + 1;
        if (sibling <= H->size &&
            H->elements[sibling]->weight < H->elements[smallest]->weight)
            smallest = sibling;

        if (H->elements[smallest]->weight >= moving->weight)
            break;  // the moving entry fits here

        H->elements[hole] = H->elements[smallest];
        hole = smallest;
    }

    H->elements[hole] = moving;
}
// Rearranges the entries already stored in slots 1..H->size into min-heap
// order by sifting down every node that has at least one child, starting
// from the last such node and working back to the root.
void Build(Heap H)
{
    for (int i = H->size / 2; i > 0; i--)
        PreDown(H, i);
}
// Removes and returns the entry at the top of the heap (slot 1), or NULL
// when the heap is empty.
Huffman Delet(Heap H)
{
    if (H->size == 0)
        return NULL;

    Huffman top = H->elements[1];

    // Move the last entry into the root slot, shrink, then sift it down.
    H->elements[1] = H->elements[H->size];
    H->size -= 1;
    PreDown(H, 1);

    return top;
}
// Adds `elem` to the min-heap, sifting it up from the new last slot until its
// parent is not heavier.
//
// The caller must ensure there is room for one more entry; no capacity check
// is performed here.
void Insert(Heap H, Huffman elem)
{
    int node = ++H->size;
    const int weight = elem->weight;

    // Stop explicitly at the root instead of relying on the slot-0 sentinel
    // alone: with a weight below the sentinel's, the walk would otherwise
    // reach index 0, overwrite the sentinel and never terminate.
    while (node > 1 && weight < H->elements[node / 2]->weight)
    {
        H->elements[node] = H->elements[node / 2];
        node /= 2;
    }

    H->elements[node] = elem;
}
对Huffman树的操作定义如下
// Builds a Huffman tree from the n nodes held in heap H by performing n - 1
// merges: each merge removes the two lightest nodes, joins them under a new
// parent whose weight is their sum, and inserts the parent back.
// Returns the last node left in the heap, which is the root of the tree.
Huffman CreateHuffman(Heap H,int n)
{
    for (int remaining = n - 1; remaining > 0; --remaining)
    {
        Huffman parent = (Huffman)malloc(sizeof(HuffNode));
        Huffman first = Delet(H);
        Huffman second = Delet(H);

        parent->left = first;
        parent->right = second;
        parent->weight = first->weight + second->weight;

        Insert(H, parent);
    }

    return Delet(H);
}
// Computes the weighted path length of the tree rooted at T: the sum over all
// leaves of (leaf weight * leaf depth), with the root at depth 0.
//
// Returns 0 for a NULL tree and for a tree consisting of a single leaf.
int ComputeTotalWeight(Huffman T)
{
    if (T == NULL)
        return 0;

    int totalweight = 0;

    // Breadth-first walk. Each node's depth travels in a second queue kept in
    // lock-step with the node queue, so no fixed-size depth table is needed
    // and the tree may be of any size.
    queue<Huffman> nodes;
    queue<int> depths;
    nodes.push(T);
    depths.push(0);

    while (!nodes.empty())
    {
        Huffman current = nodes.front();
        nodes.pop();
        const int depth = depths.front();
        depths.pop();

        if (current->left == NULL && current->right == NULL)
        {
            totalweight += current->weight * depth;
            continue;
        }
        if (current->left)
        {
            nodes.push(current->left);
            depths.push(depth + 1);
        }
        if (current->right)
        {
            nodes.push(current->right);
            depths.push(depth + 1);
        }
    }

    return totalweight;
}
主函数如下
int main()
{
int n;
cin >> n;
map<char, int> mweight;
char code;
int weight;
for (int i = 0; i < n; ++i)
{
cin >> code >> weight;
mweight[code] = weight;
}
Heap H;
H = CreateHeap(n);
Huffman T;
for (auto it = mweight.begin(); it != mweight.end(); it++)
{
T = (Huffman)malloc(sizeof(HuffNode));
T->left = NULL;
T->right = NULL;
T->weight = it->second;
H->elements[++H->size] = T;
}
Build(H);
T = CreateHuffman(H, n); //利用堆生成了一颗Huffman树
int totalweight;
totalweight = ComputeTotalWeight(T); //计算最小总权重
int jn, testtotalweight;
string decode;
cin >> jn;
map<char, string> mdecode;
for (int i = 0; i < jn; i++)//依次判断
{
for (int i = 0; i < n; ++i)
{
cin >> code >> decode;
mdecode[code] = decode;
}
testtotalweight = 0;
for (auto it = mdecode.begin(); it != mdecode.end(); it++)
testtotalweight = testtotalweight + mweight[it->first] * it->second.length();
if (testtotalweight > totalweight)//总权重大于最小总权重
cout << "No" << endl;
else
{
bool pre = false;//判断是否有前缀编码
for (auto it = mdecode.begin(); it != (mdecode.end()); it++)
{
for (auto it2 = it; it2 != mdecode.end(); it2++)
{
if (it2 == it)
continue;
if (it->second == it2->second)
{
pre = true;
break;
}
else if (it->second.length() < it2->second.length())
{
if (it->second == it2->second.substr(0, it->second.length()))
{
pre = true;
break;
}
}
else if (it->second.length() > it2->second.length())
{
if (it2->second == it->second.substr(0, it2->second.length()))
{
pre = true;
break;
}
}
}
if (pre)
break;
}
if (pre)
cout << "No" << endl;
else
cout << "Yes" << endl;
}
}
return 0;
}