[时间]:6h+
[难度]:中等偏难 (主要是3条性质初学者难以发现)
[reference]:
[反思,收获]:
学习方法: 关键是在理论上拿下这个题;(能否发现那3条性质) 先在草稿纸上写出框架(伪码),然后再具体上级coding,不选择好策略就上战场是一种愚蠢的表现---提前分析框架的重要性; 某些性质能够在数据结构的形成过程中能够计算出来(WPL),这样我就可以避免不建huffman树; 自己和自己说话,不然自己大脑容易陷入”乌云“当中;自己很辣鸡,多学习多学习多学习啊! ASCII码一般是128位(8bits里第一位一般为0,只剩7位可供使用,2的7次方=128),所以直接建立一个元素个数位128的数组字节存储它们的元素值和频率---直接映射以加快寻找速度; 在状态和过程中游离,看看能否发现它们之间存在的规律。
[hints]: 可以不创建树的物理结构
In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i]
is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i]
is the frequency of c[i]
and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i]
is the i
-th character and code[i]
is an non-empty string of no more than 63 '0's and '1's.
Output Specification:
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
------------------------------------------------------- line -------------------------------------------------------------
Goal:
- I.计算mini_WPL;
- II.判断是否符合前缀码;
I.计算mini_WPL
v1.基本去模拟(代码量大),先建立huffmanTree实体结构,再通过HuffmanTree计算WPL.这里容易误导我们以为必须建立HuffTree才能计算WPL,实则不然.
v2.小根堆模拟huffman建立过程,只是并不产生实体,若把小根堆的元素换成huffman节点,那就是在建立huffmanTree.
II.判断是否符合前缀码
v1.建树去模拟
v2.直接比较内容,任意取两个字符串(s1, s2)去与(|)得到结果result(=s1 & s2), 若result 和s1,s2其中之一相等就肯定出现重复. drawback:时间复杂度太高, O(n^2)
/**
index ch fre
0 A 2
1 B 2
...
*/
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
struct tablenode{
char ch;
int frequency;
}Table[128];
// remember to clear the space
struct HeapNode{
int *Ele;
int size;
int capacity;
};
typedef struct HeapNode* Heap;
typedef struct huffnode{
int Isleaf;
struct huffnode* lc;
struct huffnode* rc;
}*HuffNode;
void myread(Heap h, int n);
Heap CreateSmallHeap(int maxsize);
void Insert2Heap(Heap smallheap, int X);
void Initialize(Heap smallheap);
int DeleteOfHeap(Heap smallheap);
void AdjustSubheap(Heap smallheap, int index);
int WPL(Heap smallheap);
int JudgeHuff(int dst_wpl, int n);
HuffNode NewHuff();
void FreeHuff(HuffNode root);
int main(){
int i, n, dst_wpl, group;
memset(Table, 0, 128*sizeof(struct tablenode));
Heap h = CreateSmallHeap(64);
scanf(" %d", &n);
myread(h, n);
dst_wpl = WPL(h);
scanf(" %d", &group);
for(i=0; i<group; i++){
JudgeHuff(dst_wpl, n);
}
return 0;
}
/**
* 判断一次Huffmancode是否符合要求. 两个角度:1.长度,2.前缀
* 注意:一定要有n次输入,不能失败就结束程序
*/
int JudgeHuff(int dst_wpl, int n){
int i, j, tag=1, wpl=0;
char buff[65], c;
HuffNode root = NewHuff(), p=root;
for(i=0; i<n; i++){
p=root; // need to be initialized every time
scanf(" %c %s", &c, buff);
int len = strlen(buff);
if( tag && len > n-1){
tag = 0;
}
if( tag ){ //计算wpl和判断是否符合前缀码
wpl += len* Table[ (int) c].frequency;
j=0;
while( tag && j < len){
if( p->Isleaf){ //control go to a leaf but buffcoding still have more characters
tag=0;
break;
}
switch(buff[j]){
case '0':
if( !p->lc ){
p->lc = NewHuff();
p = p->lc;
if(j == len-1){
p->Isleaf = 1;
}
}else{ // node is existed
if( (j<len-1 && p->lc->Isleaf) || j==len-1 ){
tag=0;
break;
}
p = p->lc;
}
break;
case '1':
if( !p->rc ){
p->rc = NewHuff();
p = p->rc;
if(j == len-1){
p->Isleaf = 1;
}
}else{ // node is existed
if( (j<len-1 && p->rc->Isleaf) || j==len-1 ){
tag=0;
break;
}
p = p->rc;
}
break;
default:
printf("Buff[] Error!\n");
exit(1);
}
j++;
}
}
}
FreeHuff(root);
if(tag == 1 && wpl == dst_wpl){
printf("Yes\n");
}else{
printf("No\n");
}
return 0;
}
/**
*
*/
void FreeHuff(HuffNode root){
if(root == NULL){
return ;
}
if(root->lc){
FreeHuff(root->lc);
}
if(root->rc){
FreeHuff(root->rc);
}
free(root);
}
/**
* creat a huffnode
*/
HuffNode NewHuff(){
HuffNode p = malloc(sizeof(struct huffnode));
if(!p){
printf("Out of memory!\n");
exit(1);
}
p->Isleaf = 0;
p->lc = NULL;
p->rc = NULL;
return p;
}
void myread(Heap h, int n){
int i, buff;
char c;
for(i=0; i<n; i++){
scanf(" %c",&c);
scanf(" %d", &buff);
//insert data in to heap
h->Ele[i+1] = buff;
h->size++;
Table[ (int)c].frequency = buff;
}
Initialize(h);
return;
}
/**
* 按照给定的大小建立一个小根堆,小根堆中下标0元素放置了最小元素--哨兵
*/
Heap CreateSmallHeap(int maxsize){
int *arr = (int *)malloc( (maxsize+1) * sizeof(int));
Heap h = (Heap) malloc(sizeof(struct HeapNode));
h->Ele = arr;
h->size = 0;
h->capacity = maxsize;
arr[0] = 1<<((sizeof(int)*8)-1) ; //minimum in int
return h;
}
void Insert2Heap(Heap smallheap, int X){
int i, p;
Heap h = smallheap;
++(h->size);
//从上往下拉,让出合适的位置给新插入的元素X
for( i=h->size; X < h->Ele[i/2]; i=p){
p = i/2;
if( X < h->Ele[p]){
h->Ele[i] = h->Ele[p];
continue;
}
break;
}
h->Ele[i] = X;
}
/**
* return: the value of the element that be deleted
*/
int DeleteOfHeap(Heap smallheap){
if(smallheap->size <=0 )
{
printf("DeleteOfheap() Error! heap is empty\n");
exit(1);
}
int ret_val = smallheap->Ele[1];
int new_val = smallheap->Ele[smallheap->size--];
// new_value replace the root then adjusting the subheap
smallheap->Ele[1] = new_val;
AdjustSubheap(smallheap, 1);
return ret_val;
}
/**
* adujust the subheap whose index is index from top-down
* premise: the root of the subheap is modified
*/
void AdjustSubheap(Heap smallheap, int index){
int size = smallheap->size;
if(index > size/2 || index < 1){ // return if the node is leaves
return ;
}
int i, child_pos;
int new_val = smallheap->Ele[index];
for(i=index; i*2<=size; i = child_pos){ // adjust from top to down
child_pos=2*i;
//if right child is existed
if(child_pos+1 <= size){
if(smallheap->Ele[child_pos] > smallheap->Ele[child_pos+1]){
child_pos++;
}
}
if(smallheap->Ele[child_pos] < new_val){
smallheap->Ele[i] = smallheap->Ele[child_pos];
}else{
break;
}
}
smallheap->Ele[i] = new_val;
return ;
}
/**
* adjust elements in chaos to keep the propertied of heap
*/
void Initialize(Heap smallheap){
int size = smallheap->size;
if(size < 2){
return;
}
int i;
for(i=size/2; i>0; i--){
AdjustSubheap(smallheap, i);
}
}
/**
* caculate WPL of a small Heap
*/
int WPL(Heap smallheap){
int size = smallheap->size, rlt=0;
int i;
for(i=1; i<=size-1; i++){
int tmp = DeleteOfHeap(smallheap);
tmp += DeleteOfHeap(smallheap);
rlt +=tmp;
Insert2Heap(smallheap,tmp);
}
return rlt;
}