菜鸡奋斗路05-树9 Huffman Codes

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2 ≤ N ≤ 63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤ 1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No
作者: 陈越
单位: 浙江大学
时间限制: 400ms
内存限制: 64MB
代码长度限制: 16KB


个人分析:哈夫曼树的应用。帮助题目中的老师判断:1.学生提交是否为哈夫曼树 2.同组编码哈夫曼树可能不同,要求能正确识别。

乍一看:

对于第一个点,可以通过构造一个函数,判断学生输入是否符合前缀码条件来得到实现。

对于第二个点,想到同组编码的不同哈夫曼树,都是可以通过左右儿子(非叶结点)交换得到的,可以根据这个特点进行判断。

然而,问题并没有这么简单:是前缀码/是哈夫曼树,这两之间并不是充分必要关系。即使是前缀码,也不一定就是哈夫曼树:


其次,第二个点,判断左右儿子交换后是否同构,也是多此一举。。。。。(当时写的时候写了好久T T泪目),只要同时满足最小wpl+前缀码条件,就一定是正确的哈夫曼树了。。。。

而学生的wpl值是相对容易求得的,只需要把学生输入的每个编码的长度乘以对应字符的频次,再全部加和就可以了。而正确的最小wpl,可以通过自己建立一棵哈夫曼树,再以此计算得到。那么最大的问题就是:如何判断前缀码条件了。

所谓前缀码,也就是在一堆编码串里,任何一个编码串都不能是另一个编码串的前缀(即不能与另一个串的开头部分完全相同),比如a:01,b:011,c:1这样就是不符合条件的,因为01是011的前缀,011既可以对应b,也可以对应ac。那么通过把所有编码串两两对比,判断是否存在某个串是另一个串的前缀,若没有,就是正确前缀码,如果有那就不是。

上代码:

#include<stdbool.h>
#include<stdio.h>
#include<stdlib.h>
/* Upper bound used throughout the file: heap capacity, and (plus one for
 * the terminator) the size of each buffer that holds a submitted code. */
#define Maxsize 63
/* Accumulator updated by Huffman(): the sum of the weights of all merged
 * (internal) nodes. main() compares each submission's total encoded length
 * against this value. It is never reset, so it keeps growing if Huffman()
 * is called more than once. */
int number1=0;
/*
 * Huffman tree node. HuffmanTree is a pointer to a node.
 * Leaves are created with both child pointers set to NULL; a merged node's
 * weight is the sum of its two children's weights.
 */
typedef struct TreeNode *HuffmanTree;
struct TreeNode{
	int weight;          /* frequency (leaf) or combined weight (merged node) */
	HuffmanTree left;    /* left subtree, NULL for a leaf */
	HuffmanTree right;   /* right subtree, NULL for a leaf */
};
/*
 * Min-heap of tree nodes, ordered by weight. MinHeap is a pointer to the
 * heap header.
 *
 * The element array is 1-based: slot 0 is reserved as a sentinel and the
 * live elements occupy slots 1..size.
 *
 * The element type is spelled `struct TreeNode` so that the declaration is
 * valid C as well as C++ (a bare `TreeNode` is only a type name in C++).
 */
typedef struct HNode *MinHeap;
struct HNode{
	struct TreeNode *Element;  /* node storage, stored by value; slot 0 is the sentinel */
	int size;                  /* number of elements currently in the heap */
	int capacity;              /* maximum number of elements the heap may hold */
};
/*
 * Sift the element at index i down until the subtree rooted at i satisfies
 * the min-heap property (assuming both child subtrees already do).
 *
 * H: heap whose Element[1..size] holds the nodes.
 * i: 1-based index of the element to sift down.
 */
void percDown(MinHeap H,int i)
{
	struct TreeNode moving=H->Element[i];   /* value being pushed down */
	int hole=i;                             /* slot currently free for it */

	while(hole*2<=H->size)
	{
		int smaller=hole*2;
		/* Pick the lighter of the two children when a right child exists. */
		if(smaller<H->size&&H->Element[smaller+1].weight<H->Element[smaller].weight)
			smaller++;
		/* Stop as soon as the lighter child is not strictly lighter. */
		if(H->Element[smaller].weight>=moving.weight)
			break;
		H->Element[hole]=H->Element[smaller];
		hole=smaller;
	}
	H->Element[hole]=moving;
}
/*
 * Turn the unordered contents of H->Element[1..size] into a min-heap by
 * sifting down every node that has at least one child, starting from the
 * last such node and finishing at the root.
 */
void BuildMinHeap(MinHeap H)
{
	int node=H->size/2;   /* last index that has a child */
	while(node>0)
	{
		percDown(H,node);
		node--;
	}
}
//堆的插入,删除,判满,判空,初始化函数
MinHeap CreateHeap()
{
	MinHeap H=(MinHeap)malloc(sizeof(struct HNode));
	H->Element=(TreeNode*)malloc((Maxsize+1)*sizeof(TreeNode));
	H->size=0;
	H->capacity=Maxsize;
	H->Element[0].weight=0;
	return H;
}
/* Returns true when the heap already holds `capacity` elements. */
bool IsFull(MinHeap H)
{
	if(H->size==H->capacity)
		return true;
	return false;
}
/*
 * Insert a copy of *T into the min-heap.
 *
 * H: destination heap.
 * T: node to insert; it is copied by value, so the caller keeps ownership
 *    of the object T points to.
 *
 * Returns true on success, false (after printing a message) if the heap is
 * already full.
 */
bool Insert(MinHeap H,HuffmanTree T)
{
	int i;
	if(IsFull(H))
	{
		printf("最小堆已满");
		return false;
	}
	i=++H->size;
	/* Sift up. The explicit i>1 bound stops at the root without relying on
	 * the slot-0 sentinel, so a weight below the sentinel's cannot make the
	 * loop spin forever at index 0. */
	for(;i>1&&(H->Element[i/2].weight)>(T->weight);i/=2)
		H->Element[i]=H->Element[i/2];
	H->Element[i]=*T;
	return true;
}
/* Returns true when the heap holds no elements. */
bool IsEmpty(MinHeap H)
{
	if(H->size==0)
		return true;
	return false;
}
/*
 * Remove the minimum-weight element from the heap and return it as a newly
 * allocated node.
 *
 * Returns a malloc'd copy of the removed element (the caller owns it), or
 * NULL if the heap is empty (after printing a message) or the allocation
 * fails. The heap is left unchanged when NULL is returned.
 */
HuffmanTree DeleteMin(MinHeap H)
{
	int parent,child;
	HuffmanTree MinItem;
	struct TreeNode last;   /* last element, to be re-seated from the root */

	if(IsEmpty(H))
	{
		printf("最小堆已为空");
		return NULL;        /* do not read slot 1 or drive size negative */
	}
	MinItem=(HuffmanTree)malloc(sizeof(struct TreeNode));
	if(MinItem==NULL)
		return NULL;

	*MinItem=H->Element[1];
	last=H->Element[H->size];
	H->size=H->size-1;
	/* Sift the former last element down from the root. */
	for(parent=1;parent*2<=H->size;parent=child)
	{
		child=parent*2;
		if((child!=H->size)&&(H->Element[child].weight)>(H->Element[child+1].weight))
			child++;
		if((last.weight)<=(H->Element[child].weight))
			break;
		else
			H->Element[parent]=H->Element[child];
	}
	H->Element[parent]=last;
	return MinItem;
}

/*
 * Build a Huffman tree from the nodes stored in H->Element[1..size].
 *
 * The heap is first ordered, then the two lightest nodes are repeatedly
 * removed and merged until one node remains. Each merged node's weight is
 * added to the global number1; the sum of all merged weights equals the
 * weighted path length of the tree, because every leaf is counted once for
 * each of its ancestors. number1 is not reset here.
 *
 * Returns the root as returned by DeleteMin (a heap-allocated node).
 */
HuffmanTree Huffman(MinHeap H)
{
	int i;

	BuildMinHeap(H);
	int times=H->size;
	for(i=1;i<times;i++)
	{
		/* Insert() stores a copy of the node, so a local is sufficient;
		 * allocating it on the heap would leak one node per merge. */
		struct TreeNode merged;
		merged.left=DeleteMin(H);
		merged.right=DeleteMin(H);
		merged.weight=merged.left->weight+merged.right->weight;
		number1+=merged.weight; /* accumulate the weighted path length */
		Insert(H,&merged);
	}
	return DeleteMin(H);
}

/*
 * Tell whether one of two code strings is a prefix of the other.
 *
 * Walks both strings in lockstep. Reaching the end of either string before
 * finding a mismatch means the shorter one is a prefix of the longer one
 * (equal strings count too). An empty string is a prefix of anything.
 *
 * Returns true if a prefix relation exists, false otherwise.
 */
bool Compare(char *c1,char *c2)
{
	for(;;c1++,c2++)
	{
		if(*c1=='\0'||*c2=='\0')
			return true;
		if(*c1!=*c2)
			return false;
	}
}

/* Returns the number of characters in the NUL-terminated string a. */
int code_length(char *a)
{
	int count=0;
	for(;a[count]!='\0';count++)
		;
	return count;
}
/*
 * Reads N character/frequency pairs, computes the optimal total encoded
 * length with Huffman(), then reads M submissions of N "character code"
 * lines each and prints "Yes" or "No" for every submission.
 *
 * A submission is accepted when its total encoded length does not exceed
 * the optimal length (left in the global number1) and no code is a prefix
 * of another code. Codes are paired with frequencies by position, so each
 * submission is assumed to list the characters in the same order as the
 * frequency line.
 *
 * Returns 0 on success and 1 if the input is malformed or the heap cannot
 * be created.
 */
int main()
{
	int N,M;
	int data[Maxsize];               /* data[i]: frequency of the i-th character */
	char code[Maxsize][Maxsize+1];   /* codes of the submission being checked */
	char symbol;                     /* characters are read only to consume them */

	if(scanf("%d",&N)!=1||N<1||N>Maxsize)
		return 1;

	/* Load the frequencies into heap slots 1..N (slot 0 is the sentinel). */
	MinHeap H=CreateHeap();
	if(H==NULL)
		return 1;
	for(int i=1;i<=N;i++)
	{
		/* The leading blank in the format skips spaces and line breaks, so
		 * no manual getchar() bookkeeping is required. */
		if(scanf(" %c %d",&symbol,&data[i-1])!=2)
			return 1;
		H->Element[i].weight=data[i-1];
		H->Element[i].left=NULL;
		H->Element[i].right=NULL;
		H->size++;
	}

	/* Build the Huffman tree; the optimal total length ends up in number1. */
	Huffman(H);

	if(scanf("%d",&M)!=1)
		return 1;

	for(int s=0;s<M;s++)
	{
		int number2=0;   /* total encoded length of this submission */
		int valid=1;

		for(int k=0;k<N;k++)
		{
			/* The field width 63 must stay equal to Maxsize so a code can
			 * never overrun its (Maxsize+1)-byte buffer. */
			if(scanf(" %c %63s",&symbol,code[k])!=2)
				return 1;
			number2+=code_length(code[k])*data[k];
		}

		/* Longer than optimal: rejected without looking at the prefixes. */
		if(number2>number1)
			valid=0;

		/* Otherwise every pair of codes must be free of prefix relations. */
		for(int i=1;valid&&i<N;i++)
		{
			for(int j=0;j<i;j++)
			{
				if(Compare(code[i],code[j]))
				{
					valid=0;
					break;
				}
			}
		}

		printf("%s\n",valid?"Yes":"No");
	}
	return 0;
}

检测结果:


总结:这道题很有意义,菜鸡感觉自己写的东西真的有点用了。至少,能得到最短最节省空间的编码。很开心也很开心~哈夫曼老人家1953年就写了这篇paper,感觉自己仿佛活在梦里= =这都差了60年了,我才学明白。。。anyway,再接再厉!!


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值