05-树9 Huffman Codes

最新推荐文章于 2022-04-24 22:09:29 发布

Roland_WuZF

最新推荐文章于 2022-04-24 22:09:29 发布

阅读量2.7k

点赞数 4

本文链接：https://blog.csdn.net/Roland_WuZF/article/details/49474841

版权

这题目看起来挺大的，其实细细分析起来，主要是这么两个问题。首先，如何根据各个字符出现的频率，求出WPL；其次，如何判断学生输入的编码是否是正确的最优编码。

对第一个问题，其实可以发现，在利用WPL最优算法构造Huffman Tree的时候，除了叶节点外的所有节点的频率之和就是WPL（证明略）。因此，只要写个最小堆，塞进去之后，拿出两个最小值来，求和，然后重复。只要把这些和都加起来，就是WPL了。

第二个问题，有这么几个方面需要考察。首先，是否输入了其他的字符；其次，得到的编码长度是否是WPL；最后，是否有某个字符的编码是另一个字符编码的前缀。对于第一个和第二个，将最开始输入的字符以及频率保存下来，之后每次将学生的答案输入时，都对学生输入的字符进行遍历，找到对应的频率（没有就表示输入了其他字符），然后将频率与对应的编码长度相乘，累加求和，得到最终编码长度，再和WPL相比即可。找前缀，需要将学生输入的编码两两比较（两个for循环），在较长的编码中寻找较短的编码（strstr(a,b)函数在字符串a中寻找b，若找到，返回指向a中第一次出现b的位置的指针，没有则返回NULL），若返回的指针仍指向较长字符串的开头，则较短的编码是它的前缀，否则不是。这样，整个问题就被解决了。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of distinct characters (N); read from input in main and used by
   find_flu as the length of the record array. */
int Maxsize=0;

/* One heap/record entry: a character together with its frequency. */
typedef struct _node{
	int f;   /* frequency; also the key the min-heap orders by */
	char c;  /* the character ('*' for merged nodes created in cnt_wpl) */
}Node;

/* One line of a student submission: a character and its code string. */
typedef struct _code{
	char c;      /* the character being encoded */
	char *code;  /* NUL-terminated code string read from input */
}Code;

/* Binary min-heap of Node keyed on Node.f, stored 1-based in p. */
typedef struct _heap{
	Node *p;      /* element array; elements live in p[1..nowsize], p[0] is set up as a sentinel by main */
	int nowsize;  /* number of elements currently in the heap */
}Heap;

/* Insert record into the min-heap (sift-up). */
void insert_heap(Heap *heap,Node record);
/* Remove and return the element with the smallest frequency (sift-down). */
Node dele_heap(Heap *heap);
/* Repeatedly merge the two smallest elements and return the sum of all merged frequencies (the WPL). */
int cnt_wpl(Heap *heap);
/* Return the frequency stored for character c in record, or -1 if c is not present. */
int find_flu(Node *record,char c);
/* Return non-zero when the shorter of the two strings is a prefix of the longer one. */
int check(char a[],char b[]);

int main(){
	scanf("%d",&Maxsize);
	
	Heap *heap=(Heap*)malloc(sizeof(Heap)); //创建堆，为了哈夫曼树 
	heap->nowsize=0;
	heap->p=(Node*)malloc((Maxsize+1)*sizeof(Node));
	heap->p[0].f=-10000;
	int wpl;
	
	Node *record=(Node*)malloc((Maxsize)*sizeof(Node));//存储字符以及出现的频率 
	int i,j;
	for(i=0;i<Maxsize;i++){
		getchar();
		scanf("%c",&record[i].c);
		scanf("%d",&record[i].f);
		insert_heap(heap,record[i]);
	}
	
	wpl=cnt_wpl(heap);//计算哈夫曼编码的最优WPL 
//	printf("%d\n",wpl);
//	for(i=0;i<Maxsize;i++){
//		printf("%c %d\n",record[i].c,record[i].f);
//	}
	
	
	Code stcode[Maxsize]; //记录学生输入的字符以及对应的编码。 
	int flu=0;
	int flag=0;
	int stwpl=0;
	int number=0;
	int k;
	scanf("%d",&number);
	
	for(k=0;k<number;k++){
		stwpl=0;
		for(i=0;i<Maxsize;i++){
			getchar();
			stcode[i].code=(char*)malloc(Maxsize*sizeof(char));
			scanf("%c %s",&stcode[i].c,stcode[i].code);
			flu=find_flu(record,stcode[i].c);
			printf("flu= %d\n",flu);	
			if(flu==-1){
				flag=-1;   //表示输入了原本不存在的字符 
			}
			else
			{
				stwpl=flu*strlen(stcode[i].code)+stwpl;  //计算学生输入编码的WPL 
			}
		}
		
		int flag2=0;
		for(i=0;i<Maxsize;i++){
			for(j=i+1;j<Maxsize;j++){
				flag2=check(stcode[i].code,stcode[j].code);
				if(flag2){
					break;
				}
			}
			if(flag2){
				break;
			}
		}
		
		if(flag==-1){
			printf("DIF CHAR No\n");
		}
		else
		{
			if(stwpl!=wpl){
				printf("DIF LENGNo\n");
			}else{
				if(flag2){
					printf("REPET No\n");
				}
				else
				{
					printf("Yes\n");
				}
			}
		}
	}	
	return 0;
}

/*
 * Prefix test between two code strings.
 * Returns 1 when the shorter string (or b, when both have equal length) is a
 * prefix of the other one, 0 otherwise.
 */
int check(char a[],char b[]){
	size_t len_a=strlen(a);
	size_t len_b=strlen(b);
	const char *longer=a;
	const char *shorter=b;
	size_t short_len=len_b;

	if(len_a<len_b){
		longer=b;
		shorter=a;
		short_len=len_a;
	}
	/* The longer string starts with the shorter one exactly when their
	   first short_len characters agree. */
	return strncmp(longer,shorter,short_len)==0;
}

/*
 * Looks up character c among the first Maxsize entries of record.
 * Returns the frequency of the first matching entry, or -1 when c does not
 * occur in record.
 */
int find_flu(Node *record,char c){
	int idx;
	for(idx=0;idx<Maxsize;++idx){
		if(record[idx].c==c){
			return record[idx].f;
		}
	}
	return -1;
}

/*
 * Computes the optimal weighted path length for the frequencies in heap.
 * The two smallest elements are repeatedly removed and replaced by a merged
 * node holding their sum; the returned value is the total of all those sums.
 * The heap is consumed in the process (one element remains if it held any).
 */
int cnt_wpl(Heap *heap){
	int total=0;
	int merges_left=heap->nowsize-1;  /* n elements need n-1 merges */

	while(merges_left>0){
		Node first=dele_heap(heap);
		Node second=dele_heap(heap);
		Node merged;

		merged.f=first.f+second.f;
		merged.c='*';  /* placeholder character for internal nodes */
		total+=merged.f;
		insert_heap(heap,merged);
		merges_left--;
	}
	return total;
}

/*
 * Removes and returns the element with the smallest frequency.
 *
 * If the heap is empty, a diagnostic is written to stderr (so it cannot
 * pollute the program's judged output) and a zeroed Node {f=0, c='\0'} is
 * returned, instead of falling out of the function without a value.
 */
Node dele_heap(Heap *heap){
	Node top={ .f=0, .c='\0' };

	if(heap->nowsize==0){
		fprintf(stderr,"Heap is empty");
		return top;
	}

	top=heap->p[1];
	Node last=heap->p[heap->nowsize];  /* element to be re-placed from the root down */
	heap->nowsize-=1;

	int parent=1;
	while(2*parent<=heap->nowsize){  /* stop once parent has no child */
		int child=2*parent;
		/* Pick the smaller child; the first test guards against reading a
		   right child that does not exist. */
		if(child!=heap->nowsize && heap->p[child].f>heap->p[child+1].f){
			child++;
		}
		if(last.f<heap->p[child].f){
			break;
		}
		heap->p[parent]=heap->p[child];
		parent=child;
	}
	heap->p[parent]=last;
	return top;
}

/*
 * Inserts record into the min-heap by sifting it up from the first free slot.
 *
 * The loop stops at the root explicitly (i>1), so termination does not depend
 * on p[0] holding a sentinel smaller than every key. The caller must ensure
 * heap->p has room for nowsize+1 elements (plus the unused slot 0).
 */
void insert_heap(Heap *heap,Node record){
	int i=heap->nowsize+1;

	/* Move larger ancestors down until record's position is found. */
	for(;i>1 && heap->p[i/2].f>record.f;i/=2){
		heap->p[i]=heap->p[i/2];
	}
	heap->p[i]=record;
	heap->nowsize+=1;
}

05-树9 Huffman Codes (30分)

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer $N$ ( $2 \leq N \leq 63$ ), then followed by a line that contains all the $N$ distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer $M$ ( $\leq 1000$ ), then followed by $M$ student submissions. Each student submission consists of $N$ lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

Roland_WuZF

关注

4
点赞
踩
1

收藏

觉得还不错? 一键收藏
1
评论
05-树9 Huffman Codes

这题目看起来挺大的，其实细细分析起来，主要是这么几个。首先，如何根据各个字符出现的频率，求出WPL；其次，如何比较学生的输入是否是正确的最优编码。对第一个问题，其实可以发现，在利用WPL最优算法构造Huffman Tree的时候，除了叶节点外的所有节点的频率之和就是WPL（证明略）。因此，只要写个最小堆，塞进去之后，拿出两个最小值来，求和，然后重复。只要把这些和都加起来，就是WPL了。
复制链接

扫一扫