菜鸡奋斗路05-树9 Huffman Codes

最新推荐文章于 2021-10-26 21:50:12 发布

Prayotter

最新推荐文章于 2021-10-26 21:50:12 发布

阅读量283

点赞数

分类专栏： data structe

本文链接：https://blog.csdn.net/qq_41829562/article/details/80430658

版权

data structe 专栏收录该内容

22 篇文章 0 订阅

订阅专栏

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer $N$ ( $2 \leq N \leq 63$ ), then followed by a line that contains all the $N$ distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer $M$ ( $\leq 1000$ ), then followed by $M$ student submissions. Each student submission consists of $N$ lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

作者: 陈越

单位: 浙江大学

时间限制: 400ms

内存限制: 64MB

代码长度限制: 16KB

个人分析：哈夫曼树的应用。帮助题目中的老师判断：1.学生提交是否为哈夫曼树 2.同组编码哈夫曼树可能不同，要求能正确识别。

乍一看：

对于第一个点，可以通过构造一个函数，判断学生输入是否符合前缀码条件来得到实现。

对于第二个点，想到同组编码的不同哈夫曼树，都是可以通过左右儿子（非叶结点）交换得到的，可以根据这个特点进行判断。

然而，问题并没有这么简单：“是前缀码”与“是哈夫曼编码”这两者之间并不是充分必要关系。即使是前缀码，也不一定是最优编码。例如样例中的第三份提交：所有码长都是3，任何一个编码都不是另一个的前缀，但总编码长度为 3×21=63，大于最优值53，所以应输出No。

其次，第二个点，判断左右儿子交换后是否同构，也是多此一举。。。。。（当时写的时候写了好久T T泪目），只要同时满足最小wpl+前缀码条件，就一定是正确的哈夫曼树了。。。。

而学生的wpl值是相对容易求得的，只需要把学生输入的每个编码的长度乘以对应字符的频次，再求和就可以了。而正确的最小wpl，可以通过自己建立一棵哈夫曼树，再以此计算得到。那么最大的问题就是：如何判断前缀码条件了。

所谓前缀码，也就是在一堆编码串里，任何一个编码串都不能是另一个编码串的前缀（注意是“前缀”，而不是任意位置的子串）。比如a:01，b：011，c：1这样就是不符合条件的，因为a是b的前缀，011既可以对应b，也可以对应ac。那么通过把所有编码串两两对比，判断是否存在一个串是另一个串的前缀的情况，若没有，就是正确的前缀码，如果有那就不是。

上代码：

#include<stdio.h>
#include<stdlib.h>
// Upper bound used both as the heap capacity (CreateHeap) and as the maximum
// code length read per symbol in main (buffers are Maxsize+1 chars).
#define Maxsize 63
// Running total of the weights of all merged (internal) nodes, accumulated by
// Huffman(); main compares each submission's total code length against it.
int number1=0;
// Huffman tree node; HuffmanTree is a pointer to such a node.
typedef struct TreeNode *HuffmanTree;
struct TreeNode
{
	// Frequency for a leaf; sum of both children's weights for a merged node.
	int weight;
	// Child links; both are NULL on a leaf.
	struct TreeNode *left;
	struct TreeNode *right;
};
// Min-heap of tree nodes keyed on weight; MinHeap is a pointer to the heap header.
typedef struct HNode *MinHeap;
struct HNode
{
	// Node storage, indexed from 1; slot 0 holds a sentinel set up by CreateHeap.
	// Spelled "struct TreeNode" so the declaration is valid in C as well as C++.
	struct TreeNode *Element;
	// Number of nodes currently stored (occupying slots 1..size).
	int size;
	// Maximum number of nodes the storage can hold.
	int capacity;
};
// Sift the node at index i down until the subtree rooted there satisfies the
// min-heap property (parent weight <= child weights).
//   H: heap whose slots 1..H->size are in use
//   i: index of the subtree root to fix
void percDown(MinHeap H,int i)
{
	struct TreeNode saved=H->Element[i];   // node being moved down
	int hole=i;                            // slot currently free for 'saved'

	while(hole*2<=H->size)
	{
		// Pick the lighter of the two children (the right one may not exist).
		int smaller=hole*2;
		if(smaller<H->size&&H->Element[smaller+1].weight<H->Element[smaller].weight)
			smaller++;

		// Stop once no child is strictly lighter than the saved node.
		if(H->Element[smaller].weight>=saved.weight)
			break;

		H->Element[hole]=H->Element[smaller];
		hole=smaller;
	}
	H->Element[hole]=saved;
}
// Turn the unordered contents of slots 1..H->size into a min-heap by sifting
// down every node that has at least one child, from the last such node to the root.
void BuildMinHeap(MinHeap H)
{
	int idx=H->size/2;
	while(idx>0)
	{
		percDown(H,idx);
		idx--;
	}
}
// Heap operations: creation, full/empty checks, insertion and deletion.

// Allocate an empty min-heap able to hold Maxsize nodes.
// Slot 0 is a sentinel with weight 0 and no children; real nodes start at slot 1.
// Returns the new heap, or NULL if either allocation fails.
MinHeap CreateHeap()
{
	MinHeap H=(MinHeap)malloc(sizeof(struct HNode));
	if(H==NULL)
		return NULL;

	H->Element=(struct TreeNode*)malloc((Maxsize+1)*sizeof(struct TreeNode));
	if(H->Element==NULL)
	{
		// Do not leak the header when the node array cannot be allocated.
		free(H);
		return NULL;
	}

	H->size=0;
	H->capacity=Maxsize;
	H->Element[0].weight=0;
	// Give the sentinel well-defined child pointers instead of garbage.
	H->Element[0].left=NULL;
	H->Element[0].right=NULL;
	return H;
}
// Report whether the heap already holds as many nodes as its capacity allows.
bool IsFull(MinHeap H)
{
	bool full=(H->capacity==H->size);
	return full;
}
// Insert a copy of node *T into the min-heap.
//   H: destination heap
//   T: node to insert; it is copied by value, so the caller keeps ownership of *T
// Returns true on success, false (after printing a message) if the heap is full.
bool Insert(MinHeap H,HuffmanTree T)
{
	if(IsFull(H))
	{
		printf("最小堆已满");
		return false;
	}

	int i=++H->size;
	// Sift up: pull heavier ancestors down into the hole. The i>1 guard stops
	// at the root explicitly, so termination no longer depends on the sentinel
	// in slot 0 being no heavier than T (a negative weight used to loop forever).
	for(;i>1&&(H->Element[i/2].weight)>(T->weight);i/=2)
		H->Element[i]=H->Element[i/2];
	H->Element[i]=*T;
	return true;
}
// Report whether the heap currently stores no nodes.
bool IsEmpty(MinHeap H)
{
	bool empty=(0==H->size);
	return empty;
}
// Remove the minimum-weight node from the heap.
//   H: heap to pop from
// Returns a newly malloc'd copy of the removed node (the caller owns it and
// must free it), or NULL if the heap is empty or the allocation fails. The
// heap is left untouched in both NULL cases.
HuffmanTree DeleteMin(MinHeap H)
{
	if(IsEmpty(H))
	{
		printf("最小堆已为空");
		// Bail out instead of reading slot 1 and driving size negative.
		return NULL;
	}

	HuffmanTree MinItem=(HuffmanTree)malloc(sizeof(struct TreeNode));
	if(MinItem==NULL)
		return NULL;
	*MinItem=H->Element[1];

	// The last node is re-inserted from the root; a local copy suffices, so no
	// scratch allocation is needed (the original malloc'd one and leaked it).
	struct TreeNode X=H->Element[H->size];
	H->size=H->size-1;

	int parent,child;
	for(parent=1;parent*2<=H->size;parent=child)
	{
		// Select the lighter child; the right child may not exist.
		child=parent*2;
		if((child!=H->size)&&(H->Element[child].weight)>(H->Element[child+1].weight))
			child++;
		if((X.weight)<=(H->Element[child].weight))
			break;
		H->Element[parent]=H->Element[child];
	}
	H->Element[parent]=X;
	return MinItem;
}

// Build a Huffman tree from the nodes already stored in H (slots 1..H->size).
//   H: heap pre-filled with leaf nodes; it is heapified here and drained
// Returns the root of the tree as returned by the final DeleteMin.
// Side effect: adds the weight of every merged node to the global number1;
// that sum equals the weighted path length of the tree. number1 is not reset
// here, so it accumulates across calls.
HuffmanTree Huffman(MinHeap H)
{
	BuildMinHeap(H);
	int times=H->size;
	for(int i=1;i<times;i++)
	{
		// Insert() stores a copy of the node, so a stack temporary is enough;
		// the original malloc'd a node per merge and never freed it.
		struct TreeNode merged;
		merged.left=DeleteMin(H);
		merged.right=DeleteMin(H);
		merged.weight=merged.left->weight+merged.right->weight;
		number1+=merged.weight; // accumulate the tree's WPL
		Insert(H,&merged);
	}
	return DeleteMin(H);
}

// Walk both code strings in lock-step and report whether one of them is a
// prefix of the other (identical strings count as a prefix as well).
//   c1, c2: NUL-terminated code strings
// Returns true if a prefix relation exists, false otherwise.
bool Compare(char *c1,char *c2)
{
	int k=0;
	for(;;)
	{
		// Reaching the end of either string before any mismatch means that
		// string is a prefix of the other.
		if(c1[k]=='\0'||c2[k]=='\0')
			return true;
		if(c1[k]!=c2[k])
			return false;
		k++;
	}
}

// Count the characters in a NUL-terminated code string.
//   a: code string
// Returns the number of characters before the terminating '\0'.
int code_length(char *a)
{
	int count=0;
	for(;a[count]!='\0';count++)
		;
	return count;
}
int main()
{	
	int N,M;
	scanf("%d",&N);
	char c[N];int data[N];     //data[]计频次
	getchar();	
	MinHeap H=CreateHeap();//初始化堆，并将哈夫曼树的结点权值放在堆中 
	for(int i=1;i<=N;i++)
	{	
		if(i<N)
		{
			scanf("%c %d ",&c[i],&data[i-1]);    //由于最小堆的第一个元素是哨兵，循环从i=1开始做
		}                                      //同时data[]从0下标开始计
		else
			scanf("%c %d",&c[i],&data[i-1]);     //其实这个c[]并没有什么软用..就是为了读掉输入
		H->Element[i].weight=data[i-1];
		H->Element[i].left=NULL;
		H->Element[i].right=NULL;
		H->size++; 
	}	
//先根据字符频次，得出其哈夫曼树,计算得wpl 
	HuffmanTree T;
	T=Huffman(H); 
//储存学生输入 
	getchar();
	scanf("%d",&M);
	getchar();
	char ch[N],code[N][Maxsize+1];
	for(int j=0;j<M;j++)
	{	int flag=0;
		int number2=0; 
		for(int k=0;k<N;k++)
		{
			scanf("%c %s",&ch[k],&code[k]);
			getchar();				
		} 
		//计算学生输入码的wpl
		for(int i=0;i<N;i++)
		{
			number2+=(code_length(code[i]))*data[i];
		}
		if(number2>number1&&flag==0)
		{
			printf("No\n");
			flag=1;
		}
		else
		{		//判断学生码是否为前缀码
			for(int i=1;i<N;i++)
			{
				for(int j=0;j<i;j++)
				{
					if(Compare(code[i],code[j])&&flag==0)
					{
						printf("No\n");
						flag=1;
					}
				}	
			}
			if(flag==0)
				printf("Yes\n");
		}
	}
}

检测结果：