5-9 Huffman Codes (30分)

最新推荐文章于 2020-04-20 19:09:19 发布

老问题

最新推荐文章于 2020-04-20 19:09:19 发布

阅读量414

点赞数 1

分类专栏：数据结构学习

本文链接：https://blog.csdn.net/qq_32511479/article/details/54627634

版权

数据结构学习专栏收录该内容

38 篇文章 1 订阅

订阅专栏

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer $N$ ( $2\le N\le 63$ ), then followed by a line that contains all the $N$ distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer $M$ ( $\le 1000$ ), then followed by $M$ student submissions. Each student submission consists of $N$ lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No






这道题写了很久，把每个步骤完全弄明白实在是不简单。看了一些其他人的博客，最后参考Roland_WuZF的思路和方法http://blog.csdn.net/roland_wuzf/article/details/49474841。自己写的时候也是改了又改，最后改成这样我也是醉了。还是要把步骤画出来，这样看得更清楚一点。



#include <stdio.h>  
#include <stdlib.h>  
#include <string.h>  

/* A character paired with an integer weight; the value type returned by del(). */
struct node {
	char c;	/* the character */
	int f;	/* the weight attached to it */
};
typedef struct node Node;

/* Handle type: a heap is passed around as a pointer to its HNode record. */
typedef struct HNode *Heap;

/*
 * Heap storage kept as two parallel arrays: data[i] is the weight of the
 * i-th heap slot and ch[i] is the character that belongs to that weight.
 */
struct HNode {
	int *data;	/* weights, indexed by heap slot */
	char *ch;	/* characters, indexed by the same slot */
	int size;	/* number of elements currently stored */
};

/* One entry of the input table: a character and its frequency. */
struct primitive {
	char c;	/* the character */
	int f;	/* its frequency */
};
typedef struct primitive Primit;

/* One line of a submission: a character and the code string given for it. */
struct code {
	char c;		/* the character */
	char *ch;	/* pointer to the NUL-terminated code string */
};
typedef struct code Code;

Heap create(int Maxsize)
{
	Heap H = (Heap)malloc(sizeof(struct HNode));
	 H->data = (int *)malloc((Maxsize + 1) * sizeof(int));
	 H->ch = (char *)malloc((Maxsize + 1) * sizeof(char));
	 H->size = 0;
	 H->data[0] = -10000;
	 H->ch[0] = 0;
	return H;
}

/*
 * Insert the pair (c, f) into min-heap H, ordered by f.
 *
 * The new element starts in the next free slot and parents with a larger
 * weight are shifted down until the right position is found. The weight
 * array and the character array are moved together so that ch[i] always
 * stays the character belonging to data[i].
 *
 * The heap record carries no capacity field, so the caller must make sure
 * there is room for one more element.
 */
void Insert(Heap H,char c,int f)
{
	int i = ++H->size;

	/*
	 * Stop at the root explicitly instead of relying only on the sentinel
	 * in slot 0; otherwise a weight not larger than the sentinel would
	 * leave i stuck at 0 forever.
	 */
	for (; i > 1 && H->data[i / 2] > f; i /= 2) {
		H->data[i] = H->data[i / 2];
		H->ch[i] = H->ch[i / 2];
	}
	H->data[i] = f;
	H->ch[i] = c;
}

/*
 * Remove and return the element with the smallest weight from min-heap H.
 *
 * The last element is taken out of the array and sifted down from the
 * root; on every step both the weight and its character are moved so the
 * two parallel arrays stay in sync.
 *
 * If the heap is empty nothing is removed and the contents of slot 0 are
 * returned.
 */
Node del(Heap H)
{
	Node Min;

	if (H->size < 1) {
		/* Empty heap: do not read past the stored elements or drive
		 * size negative. */
		Min.c = H->ch[0];
		Min.f = H->data[0];
		return Min;
	}

	Min.f = H->data[1];
	Min.c = H->ch[1];

	/* Detach the last element; it will be re-seated from the root. */
	int F = H->data[H->size];
	char C = H->ch[H->size];
	H->size--;

	int parent;
	int child;
	for (parent = 1; parent * 2 <= H->size; parent = child) {
		child = parent * 2;
		/* Pick the smaller of the two children when a right child exists. */
		if (child != H->size && H->data[child] > H->data[child + 1]) {
			child++;
		}
		if (F < H->data[child]) {
			break;
		}
		H->data[parent] = H->data[child];
		H->ch[parent] = H->ch[child];
	}
	H->data[parent] = F;
	H->ch[parent] = C;
	return Min;
}

/*
 * Repeatedly merge the two smallest weights in H and return the sum of
 * all merged weights.
 *
 * Each round removes two elements with del() and inserts their sum back
 * (tagged with the character '*'), so the heap is consumed: it ends with
 * a single element when it started with at least one.
 */
int WPL(Heap H)
{
	int total = 0;
	int rounds = H->size - 1;	/* fixed up front; size changes below */

	while (rounds-- > 0) {
		Node first = del(H);
		Node second = del(H);
		int merged = first.f + second.f;

		total += merged;
		Insert(H, '*', merged);
	}
	return total;
}

/*
 * Look up character c in table q and return its frequency.
 *
 * Entries q[1] .. q[N] are examined in order (index 0 is not looked at);
 * the frequency of the first match is returned, or -1 when no entry has
 * that character.
 */
int Find(Primit *q,char c,int N)
{
	for (int k = 1; k <= N; k++) {
		if (q[k].c == c) {
			return q[k].f;
		}
	}
	return -1;
}

/*
 * Report whether one of the two strings is a prefix of the other.
 *
 * Returns 1 if the shorter string (either one, when the lengths are equal)
 * matches the beginning of the longer string, and 0 otherwise.
 *
 * Only the leading bytes matter, so the comparison is done with strncmp
 * over the shorter length rather than searching the whole longer string.
 */
int check(char a[],char b[])
{
	size_t len_a = strlen(a);
	size_t len_b = strlen(b);
	size_t shorter = len_a < len_b ? len_a : len_b;

	return strncmp(a, b, shorter) == 0;
}

int main(void)
{
	int N;
	char c;
	int f;
	scanf("%d",&N);	
	Primit *P = (Primit *)malloc((N)*sizeof(Primit));
	Heap H = create(N);
	for(int i = 1; i <= N; i++)
	{	
		getchar();
		scanf("%c %d",&c,&f);
		P[i].c = c;
		P[i].f = f;
		Insert(H,c,f);
	}
	int wpl = WPL(H);/*最优编码，可以表示为除了叶节点外的所有节点的权值之和就是WPL*/
					 /*也就是相当与每个字符对应的权值在形成Huffman树的过程中被加的次数（这个次数相当与路径）乘以权值之和*/ 
//	printf("wpl = %d",wpl);
	int M;
	Code stcode[N];
	scanf("%d",&M);
	int flag = 0;
	int stwpl = 0;
	while(M--)
	{
		stwpl = 0;
		for(int i = 1; i <= N; i++)
		{
			getchar();
			stcode[i].ch = (char *)malloc((N)*sizeof(char));
			scanf("%c %s",&stcode[i].c,stcode[i].ch);
			int flu = Find(P,stcode[i].c,N);/*找到字符并返回相应的权值*/
		//	printf("flu = %d\n",flu);
			if(flu == -1)
			{
				flag = -1;/*输入了其他字符*/
			}
			else
			{
				stwpl += flu*strlen(stcode[i].ch);/*学生输入编码的wpl，可以用字符串的长度来表示字符在Huffman树中的路径*/
			}
		}
		
		
		int flag2 = 0;
		for(int i = 1; i <= N; i++)
		{
			for(int j = i+1; j <= N; j++)
			{
				flag2 = check(stcode[i].ch,stcode[j].ch);/*采用strstr函数，判断短的字符串是否是长的字符串的子串*/
				if(flag2)break;					/*strstr(str1,str2) 函数用于判断字符串str2是否是str1的子串。
												如果是，则该函数返回str2在str1中首次出现的地址；否则，返回NULL。*/
			}
			if(flag2)break;
		}
		
		if(flag == -1)
		{
			printf("No\n");
		}
		else
		{
			if(stwpl != wpl)/*判断学生输入编码的wpl是否是最优编码*/ 
			{
				printf("No\n");
			}
			else
			{
				if(flag2)/*是否有某个字符的编码是另一个字符编码的前缀*/
				{
					printf("No\n");
				}
				else
				{
					printf("Yes\n");
				}
			}
		}
	//	printf("flag = %d, stwpl = %d, flag2 = %d\n",flag,stwpl,flag2);
	}
	return 0;
}