05-树9 Huffman Codes (30分)(数据结构)(C语言实现)(哈夫曼树)

最新推荐文章于 2021-08-04 09:32:54 发布

mapleshl

最新推荐文章于 2021-08-04 09:32:54 发布

阅读量881

点赞数 5

分类专栏：数据结构文章标签：算法数据结构二叉树

本文链接：https://blog.csdn.net/shenbossed/article/details/106253811

版权

数据结构专栏收录该内容

31 篇文章 4 订阅

订阅专栏

05-树9 Huffman Codes (30分)
In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] … c[N] f[N]
where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

我们确立哈夫曼编码的时候，我们首先确定这个编码一定是最优编码，即总长度WPL最小，其次这个编码没有歧义，前缀码没有二义性，数据仅存于叶节点中。

所以，我们的核心算法首先是计算最优编码长度。

我们需要用最小堆来建立哈夫曼树：每次从最小堆中取出两个权重最小的元素，再把这两个元素的权重之和压回最小堆中，如此往复，直到堆中只剩一个元素。然后我们还需要计算这棵哈夫曼树的总权重（WPL），用递归去求，也就是左子树的带权路径长度加上右子树的带权路径长度。

然后就是检查长度是否正确，建树的过程中是否满足前缀码要求。

编码长度最坏的情况下就是N-1 在这里插入图片描述
如果编码长度超过n-1，则当读完的时候输出错误。

在测试前缀码是否符合要求的时候，我们需要建立相应的树。
在这里插入图片描述
对于1，我们就建立右子树，0，就是建立左子树。并且都是空节点。当到最后的时候将最后一个节点给与权重。

到这里的时候我们还是满足条件。
但是我们来看下一个的时候会怎么样？

到这里的时候我们会发现，在经历第二个0的时候，我们碰到了带权重的结点。所以这就不满足前缀码的条件了。
在这里插入图片描述
当到这一个的时候，我们就会发现，到最后一个1的时候，不是叶子结点，所以也不符合条件。

我们来看一下代码
首先是前面的一些定义

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
// Capacity of the symbol/frequency arrays and of the heap's slot array.
#define Maxsize 64
// Handle to a binary tree node (used for the Huffman tree and the code trie).
typedef struct TreeNode *Huffman;
// ch[i] is the i-th symbol read from the input.
char ch[Maxsize];
// N: number of symbols; w[i]: frequency of ch[i];
// TotalCodes: value returned by WPL() for the Huffman tree built in main().
int N,w[Maxsize],TotalCodes;
struct TreeNode// binary tree node
{
	int weight;          // frequency, or sum of the children's weights
	Huffman left,right;  // children; both NULL for a leaf
};
// Handle to a min-heap of tree nodes.
typedef struct HeapNode *MinHeap;
struct HeapNode// min-heap ordered by node weight
{
	// Slot 0 holds a sentinel node; real elements live in slots 1..size.
	Huffman data[Maxsize];
	// Number of real elements currently stored.
	int size;	
};

要实现的函数：

MinHeap CreateHeap();// create an empty min-heap
Huffman CreateHuffman();// create a zero-weight tree node with no children
void Insert(MinHeap H,Huffman x);// insert a node into the min-heap
Huffman DeleteMin(MinHeap H);// remove and return the minimum-weight node
Huffman BuildHuffman(MinHeap H);// build the Huffman tree from the heap's nodes
int WPL(Huffman root, int depth);// weighted path length of the tree below root
int judge();// read one submission and decide whether it is correct

主函数入口：

int main()
{
	int M;
	Huffman tmp,root;
	scanf("%d",&N);
	MinHeap H=CreateHeap();
	for(int i=0;i<N;i++)
	{
		getchar();//吸收缓冲区字符
		scanf("%c %d",&ch[i],&w[i]);//ch数组存相应的编码符号,w数组存对应的频率
		tmp=CreateHuffman();
		tmp->weight=w[i];
		Insert(H,tmp);//往堆中插入频率
	}
	root=BuildHuffman(H);//建造哈夫曼树
	TotalCodes=WPL(root,0);//计算总权重
	scanf("%d",&M);
	for(int i=0;i<M;i++)//判断
	{
		if(judge())
			printf("Yes\n");
		else
			printf("No\n");
	}
	return 0;

}

建造最小堆

MinHeap CreateHeap()
{
	MinHeap H;
	H=(MinHeap)malloc(sizeof(struct HeapNode));
	H->size=0;
	H->data[0]=(Huffman)malloc(sizeof(struct TreeNode));
	H->data[0]->left=H->data[0]->right=NULL;
	H->data[0]->weight=-1;
	return H;
}

建立哈夫曼结点

Huffman CreateHuffman()
{
	Huffman H;
	H=(Huffman)malloc(sizeof(struct TreeNode));
	H->left=H->right=NULL;
	H->weight=0;
	return H;
}

往最小堆中插入元素

/*
 * Adds node x to min-heap H, keeping the heap ordered by weight.
 *
 * No capacity check is performed; the caller must make sure a free slot
 * exists.
 */
void Insert(MinHeap H,Huffman x)
{
	int slot;

	H->size+=1;
	/* Sift up: starting from the new last slot, pull every heavier parent down
	 * one level. When slot reaches 1 the comparison is against the node kept
	 * in slot 0, which ends the loop as long as x is not lighter than it. */
	for(slot=H->size;x->weight<H->data[slot/2]->weight;slot/=2)
		H->data[slot]=H->data[slot/2];
	H->data[slot]=x;
}

从最小堆中删除最小的元素

/*
 * Removes and returns the minimum-weight node of min-heap H.
 *
 * Returns NULL when the heap is empty. The original read the uninitialised
 * slot 1 and drove size to -1 in that case.
 */
Huffman DeleteMin(MinHeap H)
{
	Huffman Mintem,temp;
	int parent,child;

	if(H->size<1)
		return NULL;
	Mintem=H->data[1];
	/* Take the last element out and look for its new position from the root. */
	temp=H->data[H->size--];
	for(parent=1;parent*2<=H->size;parent=child)
	{
		child=parent*2;
		/* Choose the lighter child; the right child exists only when the left
		 * child is not the last element. */
		if((child!=H->size)&&(H->data[child]->weight>H->data[child+1]->weight))
			child++;
		if(temp->weight<=H->data[child]->weight)
			break;
		H->data[parent]=H->data[child];/* move the lighter child up */
	}
	H->data[parent]=temp;
	return Mintem;
}

建造哈夫曼树

/*
 * Builds a Huffman tree from the nodes currently stored in min-heap H and
 * returns its root.
 *
 * Each round removes the two lightest nodes, joins them under a new parent
 * whose weight is their sum, and puts the parent back. With k nodes in the
 * heap on entry this takes k-1 rounds, after which the single remaining node
 * is removed and returned as the root.
 */
Huffman BuildHuffman(MinHeap H)
{
	int merges=H->size-1;

	while(merges-->0)
	{
		Huffman parent=CreateHuffman();
		parent->left=DeleteMin(H);
		parent->right=DeleteMin(H);
		parent->weight=parent->left->weight+parent->right->weight;
		Insert(H,parent);
	}
	return DeleteMin(H);
}

计算权重

/*
 * Returns the weighted path length of the subtree rooted at root, where root
 * itself sits at the given depth: the sum over all leaves of
 * (leaf depth * leaf weight).
 *
 * root must not be NULL. A node is treated as a leaf only when both children
 * are NULL; otherwise both children are visited.
 */
int WPL(Huffman root,int depth)
{
	int is_leaf=(root->left==NULL&&root->right==NULL);

	if(is_leaf)
		return depth*root->weight;
	/* Internal node: its own weight is ignored, children are one level deeper. */
	return WPL(root->left,depth+1)+WPL(root->right,depth+1);
}

判断函数

/* Releases every node of the code trie rooted at node (NULL is allowed). */
static void FreeCodeTree(Huffman node)
{
	if(node==NULL)
		return;
	FreeCodeTree(node->left);
	FreeCodeTree(node->right);
	free(node);
}

/*
 * Reads one submission (N lines of "symbol code") from stdin and decides
 * whether it is a correct answer.
 *
 * A submission is accepted when
 *   - every symbol is one of the N known symbols,
 *   - every code consists only of '0'/'1' and is shorter than N characters,
 *   - no code equals, or is a prefix of, another code, and
 *   - the total encoded length equals TotalCodes.
 *
 * All N lines are consumed even after the submission has been rejected, so
 * the next call starts at the next submission.
 *
 * Returns 1 if the submission is correct, 0 otherwise.
 */
int judge()
{
	Huffman T,p;
	char ch1,codes[Maxsize];
	int length=0,flag=1,j,k,len;

	T=CreateHuffman();/* root of the trie holding this submission's codes */
	if(T==NULL)
		flag=0;
	for(int i=0;i<N;i++)
	{
		/* Width 63 is Maxsize-1, so the code always fits the buffer. */
		if(scanf(" %c %63s",&ch1,codes)!=2)
		{
			flag=0;
			break;
		}
		if(!flag)
			continue;/* already rejected: only consume the remaining lines */
		len=(int)strlen(codes);
		/* Bounded lookup of the symbol; j==N means it is unknown. */
		for(j=0;j<N&&ch[j]!=ch1;j++)
			;
		if(j==N||len>=N)
		{
			flag=0;
			continue;
		}
		p=T;
		for(k=0;k<len&&flag;k++)
		{
			Huffman *next;
			if(codes[k]=='0')
				next=&p->left;
			else if(codes[k]=='1')
				next=&p->right;
			else
			{
				flag=0;/* not a binary digit */
				break;
			}
			if(*next==NULL)
				*next=CreateHuffman();
			p=*next;
			/* A marked node on the path means an earlier code is a prefix of
			 * (or equal to) this one. */
			if(p==NULL||p->weight)
				flag=0;
		}
		if(!flag)
			continue;
		if(p->left||p->right)
			flag=0;/* this code is a prefix of an earlier, longer code */
		else
		{
			/* Mark the end of a codeword with a fixed non-zero value instead
			 * of the frequency, so zero-frequency symbols are detected too. */
			p->weight=1;
			length+=len*w[j];
		}
	}
	FreeCodeTree(T);/* the trie is per-submission; release it before returning */
	if(length!=TotalCodes)
		flag=0;
	return flag;
}

总代码

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
// Capacity of the symbol/frequency arrays and of the heap's slot array.
#define Maxsize 64
// Handle to a binary tree node (used for the Huffman tree and the code trie).
typedef struct TreeNode *Huffman;
// ch[i] is the i-th symbol read from the input.
char ch[Maxsize];
// N: number of symbols; w[i]: frequency of ch[i];
// TotalCodes: value returned by WPL() for the Huffman tree built in main().
int N,w[Maxsize],TotalCodes;
// Binary tree node.
struct TreeNode
{
	int weight;          // frequency, or sum of the children's weights
	Huffman left,right;  // children; both NULL for a leaf
};
// Handle to a min-heap of tree nodes.
typedef struct HeapNode *MinHeap;
// Min-heap ordered by node weight.
struct HeapNode
{
	// Slot 0 holds a sentinel node; real elements live in slots 1..size.
	Huffman data[Maxsize];
	// Number of real elements currently stored.
	int size;	
};
// Create an empty min-heap.
MinHeap CreateHeap();
// Create a zero-weight tree node with no children.
Huffman CreateHuffman();
// Insert a node into the min-heap.
void Insert(MinHeap H,Huffman x);
// Remove and return the minimum-weight node.
Huffman DeleteMin(MinHeap H);
// Build the Huffman tree from the heap's nodes.
Huffman BuildHuffman(MinHeap H);
// Weighted path length of the tree below root.
int WPL(Huffman root, int depth);
// Read one submission and decide whether it is correct.
int judge();
int main()
{
	int M;
	Huffman tmp,root;
	scanf("%d",&N);
	MinHeap H=CreateHeap();
	for(int i=0;i<N;i++)
	{
		getchar();
		scanf("%c %d",&ch[i],&w[i]);
		tmp=CreateHuffman();
		tmp->weight=w[i];
		Insert(H,tmp);
	}
	root=BuildHuffman(H);
	TotalCodes=WPL(root,0);
	scanf("%d",&M);
	for(int i=0;i<M;i++)
	{
		if(judge())
			printf("Yes\n");
		else
			printf("No\n");
	}
	return 0;

}
MinHeap CreateHeap()
{
	MinHeap H;
	H=(MinHeap)malloc(sizeof(struct HeapNode));
	H->size=0;
	H->data[0]=(Huffman)malloc(sizeof(struct TreeNode));
	H->data[0]->left=H->data[0]->right=NULL;
	H->data[0]->weight=-1;
	return H;
}
Huffman CreateHuffman()
{
	Huffman H;
	H=(Huffman)malloc(sizeof(struct TreeNode));
	H->left=H->right=NULL;
	H->weight=0;
	return H;
}
/*
 * Adds node x to min-heap H, keeping the heap ordered by weight.
 *
 * No capacity check is performed; the caller must make sure a free slot
 * exists.
 */
void Insert(MinHeap H,Huffman x)
{
	int slot;

	H->size+=1;
	/* Sift up: starting from the new last slot, pull every heavier parent down
	 * one level. When slot reaches 1 the comparison is against the node kept
	 * in slot 0, which ends the loop as long as x is not lighter than it. */
	for(slot=H->size;x->weight<H->data[slot/2]->weight;slot/=2)
		H->data[slot]=H->data[slot/2];
	H->data[slot]=x;
}
/*
 * Removes and returns the minimum-weight node of min-heap H.
 *
 * Returns NULL when the heap is empty. The original read the uninitialised
 * slot 1 and drove size to -1 in that case.
 */
Huffman DeleteMin(MinHeap H)
{
	Huffman Mintem,temp;
	int parent,child;

	if(H->size<1)
		return NULL;
	Mintem=H->data[1];
	/* Take the last element out and look for its new position from the root. */
	temp=H->data[H->size--];
	for(parent=1;parent*2<=H->size;parent=child)
	{
		child=parent*2;
		/* Choose the lighter child; the right child exists only when the left
		 * child is not the last element. */
		if((child!=H->size)&&(H->data[child]->weight>H->data[child+1]->weight))
			child++;
		if(temp->weight<=H->data[child]->weight)
			break;
		H->data[parent]=H->data[child];/* move the lighter child up */
	}
	H->data[parent]=temp;
	return Mintem;
}
/*
 * Builds a Huffman tree from the nodes currently stored in min-heap H and
 * returns its root.
 *
 * Each round removes the two lightest nodes, joins them under a new parent
 * whose weight is their sum, and puts the parent back. With k nodes in the
 * heap on entry this takes k-1 rounds, after which the single remaining node
 * is removed and returned as the root.
 */
Huffman BuildHuffman(MinHeap H)
{
	int merges=H->size-1;

	while(merges-->0)
	{
		Huffman parent=CreateHuffman();
		parent->left=DeleteMin(H);
		parent->right=DeleteMin(H);
		parent->weight=parent->left->weight+parent->right->weight;
		Insert(H,parent);
	}
	return DeleteMin(H);
}
/*
 * Returns the weighted path length of the subtree rooted at root, where root
 * itself sits at the given depth: the sum over all leaves of
 * (leaf depth * leaf weight).
 *
 * root must not be NULL. A node is treated as a leaf only when both children
 * are NULL; otherwise both children are visited.
 */
int WPL(Huffman root,int depth)
{
	int is_leaf=(root->left==NULL&&root->right==NULL);

	if(is_leaf)
		return depth*root->weight;
	/* Internal node: its own weight is ignored, children are one level deeper. */
	return WPL(root->left,depth+1)+WPL(root->right,depth+1);
}
/* Releases every node of the code trie rooted at node (NULL is allowed). */
static void FreeCodeTree(Huffman node)
{
	if(node==NULL)
		return;
	FreeCodeTree(node->left);
	FreeCodeTree(node->right);
	free(node);
}

/*
 * Reads one submission (N lines of "symbol code") from stdin and decides
 * whether it is a correct answer.
 *
 * A submission is accepted when
 *   - every symbol is one of the N known symbols,
 *   - every code consists only of '0'/'1' and is shorter than N characters,
 *   - no code equals, or is a prefix of, another code, and
 *   - the total encoded length equals TotalCodes.
 *
 * All N lines are consumed even after the submission has been rejected, so
 * the next call starts at the next submission.
 *
 * Returns 1 if the submission is correct, 0 otherwise.
 */
int judge()
{
	Huffman T,p;
	char ch1,codes[Maxsize];
	int length=0,flag=1,j,k,len;

	T=CreateHuffman();/* root of the trie holding this submission's codes */
	if(T==NULL)
		flag=0;
	for(int i=0;i<N;i++)
	{
		/* Width 63 is Maxsize-1, so the code always fits the buffer. */
		if(scanf(" %c %63s",&ch1,codes)!=2)
		{
			flag=0;
			break;
		}
		if(!flag)
			continue;/* already rejected: only consume the remaining lines */
		len=(int)strlen(codes);
		/* Bounded lookup of the symbol; j==N means it is unknown. */
		for(j=0;j<N&&ch[j]!=ch1;j++)
			;
		if(j==N||len>=N)
		{
			flag=0;
			continue;
		}
		p=T;
		for(k=0;k<len&&flag;k++)
		{
			Huffman *next;
			if(codes[k]=='0')
				next=&p->left;
			else if(codes[k]=='1')
				next=&p->right;
			else
			{
				flag=0;/* not a binary digit */
				break;
			}
			if(*next==NULL)
				*next=CreateHuffman();
			p=*next;
			/* A marked node on the path means an earlier code is a prefix of
			 * (or equal to) this one. */
			if(p==NULL||p->weight)
				flag=0;
		}
		if(!flag)
			continue;
		if(p->left||p->right)
			flag=0;/* this code is a prefix of an earlier, longer code */
		else
		{
			/* Mark the end of a codeword with a fixed non-zero value instead
			 * of the frequency, so zero-frequency symbols are detected too. */
			p->weight=1;
			length+=len*w[j];
		}
	}
	FreeCodeTree(T);/* the trie is per-submission; release it before returning */
	if(length!=TotalCodes)
		flag=0;
	return flag;
}

希望大家能够好好体会哈夫曼树的代码，这是一个难点，希望大家能够多花点时间研究研究，取得进步！

mapleshl

关注

5
点赞
踩
10

收藏

觉得还不错? 一键收藏
0
评论
05-树9 Huffman Codes (30分)(数据结构)(C语言实现)(哈夫曼树)

05-树9 Huffman Codes (30分)In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman c
复制链接

扫一扫

专栏目录