C语言-哈夫曼树与哈夫曼编码的实现

C语言-哈夫曼树与哈夫曼编码的实现

1、什么是哈夫曼树

结点的权:树中的结点被赋予一个表示某种意义的数值;
结点的带权路径长度:从树的根到任意结点的路径长度(经过的边数)与该结点上权值的乘积;
树的带权路径长度(WPL):树中所有叶结点的带权路径长度之和,即 $WPL = \sum\limits_{i=1}^{n} w_i l_i$,
其中 $w_i$ 是第 $i$ 个叶结点所带的权值, $l_i$ 是该叶结点到根结点的路径长度;
哈夫曼树:在含有n个带权叶结点的二叉树中,其中带权路径长度(WPL)最小的二叉树;


2、哈夫曼树的构造

哈夫曼树的构造过程

给定n个权值分别为 $w_1, w_2, \ldots, w_n$ 的结点:

  • 将这n个结点分别作为n棵仅含一个结点的二叉树,构成森林F;
  • 构造一个新结点,从F中选取两棵根结点权值最小的树作为新结点的左右子树,新结点的权值为左右子树根结点的权值之和;
  • 从F中删除刚才选出的两棵树,同时将新得到的树加入F中;
  • 重复2,3,直到F中只剩下一棵树为止;

以如下结点为例,构造一棵哈夫曼树:
结点:{a:45 , b:13 , c:12 , d:16 , e:9 , f:5}
在这里插入图片描述
哈夫曼树的特点:

  • 每个初始结点最终都是叶结点,且权值越小的结点到根结点的路径长度越长;
  • 构造过程中共新建了n-1个结点,因此哈夫曼树的结点总数为2n-1;
  • 哈夫曼树中不存在度为1的结点;

3、哈夫曼编码

哈夫曼编码:又称霍夫曼编码,是一种编码方式,哈夫曼编码是可变字长编码(VLC)的一种。
该方法完全依据字符出现概率来构造异字头的平均长度最短的码字,有时称之为最佳编码,一般就叫做Huffman编码(有时也称为霍夫曼编码)。

由哈夫曼树得到哈夫曼编码:可将字符的编码解释为从根至该字符的路径上边标记的序列,其中一般边标记0表示左孩子,标记1表示右孩子。


4、哈夫曼树与哈夫曼编码的代码实现

哈夫曼树与哈夫曼编码的结构体:

// Huffman tree node. Nodes live in one array and refer to each other by
// array index; InitHuffmanTree stores -1 in a link that is not set yet.
typedef struct
{
	int weight;  // weight of this node
	int parent;  // index of the parent node
	int lchild;  // index of the left child
	int rchild;  // index of the right child
}HTNode,*HTTree;

// One entry of the Huffman code table: a symbol together with its code.
// HuffmanCode points at an array of these entries.
typedef struct
{
	char ch;// symbol this entry belongs to
	char code[10];// code as a NUL-terminated string of '0'/'1'; holds at most 9 code characters
}CodeNode,*HuffmanCode;

哈夫曼树的初始化:

// Allocates and initialises the node array of a Huffman tree.
//
// HT : receives the newly allocated array of m nodes. It is set to NULL when
//      m is not positive or when the allocation fails, so callers can test it.
//      The caller owns the array and releases it with free().
// m  : total number of nodes in the tree.
//
// Every node starts with weight 0 and with parent/lchild/rchild set to -1,
// the value the rest of the code uses for "no link".
void InitHuffmanTree(HTTree &HT, int m)
{
	HT = NULL;

	// A non-positive count would be converted to a huge size_t by malloc.
	if (m <= 0)
		return;

	HT = (HTTree)malloc((size_t)m * sizeof(HTNode));
	if (HT == NULL)
		return;

	for (int i = 0; i < m; i++)
	{
		HT[i].weight = 0;
		HT[i].parent = -1;
		HT[i].lchild = -1;
		HT[i].rchild = -1;
	}
}

哈夫曼树挑选2个最小权值的结点(重点):

// Picks the two lightest root nodes among HT[0..n-1].
//
// A node is a candidate when it has no parent (parent == -1) and a non-zero
// weight; zero-weight nodes are therefore never selected.
//
// HT   : node array to search.
// n    : number of leading entries of HT to examine.
// min1 : receives the index of the lightest candidate; on equal weights the
//        lowest index wins.
// min2 : receives the index of the lightest candidate other than min1; on
//        equal weights the highest index wins.
//
// If fewer than two candidates exist, the missing result is set to -1.
// The search works directly on HT, so no temporary memory is allocated.
void SelectMin(HTTree &HT, int n, int &min1, int &min2)
{
	int i;

	min1 = -1;
	min2 = -1;

	// Lightest candidate: strict '<' keeps the earliest of equal weights.
	for (i = 0; i < n; i++)
	{
		if (HT[i].parent != -1 || HT[i].weight == 0)
			continue;
		if (min1 == -1 || HT[i].weight < HT[min1].weight)
			min1 = i;
	}

	// Second lightest: skip min1; '<=' keeps the latest of equal weights.
	for (i = 0; i < n; i++)
	{
		if (i == min1 || HT[i].parent != -1 || HT[i].weight == 0)
			continue;
		if (min2 == -1 || HT[i].weight <= HT[min2].weight)
			min2 = i;
	}
}

哈夫曼树的创建:

// Builds a Huffman tree from n leaf weights.
//
// HT    : receives the node array of 2*n-1 entries. Leaves occupy indices
//         0..n-1 in the order of weigh[]; merged nodes follow, and the root is
//         the last entry. Set to NULL when n is not positive or when the
//         allocation fails. The caller releases the array with free().
// weigh : the n leaf weights. SelectMin ignores nodes whose weight is 0, so
//         every weight is expected to be non-zero.
// n     : number of leaves.
void CreateHuffmanTree(HTTree &HT, int weigh[],int n)
{
	// Without this guard 2*n-1 would be negative and reach malloc as a size.
	if (n <= 0)
	{
		HT = NULL;
		return;
	}

	int total = 2 * n - 1;// total number of nodes in the finished tree
	int i, min1, min2;

	InitHuffmanTree(HT, total);
	if (HT == NULL)
		return;

	for (i = 0; i < n; i++)
		HT[i].weight = weigh[i];

	// Each round merges the two lightest parentless nodes into new node i.
	for (i = n; i < total; i++)
	{
		SelectMin(HT, i, min1, min2);
		HT[min1].parent = i;
		HT[min2].parent = i;
		HT[i].lchild = min1;
		HT[i].rchild = min2;
		HT[i].weight = HT[min1].weight + HT[min2].weight;
	}

}

哈夫曼编码的创建:

注意:这里采用的方法是从叶子结点向根结点遍历的方式,保存哈夫曼编码,在实现时要注意字符的赋值问题。

// Builds the code table for the n leaves stored at HT[0..n-1].
//
// HT       : Huffman tree whose first n entries are the leaves.
// HC       : receives a newly allocated array of n entries; set to NULL when
//            HT is NULL, n is not positive, or an allocation fails. The caller
//            releases it with free().
// codename : the n symbols, in the same order as the leaves.
// n        : number of leaves.
//
// Each code is found by walking from the leaf up to the root, so it is
// produced back to front: '0' for a left child, '1' otherwise. A code that
// does not fit into CodeNode::code is stored as an empty string instead of
// being written past the end of the array.
void CreateHuffmanCode(HTTree &HT, HuffmanCode &HC, char codename[], int n)
{
	HC = NULL;
	if (HT == NULL || n <= 0)
		return;

	// Scratch buffer: a code has at most n-1 characters plus the terminator.
	char *cd = (char *)malloc((size_t)n * sizeof(char));
	if (cd == NULL)
		return;

	HC = (HuffmanCode)malloc((size_t)n * sizeof(CodeNode));
	if (HC == NULL)
	{
		free(cd);
		return;
	}

	cd[n - 1] = '\0';

	for (int i = 0; i < n; i++)
	{
		int start = n - 1;// index of the first character of the code in cd
		int c = i;
		int par;

		HC[i].ch = codename[i];

		// Climb from the leaf to the root, filling cd from the back.
		while ((par = HT[c].parent) >= 0)
		{
			--start;
			cd[start] = (HT[par].lchild == c) ? '0' : '1';
			c = par;
		}

		// Bytes to copy, terminator included.
		size_t len = (size_t)(n - start);
		if (len > sizeof HC[i].code)
		{
			HC[i].code[0] = '\0';
			continue;
		}
		for (size_t k = 0; k < len; k++)
			HC[i].code[k] = cd[start + k];
	}
	free(cd);
}

哈夫曼编码的输出函数:

// Prints the first n entries of the code table, one "symbol: code" line each.
void OutPutHuffmanCode(HuffmanCode HC, int n)
{
	int idx = 0;

	while (idx < n)
	{
		printf("%c: %s\n", HC[idx].ch, HC[idx].code);
		++idx;
	}
}

至此,哈夫曼树与哈夫曼编码的函数实现都已完成,接下来在main函数中测试一下,并与下面这张图对比。

// Demo: builds the Huffman tree and the code table for six sample symbols,
// prints the codes, then releases both arrays.
int main()
{
	int n = 6;
	char codename[6] = { 'a','b','c','d','e','f' };
	int weigh[6] = { 45,13,12,16,9,5 };

	HTTree HT;
	HuffmanCode HC;

	CreateHuffmanTree(HT, weigh, n);
	CreateHuffmanCode(HT, HC, codename, n);
	OutPutHuffmanCode(HC, n);

	// Both arrays were allocated with malloc by the Create* functions.
	free(HC);
	free(HT);

	return 0;
}

在这里插入图片描述
在这里插入图片描述


#include #include #include #include using namespace std; # define MaxN 100//初始设定的最大结点数 # define MaxC 1000//最大编码长度 # define ImpossibleWeight 10000//结点不可能达到的权值 # define n 26//字符集的个数 //-----------哈夫曼树的结点结构类型定义----------- typedef struct //定义哈夫曼树各结点 { int weight;//权值 int parent;//双亲结点下标 int lchild;//左孩子结点下标 int rchild;//右孩子结点下标 }HTNode,*HuffmanTree;//动态分配数组存储哈夫曼树 typedef char**HuffmanCode;//动态分配数组存储哈夫曼编码表 //-------全局变量-------- HuffmanTree HT; HuffmanCode HC; int *w;//权值数组 //const int n=26;//字符集的个数 char *info;//字符值数组 int flag=0;//初始化标记 //********************************************************************** //初始化函数 //函数功能: 从终端读入字符集大小n , 以及n个字符和n个权值,建立哈夫曼树,并将它存于文件hfmTree中 //函数参数: //向量HT的前n个分量表示叶子结点,最后一个分量表示根结点,各字符的编码长度不等,所以按实际长度动态分配空间 void Select(HuffmanTree t,int i,int &s1,int &s2) { //s1为最小的两个值中序号最小的那个 int j; int k=ImpossibleWeight;//k的初值为不可能达到的最大权值 for(j=1;j<=i;j++) { if(t[j].weight<k&&t[j].parent==0) {k=t[j].weight; s1=j;} } t[s1].parent=1; k=ImpossibleWeight; for(j=1;j<=i;j++) { if(t[j].weight0),构造哈夫曼树HT,并求出n个字符的哈弗曼编码HC { int i,m,c,s1,s2,start,f; HuffmanTree p; char* cd; if(num<=1) return; m=2*num-1;//m为结点数,一棵有n个叶子结点的哈夫曼树共有2n-1个结点,可以存储在一个大小为2n-1的一维数组中 HT=(HuffmanTree)malloc((m+1)*sizeof(HTNode));//0号单元未用 //--------初始化哈弗曼树------- for(p=HT+1,i=1;iweight=*w; p->parent=0; p->lchild=0; p->rchild=0; } for(i=num+1;iweight=0; p->parent=0; p->lchild=0; p->rchild=0; } //--------建哈夫曼树------------- for(i=num+1;i<=m;i++) { Select(HT,i-1,s1,s2);//在HT[1...i-1]选择parent为0且weight最小的两个结点,其序号分别为s1和s2 HT[s1].parent=i; HT[s2].parent=i; HT[i].lchild=s1; HT[i].rchild=s2;//左孩子权值小,右孩子权值大 HT[i].weight=HT[s1].weight+HT[s2].weight; } //-------从叶子到根逆向求每个字符的哈弗曼编码-------- HC=(HuffmanCode)malloc((num+1)*sizeof(char *));//指针数组:分配n个字符编码的头指针向量 cd=(char*)malloc(n*sizeof(char*));//分配求编码的工作空间 cd[n-1]='\0';//编码结束符 for(i=1;i<=n;i++)//逐个字符求哈弗曼编码 { start=n-1;//编码结束符位置 for(c=i,f=HT[i].parent;f!=0;c=f,f=HT[f].parent)//从叶子到跟逆向求哈弗曼编码 if(HT[f].lchild==c) cd[--start]='0';//判断是左孩子还是右孩子(左为0右为1) else cd[--start]='1'; 
HC[i]=(char*)malloc((num-start)*sizeof(char*));//按所需长度分配空间 int j,h; strcpy(HC[i],&cd[start]); } free(cd); } //****************初始化函数****************** void Initialization() { flag=1;//标记为已初始化 int i; w=(int*)malloc(n*sizeof(int));//为26个字符权值分配空间 info=(char*)malloc(n*sizeof(char));//为26个字符分配空间 ifstream infile("ABC.txt",ios::in); if(!infile) { cerr<<"打开失败"<<endl; exit(1); } for(i=0;i>info[i]; infile>>w[i]; } infile.close(); cout<<"读入字符成功!"<<endl; HuffmanCoding(HT,HC,w,n); //------------打印编码----------- cout<<"依次显示各个字符的值,权值或频度,编码如下"<<endl; cout<<"字符"<<setw(6)<<"权值"<<setw(11)<<"编码"<<endl; for(i=0;i<n;i++) { cout<<setw(3)<<info[i]; cout<<setw(6)<<w[i]<<setw(12)<<HC[i+1]<<endl; } //---------将建好的哈夫曼树写入文件------------ cout<<"下面将哈夫曼树写入文件"<<endl; ofstream outfile("hfmTree.txt",ios::out); if(!outfile) { cerr<<"打开失败"<<endl; exit(1); } for(i=0;i<n;i++,w++) { outfile<<info[i]<<" "; outfile<<w[i]<<" "; outfile<<HC[i+1]<<" "; } outfile.close(); cout<<"已经将字符与对应的权值,编码写入根目录下文件hfmTree.txt"<<endl; } //*****************输入待编码字符函数************************* void Input() { char string[100]; ofstream outfile("ToBeTran.txt",ios::out); if(!outfile) { cerr<<"打开失败"<<endl; exit(1); } cout<<"请输入你想要编码的字符串(字符个数应小于100),以#结束"<>string; for(int i=0;string[i]!='\0';i++) { if(string[i]=='\0') break; outfile<<string[i]; } cout<<"获取报文成功"<<endl; outfile.close(); cout<<"------"<<"已经将报文存入根目录下的ToBeTran.txt文件"<<endl; } //******************编码函数**************** void Encoding() { int i,j; char*string; string=(char*)malloc(MaxN*sizeof(char)); cout<<"下面对根目录下的ToBeTran.txt文件中的字符进行编码"<<endl; ifstream infile("ToBeTran.txt",ios::in); if(!infile) { cerr<<"打开失败"<<endl; exit(1); } for(i=0;i>string[i]; } for(i=0;i<100;i++) if(string[i]!='#') cout<<string[i]; else break; infile.close(); ofstream outfile("CodeFile.txt",ios::out); if(!outfile) { cerr<<"打开失败"<<endl; exit(1); } for(i=0;string[i]!='#';i++) { for(j=0;j<n;j++) { if(string[i]==info[j]) outfile<<HC[j+1]; } } outfile<<'#'; outfile.close(); free(string); cout<<"编码完成------"; 
cout<<"编码已写入根目录下的文件CodeFile.txt中"<<endl; } //******************译码函数**************** void Decoding() { int j=0,i; char *code; code=(char*)malloc(MaxC*sizeof(char)); char*string; string=(char*)malloc(MaxN*sizeof(char)); cout<<"下面对根目录下的CodeFile.txt文件中的代码进行译码"<<endl; ifstream infile("CodeFile.txt",ios::in); if(!infile) { cerr<<"打开失败"<<endl; exit(1); } for( i=0;i>code[i]; if(code[i]!='#') { cout<<code[i]; } else break; } infile.close(); int m=2*n-1; for(i=0;code[i-1]!='#';i++) { if(HT[m].lchild==0) { string[j]=info[m-1]; j++; m=2*n-1; i--; } else if(code[i]=='1') m=HT[m].rchild; else if(code[i]=='0') m=HT[m].lchild; } string[j]='#'; ofstream outfile("TextFile.txt",ios::out); if(!outfile) { cerr<<"打开失败"<<endl; exit(1); } cout<<"的译码为------"<<endl; for( i=0;string[i]!='#';i++) { outfile<<string[i]; cout<<string[i]; } outfile<<'#'; outfile.close(); cout<<"------译码完成------"<<endl; cout<<"译码结果已写入根目录下的文件TextFile.txt中"<<endl; free(code); free(string); } //*************打印编码函数**************** void Code_printing() { int i; char *code; code=(char*)malloc(MaxC*sizeof(char)); cout<<"下面打印根目录下文件CodeFile.txt中的编码"<<endl; ifstream infile("CodeFile.txt",ios::in); if(!infile) { cerr<<"打开失败"<<endl; exit(1); } for( i=0;i>code[i]; if(code[i]!='#') cout<<code[i]; else break; } infile.close(); cout<<endl; ofstream outfile("CodePrin.txt",ios::out); if(!outfile) { cerr<<"打开失败"<<endl; exit(1); } for(i=0;code[i]!='#';i++) { outfile<<code[i]; } outfile.close(); free(code); cout<<"------打印结束------"<<endl; cout<<"该字符形式的编码文件已写入文件CodePrin.txt中"<<endl; } //*************打印哈夫曼树函数**************** int numb=0; void coprint(HuffmanTree start,HuffmanTree HT) //start=ht+26这是一个递归算法 { if(start!=HT) { ofstream outfile("TreePrint.txt",ios::out); if(!outfile) { cerr<<"打开失败"<rchild,HT); //递归先序遍历 cout<<setw(5*numb)<weight<rchild==0) cout<<info[start-HT-1]<<endl; outfile<weight; coprint(HT+start->lchild,HT); numb--; outfile.close(); } } void Tree_printing(HuffmanTree HT,int num) { HuffmanTree p; p=HT+2*num-1; //p=HT+26 
cout<<"下面打印赫夫曼树"<<endl; coprint(p,HT); //p=HT+26 cout<<"打印工作结束"<<endl; } //*************主函数************************** int main() { char choice; do{ cout<<"************哈弗曼编/译码器系统***************"<<endl; cout<<"请选择您所需功能:"<<endl; cout<<":初始化哈弗曼树"<<endl; cout<<":输入待编码字符串"<<endl; cout<<":利用已建好的哈夫曼树进行编码"<<endl; cout<<":利用已建好的哈夫曼树进行译码"<<endl; cout<<":打印代码文件"<<endl; cout<<":打印哈夫曼树"<<endl; cout<<":退出"<<endl; if(flag==0) { cout<<"请先初始化哈夫曼树,输入I"<<endl; cout<<""<>choice; switch(choice) { case 'I':Initialization();break; case 'W':Input();break; case 'E':Encoding();break; case 'D':Decoding();break; case 'P':Code_printing();break; case 'T':Tree_printing(HT,n);break; case 'Q':;break; default:cout<<"输入的命令出错,请重新输入!"<<endl; } }while(choice!='Q'); free(w); free(info); free(HT); free(HC); system("pause"); return 0; }
哈夫曼树是一种特殊的二叉树,它用于数据压缩和解压缩中的编码和解码。哈夫曼编码是一种前缀编码,它把不同字符映射到不同的二进制编码,使得编码后的字符串最小化。 下面是使用C语言实现哈夫曼树哈夫曼编码的代码: ```c #include <stdio.h> #include <stdlib.h> #include <string.h> #define MAX_TREE_HT 100 typedef struct MinHeapNode{ char data; unsigned freq; struct MinHeapNode *left, *right; } MinHeapNode; typedef struct MinHeap{ unsigned size; unsigned capacity; MinHeapNode **array; } MinHeap; typedef struct HuffmanNode{ char data; char *code; struct HuffmanNode *left, *right; } HuffmanNode; MinHeapNode* newNode(char data, unsigned freq){ MinHeapNode* temp = (MinHeapNode*)malloc(sizeof(MinHeapNode)); temp->left = temp->right = NULL; temp->data = data; temp->freq = freq; return temp; } MinHeap* createMinHeap(unsigned capacity){ MinHeap* minHeap = (MinHeap*)malloc(sizeof(MinHeap)); minHeap->size = 0; minHeap->capacity = capacity; minHeap->array = (MinHeapNode**)malloc(minHeap->capacity * sizeof(MinHeapNode*)); return minHeap; } void swapMinHeapNode(MinHeapNode** a, MinHeapNode** b){ MinHeapNode* t = *a; *a = *b; *b = t; } void minHeapify(MinHeap* minHeap, int idx){ int smallest = idx; int left = 2 * idx + 1; int right = 2 * idx + 2; if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq) smallest = left; if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq) smallest = right; if (smallest != idx){ swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]); minHeapify(minHeap, smallest); } } int isSizeOne(MinHeap* minHeap){ return (minHeap->size == 1); } MinHeapNode* extractMin(MinHeap* minHeap){ MinHeapNode* temp = minHeap->array[0]; minHeap->array[0] = minHeap->array[minHeap->size - 1]; --minHeap->size; minHeapify(minHeap, 0); return temp; } void insertMinHeap(MinHeap* minHeap, MinHeapNode* minHeapNode){ ++minHeap->size; int i = minHeap->size - 1; while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq){ minHeap->array[i] = minHeap->array[(i - 1) / 2]; i = (i - 1) / 2; } minHeap->array[i] = 
minHeapNode; } void buildMinHeap(MinHeap* minHeap){ int n = minHeap->size - 1; int i; for (i = (n - 1) / 2; i >= 0; --i) minHeapify(minHeap, i); } void printArr(int arr[], int n){ int i; for (i = 0; i < n; ++i) printf("%d", arr[i]); printf("\n"); } int isLeaf(MinHeapNode* root){ return !(root->left) && !(root->right); } MinHeap* createAndBuildMinHeap(char data[], int freq[], int size){ MinHeap* minHeap = createMinHeap(size); for (int i = 0; i < size; ++i) minHeap->array[i] = newNode(data[i], freq[i]); minHeap->size = size; buildMinHeap(minHeap); return minHeap; } MinHeapNode* buildHuffmanTree(char data[], int freq[], int size){ MinHeapNode *left, *right, *top; MinHeap* minHeap = createAndBuildMinHeap(data, freq, size); while (!isSizeOne(minHeap)){ left = extractMin(minHeap); right = extractMin(minHeap); top = newNode('$', left->freq + right->freq); top->left = left; top->right = right; insertMinHeap(minHeap, top); } return extractMin(minHeap); } void printCodes(MinHeapNode* root, int arr[], int top){ if (root->left){ arr[top] = 0; printCodes(root->left, arr, top + 1); } if (root->right){ arr[top] = 1; printCodes(root->right, arr, top + 1); } if (isLeaf(root)){ printf("%c: ", root->data); printArr(arr, top); } } void HuffmanCodes(char data[], int freq[], int size){ MinHeapNode* root = buildHuffmanTree(data, freq, size); int arr[MAX_TREE_HT], top = 0; printCodes(root, arr, top); } int main(){ char arr[] = {'a', 'b', 'c', 'd', 'e', 'f'}; int freq[] = {5, 9, 12, 13, 16, 45}; int size = sizeof(arr) / sizeof(arr[0]); HuffmanCodes(arr, freq, size); return 0; } ``` 该代码实现哈夫曼树哈夫曼编码的基本功能。在这个例子中,我们使用了6个字符和它们的频率来构建哈夫曼树并打印出哈夫曼编码。 输出结果如下: ``` a: 0 c: 100 b: 101 f: 110 d: 1110 e: 1111 ``` 这些二进制编码可以用于压缩数据。
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值
>