C++实现哈夫曼编码

最新推荐文章于 2024-03-09 17:33:17 发布

ALuoJi

最新推荐文章于 2024-03-09 17:33:17 发布

阅读量6.7k

点赞数 18

分类专栏：数据结构文章标签： c++ 霍夫曼树数据结构

本文链接：https://blog.csdn.net/AHuRui/article/details/124985516

版权

数据结构专栏收录该内容

15 篇文章 3 订阅

订阅专栏

文章目录

思路
相关功能实现
完整代码实现
测试样例

代码转自涂然哥，只做整理和略微修改

思路

1.输入一个字符串后统计每个字符出现的次数（频率）
2.接下来按照规则构造一棵哈夫曼树
3.然后01编码（即给每个字符赋予一个01组成的字符串）
4.将我们输入的字符串按照编码规则生成01字符串
5.解码：将字符串的01编码按照规则还原并输出

相关功能实现

结构体的定义

/*记录每个字符出现的次数*/
typedef struct wordcnt
{
    char ch;
    int cnt=0;
}Count;

/*整个字符串的字符构成*/
typedef struct NumCount
{
    Count count[MaxSize];
    int length=0;
}NumCount;

/*核心哈夫曼树*/
typedef struct HTree
{
    char data;
    int weight;
    int parent,lchild,rchild;
}HTNode,*HuffmanTree;

/*树的节点的编码*/
typedef struct HCode
{
    char data;
    char* str;
}*HuffmanCode;

函数申明

void ReadData(char *source);  // 读入文件 
void WordCount(char *data,NumCount *paraCnt); // 统计次数 
void Show(NumCount *paraCnt);   // 展示次数 
void CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray);  // 创建哈夫曼树 
void select(HuffmanTree HT,int top,int *s1,int *s2);  // 选择权重最小的两个节点 
void CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length);  // 创建哈夫曼编码 
void Encode(char *data,HuffmanCode HC,int length);  // 将读入的文件编码，写到txt文件 
void Decode(HuffmanTree HT,int length);  //读入编码文件，解码

读入文件

将我们需要压缩的字符串从文件读入到数组中

void ReadData(char *source)
{
	//打开文件读入数据 
	ifstream infile;
	infile.open("in.txt");
	cout<<"Reading..."<<endl;
	cout<<"the input file is:"<<endl;
	infile.getline(source,MaxSize);
	cout<<source<<endl;
	infile.close();
	cout<<endl;
}

统计次数

统计读入的字符串中每个字符分别出现多少次

void WordCount(char* data,NumCount* paraCnt)
{
    int flag;
    int len=strlen(data);
    for(int i=0;i<len;++i)
    {
        flag=0;
        for(int j=0;j<paraCnt->length;++j)
        {
            if(data[i]==paraCnt->count[j].ch)
            {
                ++paraCnt->count[j].cnt;
                flag=1;
                break;
            }
        }
        if(flag==0)
        {
            paraCnt->count[paraCnt->length].ch=data[i];
            ++paraCnt->count[paraCnt->length].cnt;
            ++paraCnt->length;
        }
    }
}

展示次数

将每个字符及其出现的次数输出

void Show(NumCount* paraCnt)
{
	cout<<"the length is "<<paraCnt->length<<endl;
	for(int i = 0;i < paraCnt->length;++i)
	{
		cout<<"The character "<<paraCnt->count[i].ch<<"  appears  "<<paraCnt->count[i].cnt<<endl;
	}
	cout<<endl;
}

创建哈夫曼树

核心哈夫曼树的构造，此处会多次调用select函数找到权重最小的两个节点

void CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray)
{
    if(length<=1)
    printf("ERROR!!!\r\n");

	int s1,s2;
	int m = length*2-1;  // 没有度为1的节点，则总结点是2*叶子节点数-1个 
	HT = new HTNode[m+1];
	for(int i = 1;i <= m;++i)  // 初始化 
	{
		HT[i].parent = 0;
		HT[i].lchild = 0;
		HT[i].rchild = 0;
	}
	
	for(int i = 1;i <= length;++i) 
	{
		HT[i].data = cntarray.count[i-1].ch;
		HT[i].weight = cntarray.count[i-1].cnt;
	}
	
	for(int i = length + 1;i <= m;++i)
	{
		select(HT,i-1,&s1,&s2);  // 从前面的范围里选择权重最小的两个节点 
		HT[s1].parent = i;
		HT[s2].parent = i;
		HT[i].lchild = s1;
		HT[i].rchild = s2;
		HT[i].weight = HT[s1].weight + HT[s2].weight;  // 得到一个新节点 
	}
}

选择权重最小的两个节点

void select(HuffmanTree HT,int top,int* s1,int* s2)
{
	int min = INT_MAX;
	for(int i = 1;i <= top;++i)  // 选择没有双亲的节点中，权重最小的节点 
	{
		if(HT[i].weight < min && HT[i].parent == 0)
		{
			min = HT[i].weight;
			*s1 = i;
		}
	}
	
	min = INT_MAX;
	for(int i = 1;i <= top;++i)  // 选择没有双亲的节点中，权重次小的节点 
	{
		if(HT[i].weight < min && i != *s1 && HT[i].parent == 0)
		{
			min = HT[i].weight;
			*s2 = i;
		}
	}
}

创建哈夫曼编码

将哈夫曼树中的每一个非空节点生成一个对应的01编码

void CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length)
{
	HC = new HCode[length+1];
	char *cd = new char[length];  // 存储编码的临时空间 
	cd[length-1] = '\0';  // 方便之后调用strcpy函数 
	int c,f,start;
	for(int i = 1;i <= length;++i)
	{
		start = length-1;  // start表示编码在临时空间内的起始下标，由于是从叶子节点回溯，所以是从最后开始 
		c = i;
		f = HT[c].parent;
		while(f != 0)
		{
			--start;  // 由于是回溯，所以从临时空间的最后往回计 
			if(HT[f].lchild == c)
				cd[start] = '0';
			else 
				cd[start] = '1';
			c = f;
			f = HT[c].parent;
		}
		HC[i].str = new char[length-start];  // 最后，实际使用的编码空间大小是length-start 
		HC[i].data = HT[i].data;
		strcpy(HC[i].str,&cd[start]);  // 从实际起始地址开始，拷贝到编码结构中 
	}
	delete[] cd;
}

将读入的文件编码，写到txt文件中

void Encode(char* data,HuffmanCode HC,int length)
{
	ofstream outfile;
	outfile.open("code.txt");
	for(int i = 0;i < strlen(data);++i)  // 依次读入数据，查找对应的编码，写入编码文件 
	{
		for(int j = 1;j <= length;++j)
		{
			if(data[i] == HC[j].data)
			{
				outfile<<HC[j].str;
			}
		}
	}
	outfile.close();
	cout<<"the code txt has been written"<<endl;
	cout<<endl;
}

读入编码文件，解码

void Decode(HuffmanTree HT,int length)
{
	char codetxt[MaxSize*length];
	ifstream infile;
	infile.open("code.txt");
	infile.getline(codetxt,MaxSize*length);
	infile.close();
	
	ofstream outfile;
   	outfile.open("out.txt");
	
	int root = 2*length-1;  // 从根节点开始遍历 
	for(int i = 0;i < strlen(codetxt);++i)
	{
		if(codetxt[i] == '0') root = HT[root].lchild;  //为0表示向左遍历 
		else if(codetxt[i] == '1') root = HT[root].rchild; //为1表示向右遍历 
		if(HT[root].lchild == 0 && HT[root].rchild == 0)  // 如果已经是叶子节点，输出到输出文件中，然后重新回到根节点 
		{
			outfile<<HT[root].data;
			root = 2*length-1;
		}
	}
	outfile.close();
	cout<<"the output txt has been written"<<endl;
	cout<<endl;
}

完整代码实现

#include <iostream>
#include <fstream>
#include <string.h>
using namespace std; 

#define MaxSize 1024

typedef struct wordcnt
{
    char ch;
    int cnt=0;
}Count;

typedef struct NumCount
{
    Count count[MaxSize];
    int length=0;
}NumCount;

typedef struct HTree
{
    char data;
    int weight;
    int parent,lchild,rchild;
}HTNode,*HuffmanTree;

typedef struct HCode
{
    char data;
    char* str;
}*HuffmanCode;

void ReadData(char *source);  // 读入文件 
void WordCount(char *data,NumCount *paraCnt); // 统计次数 
void Show(NumCount *paraCnt);   // 展示次数 
void CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray);  // 创建哈夫曼树 
void select(HuffmanTree HT,int top,int *s1,int *s2);  // 选择权重最小的两个节点 
void CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length);  // 创建哈夫曼编码 
void Encode(char *data,HuffmanCode HC,int length);  // 将读入的文件编码，写到txt文件 
void Decode(HuffmanTree HT,int length);  //读入编码文件，解码 

int main(int argc, char** argv) {
	char data[MaxSize];  
	NumCount Cntarray;
	ReadData(data);  // 读入数据 
	WordCount(data,&Cntarray);  // 统计次数 
	Show(&Cntarray); //可以查看每个单词出现的对应次数 
	HuffmanTree tree;
	CreateHuffmanTree(tree,Cntarray.length,Cntarray);  // 建树 
	HuffmanCode code;  
	CreateHuffmanCode(tree,code,Cntarray.length);  // 创建编码 
	Encode(data,code,Cntarray.length);  // 生成编码文件 
	Decode(tree,Cntarray.length);  // 解码 
	cout<<"Please view the generated TXT file to check the result"<<endl; 
	return 0;
}

void ReadData(char *source)
{
	//打开文件读入数据 
	ifstream infile;
	infile.open("in.txt");
	cout<<"Reading..."<<endl;
	cout<<"the input file is:"<<endl;
	infile.getline(source,MaxSize);
	cout<<source<<endl;
	infile.close();
	cout<<endl;
}

void WordCount(char *data,NumCount *paraCnt)
{
	int flag;// 标识是否已经记录 
	int len = strlen(data);
	for(int i = 0;i < len;++i)
	{
		flag = 0;
		for(int j = 0;j < paraCnt->length;++j)
		{
			if(paraCnt->count[j].ch == data[i]) // 若已有记录，直接++ 
			{
				++paraCnt->count[j].cnt;
				flag = 1;
				break;
			}
			
		}
		if(!flag) // 没有记录，则新增 
		{
			paraCnt->count[paraCnt->length].ch = data[i];
			++paraCnt->count[paraCnt->length].cnt;
			++paraCnt->length;
		}
	}
}

void Show(NumCount* paraCnt)
{
	cout<<"the length is "<<paraCnt->length<<endl;
	for(int i = 0;i < paraCnt->length;++i)
	{
		cout<<"The character "<<paraCnt->count[i].ch<<"  appears  "<<paraCnt->count[i].cnt<<endl;
	}
	cout<<endl;
}

void CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray)
{
    if(length<=1)
    printf("ERROR!!!\r\n");

	int s1,s2;
	int m = length*2-1;  // 没有度为1的节点，则总结点是2*叶子节点数-1个 
	HT = new HTNode[m+1];
	for(int i = 1;i <= m;++i)  // 初始化 
	{
		HT[i].parent = 0;
		HT[i].lchild = 0;
		HT[i].rchild = 0;
	}
	
	for(int i = 1;i <= length;++i) 
	{
		HT[i].data = cntarray.count[i-1].ch;
		HT[i].weight = cntarray.count[i-1].cnt;
	}
	
	for(int i = length + 1;i <= m;++i)
	{
		select(HT,i-1,&s1,&s2);  // 从前面的范围里选择权重最小的两个节点 
		HT[s1].parent = i;
		HT[s2].parent = i;
		HT[i].lchild = s1;
		HT[i].rchild = s2;
		HT[i].weight = HT[s1].weight + HT[s2].weight;  // 得到一个新节点 
	}
}

void select(HuffmanTree HT,int top,int* s1,int* s2)
{
	int min = INT_MAX;
	for(int i = 1;i <= top;++i)  // 选择没有双亲的节点中，权重最小的节点 
	{
		if(HT[i].weight < min && HT[i].parent == 0)
		{
			min = HT[i].weight;
			*s1 = i;
		}
	}
	
	min = INT_MAX;
	for(int i = 1;i <= top;++i)  // 选择没有双亲的节点中，权重次小的节点 
	{
		if(HT[i].weight < min && i != *s1 && HT[i].parent == 0)
		{
			min = HT[i].weight;
			*s2 = i;
		}
	}
}

void CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length)
{
	HC = new HCode[length+1];
	char *cd = new char[length];  // 存储编码的临时空间 
	cd[length-1] = '\0';  // 方便之后调用strcpy函数 
	int c,f,start;
	for(int i = 1;i <= length;++i)
	{
		start = length-1;  // start表示编码在临时空间内的起始下标，由于是从叶子节点回溯，所以是从最后开始 
		c = i;
		f = HT[c].parent;
		while(f != 0)
		{
			--start;  // 由于是回溯，所以从临时空间的最后往回计 
			if(HT[f].lchild == c)
				cd[start] = '0';
			else 
				cd[start] = '1';
			c = f;
			f = HT[c].parent;
		}
		HC[i].str = new char[length-start];  // 最后，实际使用的编码空间大小是length-start 
		HC[i].data = HT[i].data;
		strcpy(HC[i].str,&cd[start]);  // 从实际起始地址开始，拷贝到编码结构中 
	}
	delete[] cd;
}

void Encode(char* data,HuffmanCode HC,int length)
{
	ofstream outfile;
	outfile.open("code.txt");
	for(int i = 0;i < strlen(data);++i)  // 依次读入数据，查找对应的编码，写入编码文件 
	{
		for(int j = 1;j <= length;++j)
		{
			if(data[i] == HC[j].data)
			{
				outfile<<HC[j].str;
			}
		}
	}
	outfile.close();
	cout<<"the code txt has been written"<<endl;
	cout<<endl;
}

void Decode(HuffmanTree HT,int length)
{
	char codetxt[MaxSize*length];
	ifstream infile;
	infile.open("code.txt");
	infile.getline(codetxt,MaxSize*length);
	infile.close();
	
	ofstream outfile;
   	outfile.open("out.txt");
	
	int root = 2*length-1;  // 从根节点开始遍历 
	for(int i = 0;i < strlen(codetxt);++i)
	{
		if(codetxt[i] == '0') root = HT[root].lchild;  //为0表示向左遍历 
		else if(codetxt[i] == '1') root = HT[root].rchild; //为1表示向右遍历 
		if(HT[root].lchild == 0 && HT[root].rchild == 0)  // 如果已经是叶子节点，输出到输出文件中，然后重新回到根节点 
		{
			outfile<<HT[root].data;
			root = 2*length-1;
		}
	}
	outfile.close();
	cout<<"the output txt has been written"<<endl;
	cout<<endl;
}

测试样例

输入：in.txt
In my lifetime, I will be a sincere person and never give up my love and dedication to life. I will live an infinite life in a limited time and space.
输出
the input file is:
In my lifetime, I will be a sincere person and never give up my love and dedication to life. I will live an infinite life in a limited time and space.
the length is 24
The character I appears 3
The character n appears 12
The character appears 30
The character m appears 5
The character y appears 2
The character l appears 10
The character i appears 18
The character f appears 4
The character e appears 18
The character t appears 6
The character , appears 1
The character w appears 2
The character b appears 1
The character a appears 8
The character s appears 3
The character c appears 3
The character r appears 3
The character p appears 3
The character o appears 4
The character d appears 6
The character v appears 4
The character g appears 1
The character u appears 1
The character . appears 2
the code txt has been written
the output txt has been written
Please view the generated TXT file to check the result

ALuoJi

关注

18
点赞
踩
142

收藏

觉得还不错? 一键收藏
打赏
2
评论
C++实现哈夫曼编码

文章目录思路相关功能实现结构体的定义函数申明读入文件统计次数展示次数创建哈夫曼树选择权重最小的两个节点创建哈夫曼编码将读入的文件编码，写到txt文件中读入编码文件，解码完整代码实现测试样例代码转自涂然哥，只做整理和略微修改思路1.输入一个字符串后统计每个字符出现的次数（频率）2.接下来按照规则构造一棵哈夫曼树3.然后01编码（即给每个字符赋予一个01组成的字符串）4.将我们输入的字符串按照编码规则生成01字符串5.解码：将字符串的01编码按照规则还原并输出相关功能实现结构体的定义
复制链接

扫一扫