C++实现哈夫曼编码

代码转自涂然哥,只做整理和略微修改

思路

1.输入一个字符串后统计每个字符出现的次数(频率)
2.接下来按照规则构造一棵哈夫曼树
3.然后01编码(即给每个字符赋予一个01组成的字符串)
4.将我们输入的字符串按照编码规则生成01字符串
5.解码:将字符串的01编码按照规则还原并输出

相关功能实现

结构体的定义

/*记录每个字符出现的次数*/
typedef struct wordcnt
{
    char ch;
    int cnt=0;
}Count;

/*整个字符串的字符构成*/
typedef struct NumCount
{
    Count count[MaxSize];
    int length=0;
}NumCount;

/*核心哈夫曼树*/
typedef struct HTree
{
    char data;
    int weight;
    int parent,lchild,rchild;
}HTNode,*HuffmanTree;

/*树的节点的编码*/
typedef struct HCode
{
    char data;
    char* str;
}*HuffmanCode;

函数申明

void ReadData(char *source);  // 读入文件 
void WordCount(char *data,NumCount *paraCnt); // 统计次数 
void Show(NumCount *paraCnt);   // 展示次数 
void CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray);  // 创建哈夫曼树 
void select(HuffmanTree HT,int top,int *s1,int *s2);  // 选择权重最小的两个节点 
void CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length);  // 创建哈夫曼编码 
void Encode(char *data,HuffmanCode HC,int length);  // 将读入的文件编码,写到txt文件 
void Decode(HuffmanTree HT,int length);  //读入编码文件,解码 

读入文件

将我们需要压缩的字符串从文件读入到数组中

void ReadData(char *source)
{
	//打开文件读入数据 
	ifstream infile;
	infile.open("in.txt");
	cout<<"Reading..."<<endl;
	cout<<"the input file is:"<<endl;
	infile.getline(source,MaxSize);
	cout<<source<<endl;
	infile.close();
	cout<<endl;
}

统计次数

统计读入的字符串中每个字符分别出现多少次

void WordCount(char* data,NumCount* paraCnt)
{
    int flag;
    int len=strlen(data);
    for(int i=0;i<len;++i)
    {
        flag=0;
        for(int j=0;j<paraCnt->length;++j)
        {
            if(data[i]==paraCnt->count[j].ch)
            {
                ++paraCnt->count[j].cnt;
                flag=1;
                break;
            }
        }
        if(flag==0)
        {
            paraCnt->count[paraCnt->length].ch=data[i];
            ++paraCnt->count[paraCnt->length].cnt;
            ++paraCnt->length;
        }
    }
}

展示次数

将每个字符及其出现的次数输出

void Show(NumCount* paraCnt)
{
	cout<<"the length is "<<paraCnt->length<<endl;
	for(int i = 0;i < paraCnt->length;++i)
	{
		cout<<"The character "<<paraCnt->count[i].ch<<"  appears  "<<paraCnt->count[i].cnt<<endl;
	}
	cout<<endl;
}

创建哈夫曼树

核心哈夫曼树的构造,此处会多次调用select函数找到权重最小的两个节点

void CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray)
{
    if(length<=1)
    printf("ERROR!!!\r\n");

	int s1,s2;
	int m = length*2-1;  // 没有度为1的节点,则总结点是2*叶子节点数-1个 
	HT = new HTNode[m+1];
	for(int i = 1;i <= m;++i)  // 初始化 
	{
		HT[i].parent = 0;
		HT[i].lchild = 0;
		HT[i].rchild = 0;
	}
	
	for(int i = 1;i <= length;++i) 
	{
		HT[i].data = cntarray.count[i-1].ch;
		HT[i].weight = cntarray.count[i-1].cnt;
	}
	
	for(int i = length + 1;i <= m;++i)
	{
		select(HT,i-1,&s1,&s2);  // 从前面的范围里选择权重最小的两个节点 
		HT[s1].parent = i;
		HT[s2].parent = i;
		HT[i].lchild = s1;
		HT[i].rchild = s2;
		HT[i].weight = HT[s1].weight + HT[s2].weight;  // 得到一个新节点 
	}
}

选择权重最小的两个节点

void select(HuffmanTree HT,int top,int* s1,int* s2)
{
	int min = INT_MAX;
	for(int i = 1;i <= top;++i)  // 选择没有双亲的节点中,权重最小的节点 
	{
		if(HT[i].weight < min && HT[i].parent == 0)
		{
			min = HT[i].weight;
			*s1 = i;
		}
	}
	
	min = INT_MAX;
	for(int i = 1;i <= top;++i)  // 选择没有双亲的节点中,权重次小的节点 
	{
		if(HT[i].weight < min && i != *s1 && HT[i].parent == 0)
		{
			min = HT[i].weight;
			*s2 = i;
		}
	}
}

创建哈夫曼编码

将哈夫曼树中的每一个非空节点生成一个对应的01编码

void CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length)
{
	HC = new HCode[length+1];
	char *cd = new char[length];  // 存储编码的临时空间 
	cd[length-1] = '\0';  // 方便之后调用strcpy函数 
	int c,f,start;
	for(int i = 1;i <= length;++i)
	{
		start = length-1;  // start表示编码在临时空间内的起始下标,由于是从叶子节点回溯,所以是从最后开始 
		c = i;
		f = HT[c].parent;
		while(f != 0)
		{
			--start;  // 由于是回溯,所以从临时空间的最后往回计 
			if(HT[f].lchild == c)
				cd[start] = '0';
			else 
				cd[start] = '1';
			c = f;
			f = HT[c].parent;
		}
		HC[i].str = new char[length-start];  // 最后,实际使用的编码空间大小是length-start 
		HC[i].data = HT[i].data;
		strcpy(HC[i].str,&cd[start]);  // 从实际起始地址开始,拷贝到编码结构中 
	}
	delete[] cd;
}

将读入的文件编码,写到txt文件中

void Encode(char* data,HuffmanCode HC,int length)
{
	ofstream outfile;
	outfile.open("code.txt");
	for(int i = 0;i < strlen(data);++i)  // 依次读入数据,查找对应的编码,写入编码文件 
	{
		for(int j = 1;j <= length;++j)
		{
			if(data[i] == HC[j].data)
			{
				outfile<<HC[j].str;
			}
		}
	}
	outfile.close();
	cout<<"the code txt has been written"<<endl;
	cout<<endl;
}

读入编码文件,解码

void Decode(HuffmanTree HT,int length)
{
	char codetxt[MaxSize*length];
	ifstream infile;
	infile.open("code.txt");
	infile.getline(codetxt,MaxSize*length);
	infile.close();
	
	ofstream outfile;
   	outfile.open("out.txt");
	
	int root = 2*length-1;  // 从根节点开始遍历 
	for(int i = 0;i < strlen(codetxt);++i)
	{
		if(codetxt[i] == '0') root = HT[root].lchild;  //为0表示向左遍历 
		else if(codetxt[i] == '1') root = HT[root].rchild; //为1表示向右遍历 
		if(HT[root].lchild == 0 && HT[root].rchild == 0)  // 如果已经是叶子节点,输出到输出文件中,然后重新回到根节点 
		{
			outfile<<HT[root].data;
			root = 2*length-1;
		}
	}
	outfile.close();
	cout<<"the output txt has been written"<<endl;
	cout<<endl;
}

完整代码实现

#include <iostream>
#include <fstream>
#include <string.h>
using namespace std; 

#define MaxSize 1024

typedef struct wordcnt
{
    char ch;
    int cnt=0;
}Count;

typedef struct NumCount
{
    Count count[MaxSize];
    int length=0;
}NumCount;

typedef struct HTree
{
    char data;
    int weight;
    int parent,lchild,rchild;
}HTNode,*HuffmanTree;

typedef struct HCode
{
    char data;
    char* str;
}*HuffmanCode;

void ReadData(char *source);  // 读入文件 
void WordCount(char *data,NumCount *paraCnt); // 统计次数 
void Show(NumCount *paraCnt);   // 展示次数 
void CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray);  // 创建哈夫曼树 
void select(HuffmanTree HT,int top,int *s1,int *s2);  // 选择权重最小的两个节点 
void CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length);  // 创建哈夫曼编码 
void Encode(char *data,HuffmanCode HC,int length);  // 将读入的文件编码,写到txt文件 
void Decode(HuffmanTree HT,int length);  //读入编码文件,解码 

int main(int argc, char** argv) {
	char data[MaxSize];  
	NumCount Cntarray;
	ReadData(data);  // 读入数据 
	WordCount(data,&Cntarray);  // 统计次数 
	Show(&Cntarray); //可以查看每个单词出现的对应次数 
	HuffmanTree tree;
	CreateHuffmanTree(tree,Cntarray.length,Cntarray);  // 建树 
	HuffmanCode code;  
	CreateHuffmanCode(tree,code,Cntarray.length);  // 创建编码 
	Encode(data,code,Cntarray.length);  // 生成编码文件 
	Decode(tree,Cntarray.length);  // 解码 
	cout<<"Please view the generated TXT file to check the result"<<endl; 
	return 0;
}

void ReadData(char *source)
{
	//打开文件读入数据 
	ifstream infile;
	infile.open("in.txt");
	cout<<"Reading..."<<endl;
	cout<<"the input file is:"<<endl;
	infile.getline(source,MaxSize);
	cout<<source<<endl;
	infile.close();
	cout<<endl;
}

void WordCount(char *data,NumCount *paraCnt)
{
	int flag;// 标识是否已经记录 
	int len = strlen(data);
	for(int i = 0;i < len;++i)
	{
		flag = 0;
		for(int j = 0;j < paraCnt->length;++j)
		{
			if(paraCnt->count[j].ch == data[i]) // 若已有记录,直接++ 
			{
				++paraCnt->count[j].cnt;
				flag = 1;
				break;
			}
			
		}
		if(!flag) // 没有记录,则新增 
		{
			paraCnt->count[paraCnt->length].ch = data[i];
			++paraCnt->count[paraCnt->length].cnt;
			++paraCnt->length;
		}
	}
}

void Show(NumCount* paraCnt)
{
	cout<<"the length is "<<paraCnt->length<<endl;
	for(int i = 0;i < paraCnt->length;++i)
	{
		cout<<"The character "<<paraCnt->count[i].ch<<"  appears  "<<paraCnt->count[i].cnt<<endl;
	}
	cout<<endl;
}

void CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray)
{
    if(length<=1)
    printf("ERROR!!!\r\n");

	int s1,s2;
	int m = length*2-1;  // 没有度为1的节点,则总结点是2*叶子节点数-1个 
	HT = new HTNode[m+1];
	for(int i = 1;i <= m;++i)  // 初始化 
	{
		HT[i].parent = 0;
		HT[i].lchild = 0;
		HT[i].rchild = 0;
	}
	
	for(int i = 1;i <= length;++i) 
	{
		HT[i].data = cntarray.count[i-1].ch;
		HT[i].weight = cntarray.count[i-1].cnt;
	}
	
	for(int i = length + 1;i <= m;++i)
	{
		select(HT,i-1,&s1,&s2);  // 从前面的范围里选择权重最小的两个节点 
		HT[s1].parent = i;
		HT[s2].parent = i;
		HT[i].lchild = s1;
		HT[i].rchild = s2;
		HT[i].weight = HT[s1].weight + HT[s2].weight;  // 得到一个新节点 
	}
}

void select(HuffmanTree HT,int top,int* s1,int* s2)
{
	int min = INT_MAX;
	for(int i = 1;i <= top;++i)  // 选择没有双亲的节点中,权重最小的节点 
	{
		if(HT[i].weight < min && HT[i].parent == 0)
		{
			min = HT[i].weight;
			*s1 = i;
		}
	}
	
	min = INT_MAX;
	for(int i = 1;i <= top;++i)  // 选择没有双亲的节点中,权重次小的节点 
	{
		if(HT[i].weight < min && i != *s1 && HT[i].parent == 0)
		{
			min = HT[i].weight;
			*s2 = i;
		}
	}
}

void CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length)
{
	HC = new HCode[length+1];
	char *cd = new char[length];  // 存储编码的临时空间 
	cd[length-1] = '\0';  // 方便之后调用strcpy函数 
	int c,f,start;
	for(int i = 1;i <= length;++i)
	{
		start = length-1;  // start表示编码在临时空间内的起始下标,由于是从叶子节点回溯,所以是从最后开始 
		c = i;
		f = HT[c].parent;
		while(f != 0)
		{
			--start;  // 由于是回溯,所以从临时空间的最后往回计 
			if(HT[f].lchild == c)
				cd[start] = '0';
			else 
				cd[start] = '1';
			c = f;
			f = HT[c].parent;
		}
		HC[i].str = new char[length-start];  // 最后,实际使用的编码空间大小是length-start 
		HC[i].data = HT[i].data;
		strcpy(HC[i].str,&cd[start]);  // 从实际起始地址开始,拷贝到编码结构中 
	}
	delete[] cd;
}

void Encode(char* data,HuffmanCode HC,int length)
{
	ofstream outfile;
	outfile.open("code.txt");
	for(int i = 0;i < strlen(data);++i)  // 依次读入数据,查找对应的编码,写入编码文件 
	{
		for(int j = 1;j <= length;++j)
		{
			if(data[i] == HC[j].data)
			{
				outfile<<HC[j].str;
			}
		}
	}
	outfile.close();
	cout<<"the code txt has been written"<<endl;
	cout<<endl;
}

void Decode(HuffmanTree HT,int length)
{
	char codetxt[MaxSize*length];
	ifstream infile;
	infile.open("code.txt");
	infile.getline(codetxt,MaxSize*length);
	infile.close();
	
	ofstream outfile;
   	outfile.open("out.txt");
	
	int root = 2*length-1;  // 从根节点开始遍历 
	for(int i = 0;i < strlen(codetxt);++i)
	{
		if(codetxt[i] == '0') root = HT[root].lchild;  //为0表示向左遍历 
		else if(codetxt[i] == '1') root = HT[root].rchild; //为1表示向右遍历 
		if(HT[root].lchild == 0 && HT[root].rchild == 0)  // 如果已经是叶子节点,输出到输出文件中,然后重新回到根节点 
		{
			outfile<<HT[root].data;
			root = 2*length-1;
		}
	}
	outfile.close();
	cout<<"the output txt has been written"<<endl;
	cout<<endl;
}

测试样例

输入:in.txt
In my lifetime, I will be a sincere person and never give up my love and dedication to life. I will live an infinite life in a limited time and space.
输出
the input file is:
In my lifetime, I will be a sincere person and never give up my love and dedication to life. I will live an infinite life in a limited time and space.
the length is 24
The character I appears 3
The character n appears 12
The character appears 30
The character m appears 5
The character y appears 2
The character l appears 10
The character i appears 18
The character f appears 4
The character e appears 18
The character t appears 6
The character , appears 1
The character w appears 2
The character b appears 1
The character a appears 8
The character s appears 3
The character c appears 3
The character r appears 3
The character p appears 3
The character o appears 4
The character d appears 6
The character v appears 4
The character g appears 1
The character u appears 1
The character . appears 2
the code txt has been written
the output txt has been written
Please view the generated TXT file to check the result
hafumantree

  • 18
    点赞
  • 142
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

ALuoJi

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值