赫夫曼编码压缩和解压代码

最新推荐文章于 2024-04-08 18:10:19 发布

wordqiong

最新推荐文章于 2024-04-08 18:10:19 发布

阅读量732

点赞数 1

分类专栏：学习日志文章标签：二叉树 huffman tree

本文链接：https://blog.csdn.net/qq_54151629/article/details/121465719

版权

学习日志专栏收录该内容

11 篇文章 0 订阅

订阅专栏

思路已在代码块注释里

#define _CRT_SECURE_NO_WARNINGS
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
using namespace std;
/*哈夫曼
* 1.读入文件并计算权重
* 2.编码建树
* 注：这就是我们的基础树 压缩解压都是根据这个树来的
* 注1：为了确保解压和压缩文件相同，请记住您建树的文本文档，这将是您的密钥 压缩文件相当于公钥
*/
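/*Huffman compression algorithm:
* 1. Read the file from the start and compress it with bit operations.
* 2. For every character, walk the tree and shift each code bit into a buffer
*    from the high end (right shift).
*    The open question is how to tell that exactly 8 bits have been collected
*    so that one byte can be written out:
*    - The buffer is a 32-bit unsigned int that starts as 0x0FFFFFFF, i.e. a
*      zero nibble followed by 28 one bits; this pattern acts as a marker.
*    - Every code bit shifts the buffer right by one and is stored in bit 31.
*    - After 8 code bits the top byte holds the data and the marker has moved
*      so that the low 24 bits read 0x0FFFFF; at that point the top byte is
*      written out and the buffer is reset.
*    This is why an int, rather than a char, is used as the buffer.
*/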
/*哈夫曼解压算法：
* 1.读入文件 开始读取位
* 2.根据树开始索引解压
*/
// Occurrence counters, one per character code 0..127, indexed by the character value.
int c_weight[128] = { 0 };
// One node of the Huffman tree. Nodes live in a flat array and refer to each
// other by array index; the code in this file treats index 0 as "no node".
struct HTNode {
	unsigned int weight;  // occurrence count (sum of both children for inner nodes)
	unsigned int parent;  // index of the parent; 0 while the node is still unmerged
	unsigned int lchid;   // index of the left child, 0 if none
	unsigned int rchild;  // index of the right child, 0 if none
};
// Pointer to the first element of the node array.
using HuffmanTree = HTNode*;
// Table of NUL-terminated code strings.
using HuffmanCode = char**;
/*
 * name:   WeightCul (weight calculation)
 * author: wyh
 * func:   Counts how often each character occurs in a text file and adds the
 *         counts to the global c_weight table.
 * elem:   txtW - path of the file to scan; the file is opened and closed
 *                inside this function.
 * return: 0 if the path is null or the file cannot be opened, 1 otherwise.
 */
int WeightCul(char* txtW)
{
	if (txtW == NULL)
	{
		cout << "文件打开失败" << endl;
		return 0;
	}
	ifstream infile(txtW, ios::in);
	// Bail out if the file could not be opened.
	if (!infile.is_open())
	{
		cout << "文件打开失败" << endl;
		return 0;
	}
	const int table_size = sizeof(c_weight) / sizeof(c_weight[0]);
	char ch;
	// get(ch) fails once the end of the file is reached, so the end-of-file
	// marker itself is never used as an index (a while(!eof()) loop would
	// process it once and write to c_weight[-1]).
	while (infile.get(ch))
	{
		// Go through unsigned char so bytes >= 0x80 never become negative indexes.
		const int code = static_cast<unsigned char>(ch);
		// Bytes outside the table cannot be counted and are skipped.
		if (code < table_size)
			c_weight[code]++;
	}
	infile.close();
	return 1;
}
/*
 * name:   Select
 * func:   Among nodes HT[1..n] that have no parent yet, finds the one with the
 *         smallest weight and the one with the next smallest weight.
 * elem:   HT - node array (slot 0 is not examined)
 *         n  - highest index to examine
 *         s1 - receives the index of the lightest unmerged node, or 0 if none
 *         s2 - receives the index of the lightest unmerged node other than s1,
 *              or 0 if there is none
 *         On equal weights the lower index wins.
 */
void Select(HuffmanTree HT, int n, int &s1, int &s2)
{
	// 0 doubles as "nothing found yet". Comparing against the current best
	// instead of a fixed sentinel keeps the search correct for any weight; a
	// constant such as 99999 would hide every node whose weight reaches it.
	int first = 0;
	for (int i = 1; i <= n; i++)
	{
		if (HT[i].parent != 0)
			continue;// already merged into a subtree
		if (first == 0 || HT[i].weight < HT[first].weight)
			first = i;
	}

	int second = 0;
	for (int i = 1; i <= n; i++)
	{
		if (HT[i].parent != 0 || i == first)
			continue;// merged already, or it is the node picked above
		if (second == 0 || HT[i].weight < HT[second].weight)
			second = i;
	}

	s1 = first;
	s2 = second;
}
/*
 * name:   HuffmanCoding
 * author: wyh
 * func:   Allocates and builds the Huffman tree for n weighted symbols.
 * elem:   HT - receives the newly allocated node array of 2*n slots; slot 0
 *              is allocated but never initialised, leaves are HT[1..n] and
 *              merged nodes are HT[n+1..2n-1]. Left untouched when n <= 1.
 *         w  - weight table; entries w[0..n-1] are read
 *         n  - number of symbols (leaves)
 */
void HuffmanCoding(HuffmanTree& HT,const int* w,const int n)
{
	if (n <= 1)
		return;

	// A tree with n leaves has n-1 merged nodes, hence 2n-1 nodes in total.
	const int total = 2 * n - 1;
	HT = new HTNode[total + 1];

	// Leaves take their weight from the table, merged nodes start at zero;
	// every link starts out as 0 ("none").
	for (int idx = 1; idx <= total; ++idx)
	{
		HTNode& node = HT[idx];
		if (idx <= n)
			node.weight = w[idx - 1];
		else
			node.weight = 0;
		node.parent = 0;
		node.lchid = 0;
		node.rchild = 0;
	}

	// Repeatedly merge the two lightest unmerged nodes into the next free slot.
	int s1, s2;
	for (int slot = n + 1; slot <= total; ++slot)
	{
		Select(HT, slot - 1, s1, s2);
		HT[slot].lchid = s1;
		HT[slot].rchild = s2;
		HT[slot].weight = HT[s1].weight + HT[s2].weight;
		HT[s1].parent = slot;
		HT[s2].parent = slot;
	}
}
/*
* name:哈夫曼译码
* author：wyh
* func：完成哈夫曼译码工作
* elem：无需对树更改 const 变量
*		n是文档中字符总个数
*		*ch是编码串
* (读入文件需要有字符串)
*/
void HuffmanTranCoding(const HuffmanTree HT,const int n,char *ch)
{
	int m = 2 * n - 1;
	int i;
	int j = 0;
	//输入0100001串完成对应字母转码
	while (ch[j] != '\0')
	{
		i = m;//从根开始找
		while (HT[i].lchid != 0 && HT[i].rchild != 0)
		{
			if (ch[j] == '0')
			{
				i = HT[i].lchid;
			}
			else
				i = HT[i].rchild;
			++j;
		}
		//找到了 在HT[i]
		/*cout<<*/
	}
}
/*
* name:哈夫曼压缩
* author：wyh
* func：完成哈夫曼压缩工作 将在函数内打开文件并进行读取压缩
* elem：无需对树更改 const 变量
*return：为0表示存储失败，为1表示存储成功
*/
int HuffmanZip(const HuffmanTree HT,int n,bool is_need_cin=true,char* txtw=NULL,char* txtd=NULL)
{
	int count_char = 0;//记录读入的字符总数
	int count_bit = 0;//记录写入的bit总数
	
	int start;
	int f,c;
	int i;
	char *txtW = {};
	char *txtD = {};
	if (is_need_cin) {
		cout << "请输入要压缩的文档的绝对路径(请确保绝对路径不要过长)" << endl;
		txtW = new char[256];
		cin >> txtW;
		cout << "请输入想要的生成压缩文件的名称" << endl;
		txtD = new char[256];
		cin >> txtD;
	}
	else
	{
		txtW = txtw;
		txtD = txtd;
	}
	ifstream infile(txtW, ios::in);
	//进行文件打开判断
	if (!infile.is_open())
	{
		cout << "文件打开失败" << endl;
		return 0;
	}
	ofstream outfile(txtD, ios::out);
	//cout << txtD << endl;
	if (!outfile.is_open())
	{
		cout << "文件打开失败" << endl;
		infile.close();
		return 0;
	}
	//infile.seekg(ios::end);
	//cout<< infile.tellg();
	//infile.seekg(ios::beg);
	unsigned int cd;
	cd = 0x0fffffff;
	char ch;
	char put_ch;
	//这是一种后端给值的方式，从后往前给了一个串，我们要做的 同样是从后往前，完美
	/*满一个char就送进去 送一个char*/
	while (!infile.eof())
	{
		ch = infile.get();
		c = ch+1;
		for (f = HT[ch+(ch != -1)].parent; f != 0; c = f, f = HT[f].parent)
		{
			if (HT[f].lchid == c)
			{
				//高位置0
				cd = cd >> 1;
				cd = cd &0x7fffffff;
			}
			else
			{
				//高位置1
				cd = cd >> 1;
				cd = cd | 0x80000000;
			}
			count_bit++;
			if ((cd & 0xffffff) == 0xfffff)
			{
				//这一步可能出错
				//ch = cd >> 24;
				put_ch = cd >> 24;
				cout <<ch<< hex << cd;
				outfile.put(put_ch);
				cd = 0x0fffffff;
				count_char++;
			}
		}
	}
	if (count_bit % 8 != 0)
	{
		//将最后几个bit直接输入
		put_ch = cd >> (24 + 8 - count_bit % 8);
		cout << ch << hex << cd;
		outfile.put(put_ch);
	}
	int count_left = count_bit % 8;
	outfile .put( count_left);
	infile.close();
	outfile.close();

}
/*
 * name:   PrintHuffman
 * func:   Builds the code string of every leaf and prints one line per leaf:
 *         "<byte value> <character> <code>".
 * elem:   HC - receives a newly allocated table of n+1 entries; HC[i] is the
 *              NUL-terminated code of leaf i for i in 1..n and HC[0] is NULL.
 *              Set to NULL when n <= 0.
 *         n  - number of leaves
 *         HT - tree to read; it is not modified
 */
void PrintHuffman(HuffmanCode &HC,int n,const HuffmanTree HT)
{
	if (n <= 0)
	{
		HC = NULL;
		return;
	}
	HC = new char* [n + 1];
	HC[0] = NULL;// slot 0 has no leaf
	// Scratch buffer filled from the back; n-1 bits plus the terminator is all
	// it has room for.
	char* cd = new char[n];
	cd[n - 1] = '\0';
	for (int i = 1; i <= n; i++)
	{
		int start = n - 1;
		// Climb from the leaf to the root; the bits arrive last-to-first, which
		// is why they are written backwards. "start > 0" keeps a malformed tree
		// from running off the front of the buffer.
		for (unsigned int c = i, f = HT[c].parent; f != 0 && start > 0; c = f, f = HT[f].parent)
		{
			if (HT[f].lchid == c)
				cd[--start] = '0';
			else
				cd[--start] = '1';
		}
		// Copy once per leaf, after the whole code is known. Doing this inside
		// the loop above leaks one buffer per bit and leaves HC[i] unset for a
		// node that has no parent.
		HC[i] = new char[n - start];
		strcpy(HC[i], &cd[start]);
		cout <<i-1<<" "<<char(i-1)<<" "<< HC[i] << endl;
	}
	delete[]cd;
}
int RevHuffmanUnZip(const HuffmanTree HT, int n,char *txtW,char * txtD)
{
	ifstream infile(txtW, ios::in);
	//进行文件打开判断
	if (!infile.is_open())
	{
		cout << "文件打开失败" << endl;
		return 0;
	}
	ofstream outfile(txtD, ios::out);
	//cout << txtD << endl;
	if (!outfile.is_open())
	{
		cout << "文件打开失败" << endl;
		infile.close();
		return 0;
	}
	infile.seekg(-1,ios::end);
	int count = 0;
	int left = infile.get();
	count++;

	//开始漫游树
	//假设上面最后剩下了5个bit 那么 8个里面有3个0和5个有效数字


	//找到最大根结点位置
	int weight_max = 0;
	int pos = 0;
	for (int i = 1; i <= 2 * n - 1; i++)
	{
		if (HT[i].weight > weight_max)
		{
			weight_max = HT[i].weight;
			pos = i;
		}
	}

	//文本最后一个字节存着有效字节数
	//读编码
	unsigned char ch;
	int now_value;


	//先特例化处理第一个字符
	infile.seekg(-1-count, ios::end);
	ch = infile.get();
	count++;
	int i = 0;
	int count_weishu = 1;//记录char中8个字节读了多少位
	while (1) {
		i = pos;//从根开始找
		if (count_weishu > left)
			break;
		if (left == 0)
			break;
		while (HT[i].lchid != 0 && HT[i].rchild != 0)
		{
			if ((now_value = (ch >> (left - count_weishu)) & 1) == 0)//若为1 向右走
			{
				i = HT[i].lchid;
			}
			else//否则向左走
				i = HT[i].rchild;
			count_weishu++;
			//如果多余的字符走完了还没找到结果 开始正常的读字符吧
			if (count_weishu > left)
				break;
		}
		//找到了 在HT[i]
		if (!(HT[i].lchid != 0 && HT[i].rchild != 0))
		{
			outfile << char(i - 1);
			continue;//直到left用完了为止
		}
		else
		{
			break;
		}
	}
	//开始正常处理：
	count_weishu = 1;
	bool is_find = false;
	while (1)
	{
		infile.seekg(-1 - count, ios::end);
		if (infile.tellg() == -1)
			return 1;//读完了，结束了！
		ch = infile.get();
		count++;

		//i = pos;不使用它的原因是要用上一个结点
		while ((HT[i].lchid != 0 && HT[i].rchild != 0)||(count_weishu<=8))
		{
			is_find = false;
			if ((now_value = (ch >> (8 - count_weishu)) & 1) == 0)//若为1 向右走
			{
				i = HT[i].lchid;
			}
			else//否则向左走
				i = HT[i].rchild;
			count_weishu++;
			//找到了 在HT[i]
			if (!(HT[i].lchid != 0 && HT[i].rchild != 0))
			{
				outfile << char(i - 1);
				i = pos;
				is_find = true;
				if (count_weishu > 8)
					break;
				continue;//直到left用完了为止
			}
			//如果8个字符走完了还没找到结果 读下一个字符
			if (count_weishu > 8)
				break;
		}
		//出来是因为 找完了并且8个字符用完了
		if (is_find)
			i = pos;
		count_weishu = 1;
	}
	

	infile.close();
	outfile.close();
}
/*
 * name:   HuffmanUnZip (interactive decompression)
 * func:   Asks for the compressed file and the output file, decodes the
 *         former into the latter and restores the original character order.
 * elem:   HT - tree to decode with; it is not modified
 *         n  - number of leaves in the tree
 * return: always 0, as before.
 *
 * RevHuffmanUnZip produces the text back to front. The order is put right by
 * reversing the decoded file itself, so the compressed file is only read and
 * never rewritten.
 */
int HuffmanUnZip(const HuffmanTree HT, int n)
{
	char txtW[256] = {};
	char txtD[256] = {};
	cout << "请输入要解压的文档的绝对路径(请确保绝对路径不要过长)" << endl;
	cin.width(sizeof(txtW));// keep over-long input from overflowing the buffer
	cin >> txtW;
	cout << "请输入想要的生成解压文件的名称" << endl;
	cin.width(sizeof(txtD));
	cin >> txtD;

	// Step 1: decode; txtD now holds the text in reverse order.
	if (RevHuffmanUnZip(HT, n, txtW, txtD) == 0)
		return 0;

	// Step 2: read the decoded text back, reverse it and rewrite the file.
	// Text mode on both sides so that line endings survive the round trip.
	ifstream decoded(txtD, ios::in);
	if (!decoded.is_open())
	{
		cout << "文件打开失败" << endl;
		return 0;
	}
	string text((istreambuf_iterator<char>(decoded)), istreambuf_iterator<char>());
	decoded.close();
	std::reverse(text.begin(), text.end());

	ofstream restored(txtD, ios::out | ios::trunc);
	if (!restored.is_open())
	{
		cout << "文件打开失败" << endl;
		return 0;
	}
	restored << text;
	restored.close();
	return 0;
}
int main()
{
	cout << "请输入作为存树的文档的绝对路径(请确保绝对路径不要过长)" << endl;
	char txtW[256] = {};
	cin >> txtW;
	WeightCul(txtW);
	HuffmanTree HT;
	//生成树
	HuffmanCoding(HT,c_weight,128);
	HuffmanCode HC;
	PrintHuffman(HC, 128, HT);
	//选择压缩还是解压
	cout << "请选择压缩还是解压  1表示压缩，0表示解压" << endl;
	bool judge;
	cin >> judge;
	if (judge)
		HuffmanZip(HT,128);
	else
		HuffmanUnZip(HT,128);
}

wordqiong

关注

1
点赞
踩
8

收藏

觉得还不错? 一键收藏
0
评论
赫夫曼编码压缩和解压代码

思路已在代码块注释里#define _CRT_SECURE_NO_WARNINGS#include<iostream>#include <fstream>#include<cstdlib>using namespace std;/*哈夫曼* 1.读入文件并计算权重* 2.编码建树* 注：这就是我们的基础树压缩解压都是根据这个树来的* 注1：为了确保解压和压缩文件相同，请记住您建树的文本文档，这将是您的密钥压缩文件相当于公钥*//*哈夫曼压.
复制链接

扫一扫