树_赫夫曼树实现压缩与解压缩

代码实现

#include<cstdlib>
#include<iostream>
#include<string>
#include<vector>

//Status code type returned by the functions in this file.
typedef int Status;
//Passed to exit() when a memory allocation fails.
#define OVERFLOW -1
//Returned by a function to signal failure.
#define ERROR 0
//Returned by a function to signal success.
#define OK 1
//0x7fffffff, the largest value of a 32-bit signed integer.
#define MAX_INT 0x7fffffff

//Huffman tree node. The tree is kept in a flat array and nodes refer to
//each other by array index; the tree-building code uses -1 for "no link".
struct HTNode {
	unsigned int weight; //frequency of the symbol, or sum of both children
	int lchild;          //index of the left child
	int rchild;          //index of the right child
	int parent;          //index of the parent
};

//A Huffman tree is handled as a pointer to the first node of its array.
using HuffmanTree = HTNode*;

//Frequency record for one character of the text.
struct CharactorInfo {
	char c;              //the character itself
	unsigned int weight; //how many times it occurs
};

//A frequency table is handled as a pointer to its first record.
using CI_Statistic = CharactorInfo*;

//Counts how often each distinct character occurs in a text.
//  enter - the text to analyse
//  n     - out: number of distinct characters found (reset to 0 first)
//  CIS   - out: heap array of n records, ordered by first appearance;
//          allocated with malloc when the first record is added and grown
//          with realloc afterwards. It is left untouched for an empty text.
//Returns OK. Terminates the process with exit(OVERFLOW) if allocation fails.
Status GenerateStatistic(std::string enter, int& n, CI_Statistic& CIS) {
	const int total = enter.length();
	n = 0;
	for (int pos = 0; pos < total; ++pos) {
		const char ch = enter[pos];

		//Search the records collected so far for this character.
		int slot = 0;
		while (slot < n && CIS[slot].c != ch) {
			++slot;
		}
		if (slot < n) {
			CIS[slot].weight += 1;
			continue;
		}

		//First occurrence: make room for one more record and append it.
		if (n == 0) {
			CIS = static_cast<CI_Statistic>(malloc(sizeof(CharactorInfo)));
		}
		else {
			CIS = static_cast<CI_Statistic>(realloc(CIS, sizeof(CharactorInfo) * (n + 1)));
		}
		if (!CIS) exit(OVERFLOW);
		CIS[n].c = ch;
		CIS[n].weight = 1;
		++n;
	}
	return OK;
}

//Serialises a frequency table into a NUL-terminated C string.
//Every record becomes "<character><decimal weight>:", in table order.
//  n     - number of records in CIS
//  CIS   - the frequency table
//  c_CIS - in/out: a non-NULL buffer passed in is freed first; on return it
//          points to a freshly malloc'ed string owned by the caller.
//Returns OK. Terminates the process with exit(OVERFLOW) if allocation fails.
Status GetStatistic(int n, CI_Statistic CIS, char*& c_CIS) {
	//Drop the caller's previous buffer; it is replaced below.
	if (c_CIS != NULL) {
		free(c_CIS);
		c_CIS = NULL;
	}

	std::string text;
	for (int idx = 0; idx < n; ++idx) {
		text.push_back(CIS[idx].c);
		text.append(std::to_string(CIS[idx].weight));
		text.push_back(':');
	}

	const int size = text.length();
	c_CIS = static_cast<char*>(malloc(sizeof(char) * (size + 1)));
	if (c_CIS == NULL) exit(OVERFLOW);
	text.copy(c_CIS, size); //copies the characters without a terminator
	c_CIS[size] = '\0';

	return OK;
}

//Picks the two lightest nodes that have no parent yet (parent == -1).
//  HT  - the node array
//  num - only nodes HT[0 .. num-1] are examined
//  s1  - out: index of the lightest parentless node
//  s2  - out: index of the second lightest parentless node
//Among nodes of equal weight the one with the lower index wins, so repeated
//calls on identical arrays always build the same tree.
//Returns OK on success. Returns ERROR, leaving s1 and s2 untouched, when
//num <= 1 or fewer than two parentless nodes exist.
Status Select(HuffmanTree HT, int num, int& s1, int& s2) {
	if (num <= 1) return ERROR;

	//Track candidates by index rather than by a sentinel weight, so that
	//every unsigned weight value (including very large ones) is eligible.
	int first = -1;  //lightest parentless node seen so far
	int second = -1; //runner-up
	for (int i = 0; i < num; ++i) {
		if (HT[i].parent != -1) continue;

		const unsigned int w = HT[i].weight;
		if (first == -1 || w < HT[first].weight) {
			//New minimum: the previous minimum becomes the runner-up.
			second = first;
			first = i;
		}
		else if (second == -1 || w < HT[second].weight) {
			second = i;
		}
	}

	if (second == -1) return ERROR;
	s1 = first;
	s2 = second;
	return OK;
}

//Huffman-encodes a text into a printable C string.
//Output layout: the frequency header produced by GetStatistic
//("<character><count>:" per distinct character), then "::", then the code
//bits of every input character written as the characters '0' and '1'.
//An edge to a left child is written as '1', an edge to a right child as '0'.
//A text made of one repeated character yields an empty bit section, because
//its only leaf is also the root and therefore has an empty code.
//  enter - the text to compress; must contain at least two characters
//  ret   - out: freshly malloc'ed, NUL-terminated result owned by the caller
//          (any previous value is overwritten, not freed)
//Returns OK on success, ERROR (ret untouched) if the text is shorter than two
//characters or the tree cannot be built. Terminates the process with
//exit(OVERFLOW) if the result buffer cannot be allocated.
//Writes a progress bar ("|" per encoded character) to std::cout.
Status Compress_Huffman(std::string enter, char*& ret) {
	const int len = enter.length();
	if (len <= 1) return ERROR;

	//Frequency table (records in order of first appearance) and its header.
	CI_Statistic CIS = NULL;
	int n = 0;
	char* c_CIS = NULL;
	GenerateStatistic(enter, n, CIS);
	GetStatistic(n, CIS, c_CIS);

	std::string s_ret(c_CIS);
	free(c_CIS);
	s_ret += "::";

	//Nodes 0..n-1 are the leaves, nodes n..m-1 the internal nodes.
	//The vector releases the tree on every exit path.
	const int m = 2 * n - 1;
	std::vector<HTNode> HT(m);
	for (int i = 0; i < m; ++i) {
		HT[i].weight = (i < n) ? CIS[i].weight : 0;
		HT[i].lchild = HT[i].rchild = HT[i].parent = -1;
	}
	for (int j = n; j < m; ++j) {
		int s1 = 0, s2 = 0;
		if (!Select(HT.data(), j, s1, s2)) {
			free(CIS);
			return ERROR;
		}
		HT[j].lchild = s1;
		HT[j].rchild = s2;
		HT[j].weight = HT[s1].weight + HT[s2].weight;
		HT[s1].parent = j;
		HT[s2].parent = j;
	}

	//Compute each symbol's code once instead of once per input character.
	//Walking leaf -> root yields the code backwards, so it is reversed.
	//symbol_index maps a character (as unsigned char) to its leaf index.
	std::vector<std::string> codes(n);
	std::vector<int> symbol_index(256, -1);
	for (int s = 0; s < n; ++s) {
		std::string path;
		for (int node = s; HT[node].parent != -1; node = HT[node].parent) {
			path += (HT[HT[node].parent].lchild == node) ? '1' : '0';
		}
		codes[s] = std::string(path.rbegin(), path.rend());
		symbol_index[static_cast<unsigned char>(CIS[s].c)] = s;
	}
	free(CIS);

	std::cout << "Compress Progress:";
	for (int i = 0; i < len; ++i) {
		s_ret += codes[symbol_index[static_cast<unsigned char>(enter[i])]];
		std::cout << "|";
	}

	const int len_ret = s_ret.length();
	ret = static_cast<char*>(malloc(sizeof(char) * (len_ret + 1)));
	if (!ret) exit(OVERFLOW);
	s_ret.copy(ret, len_ret);
	ret[len_ret] = '\0';

	std::cout << "Finished!" << std::endl;

	return OK;
}

//Decodes a string laid out as: frequency header, "::", code bits.
//The header is a sequence of "<character><decimal count>:" records. The
//Huffman tree is rebuilt from those counts with Select, then the bit
//characters are followed from the root: '1' goes to the left child, any
//other character goes to the right child. Bits left over after the last
//complete code are ignored.
//When the header lists a single character the bit section carries no
//information (a one-leaf tree has an empty code), so the text is rebuilt by
//repeating that character <count> times.
//  code - NUL-terminated compressed text
//  ret  - out: freshly malloc'ed, NUL-terminated decoded text owned by the
//         caller (any previous value is overwritten, not freed)
//Returns OK on success. Returns ERROR (ret untouched) if code is NULL, the
//header is empty or malformed, a count does not fit in unsigned int, or the
//tree cannot be built. Terminates the process with exit(OVERFLOW) if the
//result buffer cannot be allocated.
//Writes a progress bar ("|" per decoded character) to std::cout.
Status decompression(char* code, char*& ret) {
	if (code == NULL) return ERROR;

	//---- Parse the header -------------------------------------------------
	const unsigned int max_weight = ~0u;
	std::vector<CharactorInfo> symbols;
	const char* cur = code;
	bool header_closed = false;
	while ((*cur) != '\0') {
		const char symbol = (*cur++);
		//A record always has digits after its character, so a ':' directly
		//followed by ':' can only be the "::" that ends the header.
		if ((*cur) == ':') {
			header_closed = true;
			break;
		}

		unsigned int weight = 0;
		bool has_digit = false;
		while ((*cur) >= '0' && (*cur) <= '9') {
			const unsigned int digit = (*cur) - '0';
			if (weight > (max_weight - digit) / 10) return ERROR; //overflow
			weight = weight * 10 + digit;
			has_digit = true;
			++cur;
		}
		//Reject truncated or non-numeric records instead of reading past
		//the end of the input.
		if (!has_digit || (*cur) != ':') return ERROR;
		++cur;

		CharactorInfo info;
		info.c = symbol;
		info.weight = weight;
		symbols.push_back(info);
	}
	if (!header_closed || symbols.empty()) return ERROR;
	++cur; //skip the second ':' of the terminator; cur is now at the bits

	const int n = static_cast<int>(symbols.size());
	std::string s_ret;
	std::cout << "Decompress Progress:";

	if (n == 1) {
		//Single-symbol text: the count is the whole message.
		for (unsigned int k = 0; k < symbols[0].weight; ++k) {
			s_ret += symbols[0].c;
			std::cout << "|";
		}
	}
	else {
		//---- Rebuild the tree exactly as the compressor built it ----------
		//Nodes 0..n-1 are the leaves, nodes n..m-1 the internal nodes.
		const int m = 2 * n - 1;
		std::vector<HTNode> HT(m);
		for (int i = 0; i < m; ++i) {
			HT[i].weight = (i < n) ? symbols[i].weight : 0;
			HT[i].lchild = HT[i].rchild = HT[i].parent = -1;
		}
		for (int j = n; j < m; ++j) {
			int s1 = 0, s2 = 0;
			if (!Select(HT.data(), j, s1, s2)) return ERROR;
			HT[j].lchild = s1;
			HT[j].rchild = s2;
			HT[j].weight = HT[s1].weight + HT[s2].weight;
			HT[s1].parent = j;
			HT[s2].parent = j;
		}

		//---- Decode the bit section ---------------------------------------
		//After n-1 merges the last node created is the only one without a
		//parent, i.e. the root.
		const int root = m - 1;
		int node = root;
		for (; (*cur) != '\0'; ++cur) {
			node = ((*cur) == '1') ? HT[node].lchild : HT[node].rchild;
			if (node < n) {
				//Reached a leaf: emit its character and restart at the root.
				s_ret += symbols[node].c;
				node = root;
				std::cout << "|";
			}
		}
	}
	std::cout << "Finished!" << std::endl;

	const int len_s_ret = s_ret.length();
	if (!(ret = static_cast<char*>(malloc(sizeof(char) * (len_s_ret + 1))))) exit(OVERFLOW);
	s_ret.copy(ret, len_s_ret);
	ret[len_s_ret] = '\0';

	return OK;
}


int main() {
	std::string enter;
	std::cout<<"请输入你要压缩的文本(英文字符):"<<std::endl;
	std::getline(std::cin, enter);
	
	std::cout<<"以下为压缩后的文本:"<<std::endl;
	char* ret = NULL;
	Compress_Huffman(enter, ret);
	std::cout << ret << std::endl;
	
	std::cout<<"以下为解压缩后的文本:"<<std::endl;
	char* ret_de = NULL;
	decompression(ret, ret_de);
	std::cout << ret_de << std::endl;

	return 0;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值