哈夫曼编码及解码

最新推荐文章于 2024-07-19 23:01:38 发布

大头娃娃鱼

最新推荐文章于 2024-07-19 23:01:38 发布

阅读量426

点赞数 6

文章标签：算法

本文链接：https://blog.csdn.net/m0_46119836/article/details/135205764

版权

哈夫曼编码原理：

找出各字符或颜色的频度（代码中用weight来表示）
将出现过的字符按频度由小到大排序（小顶堆实现）
每次从堆顶弹出频度最小的2个节点，合并为一个新的父节点（其频度为两者之和）并放回堆中，如此反复直到堆中只剩一个节点（即哈夫曼树的根）。
建树的同时计算非叶子节点的频度和，即为哈夫曼编码总长度。
从每个叶子节点沿父指针向上走到根节点（左孩子记0，右孩子记1），再将得到的序列反转，即为该叶子节点的编码；用map存储映射，方便后续编码。编码表和解码表可以各存一份。
根据需要决定是否将编码后的比特串转为十六进制。
根据存储的映射进行解码。

#include<iostream>
#include<algorithm>
#include<queue>
#include<string>
#include<cstring>
#include<unordered_map>
using namespace std;

// Number of slots in the static node pool p.
#define MAXN 1000001

// leaf: index of the next unused slot in p (HuffmanEncode advances it as it
// creates nodes). leaf_num: value of leaf right after all leaves were created.
int leaf;
int leaf_num;

// Decode table: code string -> symbol value ("color"). Filled by
// HuffmanEncode and consulted by HuffmanDecode.
unordered_map<string, int> umap3;

// One node of the Huffman tree, stored by index inside p.
// Member order matters: nodes are assigned with positional brace lists.
struct Node {
	int color;    // symbol value carried by the node
	int weight;   // occurrence count (leaf) or sum of both children (inner node)
	string code;  // code string assigned to the node
	int fa;       // index of the parent in p, -1 when there is none
	int l;        // index of the left child in p, -1 when there is none
	int r;        // index of the right child in p, -1 when there is none
};

// Static pool holding every tree node.
Node p[MAXN];

void init(int n) {
	for (int i = 0; i < 2 * n - 1; i++) {
		p[i] = { -1,-1,"",-1,-1,-1};
	}
	leaf = leaf_num = 0;
}

// Comparator that turns std::priority_queue into a min-heap of nodes:
// the node with the smallest weight is on top, and among equal weights
// the node with the smaller color comes first.
struct cmp {
	bool operator()(const Node& n1, const Node& n2)const{
		const bool same_weight = (n1.weight == n2.weight);
		return same_weight ? (n1.color > n2.color) : (n1.weight > n2.weight);
	}
};


//str 为imag像素块字符串
void HuffmanEncode(string str) {
	int n = str.length();
	init(n);

	//calculate weight and create Node
	int bucket[128] = { 0 };
	for (int i = 0; i < n; i++) {
		bucket[str[i]]++;
	}
	for (int i = 0; i < 128; i++) {
		if (!bucket[i]) {			
			p[leaf].color = i;
			p[leaf].weight = bucket[i];
			leaf++;
		}
	}
	leaf_num = leaf;

	//create Huffman tree
	priority_queue<Node, vector<Node>, cmp>pq;
	unordered_map<Node, int>umap;
	for (int i = 0; i < leaf; i++) {
		pq.push(p[i]);
		umap[p[i]] = i;
	}
	while (pq.size() > 1) {
		auto a = pq.top(); pq.pop();
		auto b = pq.top(); pq.pop();
		a.fa = b.fa = leaf;
		p[leaf] = { a.color,a.weight + b.weight,"",-1,umap[a],umap[b] };
		pq.push(p[leaf]);
		umap[p[leaf]] = leaf++;
	}

	//Huffman encode
	unordered_map<int, string>umap2;

	for (int i = 0; i < leaf_num; i++) {
		int parent;
		string code = "";
		int j = i;
		while (p[j].fa != -1) {
			parent = p[j].fa;
			if (j == p[parent].l) {
				code += '0';
			}
			else
			{
				code += '1';
			}
			j = parent;
		}
		code.reserve();
		p[i].code = code;

		//create color and code mapping for easy encode		
		umap2[p[i].color] = code;
		umap3[code] = p[i].color;
	}

	//Huffman code length
	int bitlen = 0;
	for (int i = leaf_num; i < 2*n-1; i++) {
		bitlen += p[i].weight;
	}

	//Huffman code bit output
	string codebit;
	for (int i = 0; i < n; i++) {
		codebit += umap2[str[i]];
	}

	//最后不足8bit,补足bit
	int len = codebit.length();
	if (len % 8) {
		int n = 8 - len % 8;
		string s(n, '0');
		codebit += s;
	}
	len += n;

	//转换十六进制
	unordered_map<int, string>umap4;
	for (int i = 10; i < 16; i++) {
		umap4[i] = 'A' - 10 + i;
	}

	string hex_str;
	for (int i = 0; i < n; i += 4) {
		int dec= codebit[i] * 8 + codebit[i + 1] * 4 + codebit[i + 2] * 2 + codebit[i + 3] * 1;
		hex_str += dec < 10 ? to_string(dec) : umap4[dec];
	}
}

//Huffman Decode
// Decodes str using the code -> color table umap3.
//   str: sequence of code characters; the keys of umap3 are strings of
//        '0'/'1', so str is expected to be such a bit string.
// Characters are accumulated until they form a known code, the matching
// symbol is appended to the result, and accumulation restarts.
// The table is only read: find() is used instead of operator[] so that
// unknown prefixes are not inserted into umap3 and a symbol whose color is 0
// is still recognised.
// NOTE(review): decode_str is local and, as in the original signature, is
// not returned; expose it if callers need the decoded text.
void HuffmanDecode(string str) {
	string tmp;
	string decode_str;
	for (const char bit : str) {
		tmp += bit;
		const auto hit = umap3.find(tmp);
		if (hit != umap3.end()) {
			decode_str += static_cast<char>(hit->second);
			tmp.clear();
		}
	}
}