哈夫曼编码原理:
- 找出各字符或颜色的频度(代码中用weight来表示)
- 将出现过的字符按频度由小到大排序(小顶堆实现)
- 每次从堆顶弹出频度最小的2个节点,合并成一个新节点(频度为两者之和)后放回堆中,如此反复直到堆中只剩一个节点,即建成哈夫曼树
- 建树的同时计算非叶子节点的频度和,即为哈夫曼编码总长度。
- 从每个叶子节点向上回溯到根节点(左孩子记0,右孩子记1),将得到的序列反转即为该叶子节点的编码,并用map存储映射,方便后续编码。编解码可以各存一份。
- 根据需要决定是否将编码结果转为十六进制。
- 根据存储的映射进行解码。
#include<iostream>
#include<algorithm>
#include<queue>
#include<string>
#include<cstring>
#include<unordered_map>
using namespace std;
// Number of Node slots in the global node pool `p`.
#define MAXN 1000001
// leaf: index of the next free slot in `p`; reset to 0 by init() and advanced
//       by HuffmanEncode each time it places a node.
// leaf_num: number of leaf nodes; set by HuffmanEncode once all leaves are placed.
int leaf, leaf_num;
// Decode table mapping a code string to its symbol value.
// Written by HuffmanEncode and read by HuffmanDecode.
unordered_map<string, int>umap3;
// One node of the Huffman tree. Nodes live in the global pool `p` and refer
// to each other by index into that pool rather than by pointer.
// The struct is kept a plain aggregate (no constructors, no default member
// initializers) because it is filled with brace lists in field order.
struct Node {
    int color;    // symbol value; merged nodes get the color HuffmanEncode copies from a child
    int weight;   // frequency of the symbol, or the sum of both children for a merged node
    string code;  // code string assigned to this node ("" until computed)
    int fa;       // index of the parent node in p, -1 when there is none
    int l;        // index of the left child in p, -1 when there is none
    int r;        // index of the right child in p, -1 when there is none
};

// Global node pool shared by init() and HuffmanEncode().
Node p[MAXN];
void init(int n) {
for (int i = 0; i < 2 * n - 1; i++) {
p[i] = { -1,-1,"",-1,-1,-1};
}
leaf = leaf_num = 0;
}
struct cmp {
bool operator()(const Node& n1, const Node& n2)const{
if (n1.weight == n2.weight) {
return n1.color > n2.color;
}
else
{
return n1.weight > n2.weight;
}
}
};
// Builds a Huffman tree for the bytes of `str` and encodes `str` with it.
//
// Side effects on file-level state:
//   - p[0 .. leaf_num)   leaf nodes, one per distinct byte, with .code filled in
//   - p[leaf_num .. leaf) merged (internal) nodes; the root is p[leaf - 1]
//   - leaf / leaf_num     total node count / leaf count
//   - umap3               rebuilt as the code -> symbol table used for decoding
//
// Steps: count byte frequencies, repeatedly merge the two lightest nodes
// (min-heap ordered by `cmp`), derive each leaf's code by walking up to the
// root, concatenate the codes of all input bytes, pad the bit string with
// '0' to a whole number of bytes and convert it to upper-case hexadecimal.
//
// str: input text; every byte value (0-255) is accepted. An empty string
//      only resets the state.
//
// NOTE(review): the bit string and the hex string are computed but, as in
// the original, neither is returned nor stored; the signature is left as
// `void` so existing callers keep compiling. Expose them if callers need them.
void HuffmanEncode(string str) {
    const int n = static_cast<int>(str.length());
    const int kSymbols = 256;  // one bucket per possible byte value

    // At most min(n, 256) distinct symbols can occur, so this is enough
    // slots for the whole tree and never exceeds the pool size.
    init(min(n, kSymbols));
    // Drop codes left over from a previous input; mixing two code tables
    // would break the prefix property that decoding relies on.
    umap3.clear();
    if (n == 0) {
        return;
    }

    // --- 1. Frequency count -------------------------------------------
    int bucket[kSymbols] = { 0 };
    for (int i = 0; i < n; i++) {
        // Index via unsigned char so bytes >= 0x80 cannot go negative.
        bucket[static_cast<unsigned char>(str[i])]++;
    }

    // --- 2. One leaf per symbol that actually occurs ------------------
    for (int i = 0; i < kSymbols; i++) {
        if (bucket[i]) {
            p[leaf].color = i;
            p[leaf].weight = bucket[i];
            leaf++;
        }
    }
    leaf_num = leaf;

    // --- 3. Build the tree --------------------------------------------
    // The heap holds indices into p (not Node copies) so that parent links
    // are written to the real nodes. Ordering is delegated to `cmp`.
    auto heavier = [](int lhs, int rhs) { return cmp()(p[lhs], p[rhs]); };
    priority_queue<int, vector<int>, decltype(heavier)> pq(heavier);
    for (int i = 0; i < leaf_num; i++) {
        pq.push(i);
    }
    while (pq.size() > 1) {
        const int a = pq.top(); pq.pop();
        const int b = pq.top(); pq.pop();
        // The merged node inherits the color of its lighter child, which
        // keeps the tie-breaking in `cmp` deterministic.
        p[leaf] = { p[a].color, p[a].weight + p[b].weight, "", -1, a, b };
        p[a].fa = p[b].fa = leaf;
        pq.push(leaf);
        leaf++;
    }

    // --- 4. Derive the code of every leaf -----------------------------
    unordered_map<int, string> umap2;  // symbol -> code, used for encoding
    for (int i = 0; i < leaf_num; i++) {
        string code;
        // Walk leaf -> root: left edge is '0', right edge is '1'.
        for (int j = i; p[j].fa != -1; j = p[j].fa) {
            code += (j == p[p[j].fa].l) ? '0' : '1';
        }
        // The walk produced the bits last-to-first.
        reverse(code.begin(), code.end());
        if (code.empty()) {
            // Only one distinct symbol: the lone leaf is the root and would
            // get an empty code, so give it a 1-bit code instead.
            code = "0";
        }
        p[i].code = code;
        umap2[p[i].color] = code;
        umap3[code] = p[i].color;
    }

    // --- 5. Total encoded length --------------------------------------
    // The sum of the weights of all internal nodes equals the number of
    // bits in the encoded text.
    size_t bitlen = 0;
    for (int i = leaf_num; i < leaf; i++) {
        bitlen += p[i].weight;
    }
    if (leaf_num == 1) {
        bitlen = n;  // no internal nodes; each symbol uses the 1-bit code
    }

    // --- 6. Encode the text -------------------------------------------
    string codebit;
    codebit.reserve(bitlen + 7);  // +7 leaves room for the byte padding
    for (int i = 0; i < n; i++) {
        codebit += umap2[static_cast<unsigned char>(str[i])];
    }
    // Pad with zero bits up to a whole number of bytes.
    if (codebit.length() % 8) {
        codebit.append(8 - codebit.length() % 8, '0');
    }

    // --- 7. Convert to hexadecimal, 4 bits per digit ------------------
    const char hex_digits[] = "0123456789ABCDEF";
    string hex_str;
    hex_str.reserve(codebit.length() / 4);
    for (size_t i = 0; i < codebit.length(); i += 4) {
        // Subtract '0' so the characters '0'/'1' become the values 0/1.
        const int dec = (codebit[i] - '0') * 8 + (codebit[i + 1] - '0') * 4
                      + (codebit[i + 2] - '0') * 2 + (codebit[i + 3] - '0');
        hex_str += hex_digits[dec];
    }
}
// Decodes a Huffman bit string using the code table in `umap3`.
//
// Characters of `str` are appended one at a time to a running prefix; as
// soon as the prefix equals a code stored in `umap3`, the matching symbol is
// appended to the decoded text and the prefix is reset.
//
// str: sequence of code characters, looked up in umap3 exactly as stored.
//      NOTE(review): presumably the unpadded '0'/'1' bit string rather than
//      the hex form, since umap3 is keyed by bit strings; confirm with callers.
//
// The table is only read (via find), so decoding never adds entries to
// umap3, and a symbol whose value is 0 is decoded like any other.
//
// NOTE(review): the decoded text is built but, as in the original, neither
// returned nor stored; the signature is left as `void` so existing callers
// keep compiling.
void HuffmanDecode(string str) {
    string tmp;         // bits read since the last completed symbol
    string decode_str;  // decoded text so far
    for (char bit : str) {
        tmp += bit;
        const auto it = umap3.find(tmp);
        if (it != umap3.end()) {
            decode_str += static_cast<char>(it->second);
            tmp.clear();
        }
    }
}