#include <cstring>
#include <iostream>
using namespace std;
#pragma warning(disable:4996)
// One slot of the array-backed Huffman tree.
// Links between nodes are array indices rather than pointers; the functions in
// this file store real nodes from index 1 upward and use 0 for "no link".
struct HTNode {
    char data;     // symbol carried by the node
    double weight; // weight compared when picking nodes to merge
    int parent;    // index of the parent node, 0 when there is none
    int lch;       // index of the left child, 0 when there is none
    int rch;       // index of the right child, 0 when there is none
};
// A tree is passed around as a pointer to the first element of its node array.
using HuffmanTree = HTNode*;
// Code table: an array of C strings, one entry per symbol.
using hafuman = char**;
// Finds the two lightest nodes in HT[1..top] that do not have a parent yet.
//
// HT  - node array; nodes are examined from index 1
// top - last index to examine (inclusive)
// s1  - receives the index of the lightest parentless node
// s2  - receives the index of the lightest parentless node other than s1
//
// Ties are resolved in favour of the lower index. s1 / s2 are set to 0 when no
// suitable node exists (index 0 is never examined, so it cannot be a result).
void select(HuffmanTree HT, int top, int& s1, int& s2)
{
    // The best candidate is tracked by index instead of comparing against a
    // sentinel weight, so no "larger than any weight" constant is required and
    // both outputs are always assigned.
    s1 = 0;
    for (int i = 1; i <= top; ++i) // lightest node without a parent
    {
        if (HT[i].parent != 0)
            continue;
        if (s1 == 0 || HT[i].weight < HT[s1].weight)
            s1 = i;
    }

    s2 = 0;
    for (int i = 1; i <= top; ++i) // second lightest node without a parent
    {
        if (i == s1 || HT[i].parent != 0)
            continue;
        if (s2 == 0 || HT[i].weight < HT[s2].weight)
            s2 = i;
    }
}
// Reads n "symbol frequency" pairs from standard input (a character followed
// by an integer) and builds the Huffman tree for them.
//
// HT - receives a newly allocated array of 2*n nodes: slot 0 is unused, the
//      leaves occupy HT[1..n] in input order and the merged nodes occupy
//      HT[n+1..2n-1], the last one being the root. The array is allocated with
//      new[], so it must be released with delete[]. Set to nullptr when n < 1.
// n  - number of symbols to read.
//
// Leaf weights are the frequencies divided by their total. "error" is printed
// when n <= 1; for n == 1 the single-node tree is still built, for n < 1
// nothing is read or allocated.
void CreateHuffmanTree(HuffmanTree& HT, int n)
{
    if (n <= 1) std::cout << "error" << std::endl;
    if (n < 1)
    {
        // A non-positive node count cannot be allocated.
        HT = nullptr;
        return;
    }

    const int m = n * 2 - 1; // no node has exactly one child, so total = 2 * leaves - 1
    HT = new HTNode[m + 1];
    for (int i = 0; i <= m; ++i) // initialise every slot, including the unused slot 0
    {
        HT[i].data = '*';
        HT[i].parent = 0;
        HT[i].lch = 0;
        HT[i].rch = 0;
        HT[i].weight = 0.0;
    }

    // Raw frequencies are stored straight into the leaves, so no fixed-size
    // scratch array limits n. A 64-bit total avoids overflow while summing ints.
    long long total = 0;
    for (int i = 1; i <= n; ++i)
    {
        int freq = 0; // stays 0 if the stream has already failed
        std::cin >> HT[i].data >> freq;
        HT[i].weight = freq;
        total += freq;
    }

    // Normalise to relative weights; skipped for a zero total, which would
    // otherwise turn every weight into NaN.
    if (total != 0)
    {
        for (int i = 1; i <= n; ++i)
        {
            HT[i].weight /= total;
        }
    }

    for (int i = n + 1; i <= m; ++i)
    {
        int s1 = 0;
        int s2 = 0;
        select(HT, i - 1, s1, s2); // two lightest parentless nodes among HT[1..i-1]
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lch = s1;
        HT[i].rch = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight; // merged node
    }
}
// Derives the Huffman code of every leaf by walking from the leaf up to the
// root and writing the bits back to front.
//
// HT - node array whose leaves are HT[1..n]; parent / lch / rch are indices
//      and a parent of 0 ends the walk.
// HC - receives a newly allocated table of n + 1 entries. HC[i] for
//      1 <= i <= n is a NUL-terminated string of '0' / '1' for leaf i
//      ('0' when the step came from a left child, '1' otherwise); HC[0] is
//      nullptr. The table and each string are allocated with new[]. Set to
//      nullptr when HT is null or n < 1.
// n  - number of leaves.
void Huffman_code(HuffmanTree HT, hafuman & HC, int n)
{
    if (HT == nullptr || n < 1)
    {
        // Nothing to encode; also keeps the cd[n - 1] write below in bounds.
        HC = nullptr;
        return;
    }

    HC = new char* [n + 1];
    HC[0] = nullptr; // slot 0 is unused; give it a defined value

    char* cd = new char[n + 1]; // scratch buffer, filled from the end towards the front
    cd[n - 1] = '\0';
    for (int i = 1; i <= n; ++i)
    {
        int start = n - 1; // position of the first character of the code
        int c = i;         // node whose incoming edge is being encoded
        for (int f = HT[i].parent; f != 0; f = HT[f].parent)
        {
            --start;
            cd[start] = (HT[f].lch == c) ? '0' : '1';
            c = f;
        }
        HC[i] = new char[n - start]; // code length plus the terminator
        std::strcpy(HC[i], &cd[start]);
    }
    delete[] cd;
}
int main() {
HuffmanTree hf;
CreateHuffmanTree(hf, 6);
hafuman HC;
Huffman_code(hf, HC, 6);
for (int i = 1; i <= 6; i++)
{
cout << i << "data: " << hf[i].data << " weight " << hf[i].weight <<" number "<<HC[i] << endl;
}
return 0;
}
// 哈夫曼编码是一种用于无损数据压缩的熵编码算法,通常用于压缩重复率较高的字符数据。
// 哈夫曼编码有两个特点:
// 1. 带权路径长度(WPL)最小;最小 WPL 的值是唯一的,但对应的哈夫曼树和具体编码不一定唯一;
// 2. 任何一个编码都不是另一个编码的前缀(即前缀码)。
// Q1. 为什么哈夫曼编码得到的二进制码不会出现前缀问题?
// 因为在哈夫曼树中,每个字符对应的节点都是叶子节点,其二进制码由根节点到该叶子节点的路径决定。如果某个编码是另一个编码的前缀,那么前者对应的节点必然位于根节点到后者的路径上,即它是一个内部节点,这与它是叶子节点相矛盾。因此任何一个编码都不可能是其他编码的前缀。
// Q2. 为什么哈夫曼编码得到的二进制码较短?
// 因为哈夫曼树是带权路径长度最小的树,权值较大的叶子节点离根节点较近,因而编码较短。带权路径长度是指树中所有叶子节点的权值与其到根节点的路径长度的乘积之和;当权值取字符的出现频率时,它与编码后的总长度成正比。
// 参考:http://t.csdn.cn/ESU1U