树系列文章:
树(一):二叉树(BiTree) 创建+销毁+前中后层遍历(递归+非递归)C++
树(二):线索二叉树(ThreadBiTree) 线索化及其非递归遍历C++
前言
哈夫曼树也被称为最优二叉树,将每一个节点的权重(或者说是出现频率)进行统计,然后组成一个带权路径长度最短的二叉树。
带权路径长度:WPL = Σ(每个叶子节点的权重 × 从根节点到该叶子节点需要经过的边数),即对所有叶子节点求和。
将树转化成为哈夫曼树之后,查找节点的效率会大大提高:频率高的节点离根节点更近,经过的边数更少,因此更容易找到。
哈夫曼树
始终用权值最小的两个节点 ,组合成新的节点,直到只剩下根节点为止。
权重节点队列:
组成的HuffmanTree:
- 程序
#include <iostream>
#include <queue>
#include <vector>
#include <functional>
using namespace std;
/*********************************HuffmanTree(霍夫曼树)****************************************/
/***************************************类声明****************************************/
/**
 * A node of the Huffman tree.
 *
 * Every member is initialised by the constructor, so a freshly built node
 * never exposes an indeterminate value.
 */
struct TreeNode
{
    char val;               // value stored in the node ('\0' until a caller assigns one)
    int weight;             // weight of the node
    struct TreeNode *left;  // left child, or nullptr when absent
    struct TreeNode *right; // right child, or nullptr when absent

    // Builds a childless node carrying the given weight.
    TreeNode(int weight) : val('\0'), weight(weight), left(nullptr), right(nullptr) {}
};
class HuffmanTree
{
TreeNode *_root;
public:
HuffmanTree(vector<int> &);
void preTraverse(TreeNode *); //前序遍历
void inTraverse(TreeNode *); //中序遍历
TreeNode *getRoot() const { return _root; }
private:
TreeNode *createTree(vector<int> &);
};
/**************************************类定义****************************************/
// Constructor: writes a trace line to stdout, then builds the tree from
// `vec` and remembers its root.
HuffmanTree::HuffmanTree(std::vector<int> &vec)
{
    std::cout << "HuffmanTree()" << std::endl;

    TreeNode *const builtRoot = createTree(vec);
    _root = builtRoot;
}
/**
 * Ordering predicate for the priority queue of tree nodes.
 *
 * Returns true when `lhs` is heavier than `rhs`. Used as the comparator of
 * std::priority_queue, this keeps the node with the smallest weight on top.
 */
struct Mycompare
{
    // const-qualified so the comparator can also be invoked through a const
    // object, as comparators are expected to allow.
    bool operator()(const TreeNode *lhs, const TreeNode *rhs) const
    {
        return lhs->weight > rhs->weight;
    }
};
/**
 * Builds a Huffman tree from the given weights.
 *
 * One leaf is created per weight. The two lightest nodes are then merged
 * under a new parent, whose weight is their sum, until one node remains.
 *
 * @param vec weights of the leaves
 * @return the root of the tree, or nullptr when `vec` is empty
 */
TreeNode *HuffmanTree::createTree(std::vector<int> &vec)
{
    // Nothing to build. Without this guard top() would be called on an empty
    // queue below, which is undefined behaviour.
    if (vec.empty())
        return nullptr;

    std::priority_queue<TreeNode *, std::vector<TreeNode *>, Mycompare> que;
    for (const int weight : vec)
        que.push(new TreeNode(weight));

    while (que.size() > 1)
    {
        TreeNode *min1 = que.top();
        que.pop();
        TreeNode *min2 = que.top();
        que.pop();

        TreeNode *parent = new TreeNode(min1->weight + min2->weight);
        // The strictly lighter node goes left. On a tie the node popped
        // first goes right, as before.
        if (min1->weight < min2->weight)
        {
            parent->left = min1;
            parent->right = min2;
        }
        else
        {
            parent->right = min1;
            parent->left = min2;
        }
        que.push(parent);
    }

    // Exactly one node is left: the root.
    return que.top();
}
// Pre-order traversal: prints the node's weight, then its left subtree, then
// its right subtree. A missing child is printed as "# ".
void HuffmanTree::preTraverse(TreeNode *node)
{
    if (node == nullptr)
    {
        std::cout << "# ";
        return;
    }

    std::cout << node->weight << " ";
    preTraverse(node->left);
    preTraverse(node->right);
}
// In-order traversal: prints the left subtree, then the node's weight, then
// the right subtree. A missing child is printed as "# ".
void HuffmanTree::inTraverse(TreeNode *node)
{
    if (node == nullptr)
    {
        std::cout << "# ";
        return;
    }

    inTraverse(node->left);
    std::cout << node->weight << " ";
    inTraverse(node->right);
}
/**********************************测试函数******************************************/
void test0()
{
vector<int> weights{3, 2, 9, 7, 25, 18};
HuffmanTree tree(weights);
cout << "Pre:" << endl;
tree.preTraverse(tree.getRoot());
cout << endl;
cout << "In:" << endl;
tree.inTraverse(tree.getRoot());
cout << endl;
}
// Program entry point: runs the test0 demo and returns 0.
int main()
{
test0();
return 0;
}
哈夫曼编码
有了哈夫曼树,哈夫曼编码自然也就很容易理解了,在创建好了哈夫曼树之后,从根节点开始,往左则编码0, 往右则编码1,每一个字母都有了属于自己的哈夫曼编码。
哈夫曼编码属于不定长编码,我们熟悉的ASCII码是定长编码。
定长编码每一个编码占位相同(如ASCII码为8位)。
不定长编码则根据权重进行编码,这样组成的总编码长度会是最小,相比于定长编码减少了约20%的长度,这样的话就起到了压缩数据的作用。
注意:
1.也可以往左编码1,往右编码0,所以哈夫曼编码并不唯一。
2.哈夫曼编码必须在哈夫曼树创建好之后才能进行编码。
3.由于所有字符都位于哈夫曼树的叶子节点上,任何一个字符的编码都不会是另一个字符编码的前缀(即哈夫曼编码是前缀码),因此解码时不会产生歧义。
- 程序
#include <iostream>
#include <queue>
#include <vector>
#include <functional>
#include <string>
#include <map>
using namespace std;
/*********************************HuffmanCode(霍夫曼编码)****************************************/
/***************************************类声明****************************************/
/**
 * A node of the Huffman tree used for encoding.
 *
 * A freshly constructed node has no children and an empty code.
 */
struct TreeNode
{
    char val;                        // value stored in the node
    int weight;                      // weight of the node
    std::string code;                // Huffman code, empty until assigned
    struct TreeNode *left = nullptr; // left child, or null when absent
    struct TreeNode *right = nullptr; // right child, or null when absent

    // Builds a childless node from a value and its weight.
    TreeNode(char val, int weight)
        : val(val), weight(weight)
    {
    }
};
class HuffmanTree
{
TreeNode *_root;
map<string, char> _codemap;
public:
HuffmanTree(vector<pair<char, int>> &);
void preTraverse(TreeNode *); //前序遍历
void inTraverse(TreeNode *); //中序遍历
TreeNode *getRoot() const { return _root; }
void encode(TreeNode *root, string code);
void printCodemap();
private:
TreeNode *createTree(vector<pair<char, int>> &);
};
/**************************************类定义****************************************/
// Constructor: writes a trace line to stdout, then builds the tree from the
// (value, weight) pairs in `vec` and remembers its root.
HuffmanTree::HuffmanTree(std::vector<std::pair<char, int>> &vec)
{
    std::cout << "HuffmanTree()" << std::endl;

    TreeNode *const builtRoot = createTree(vec);
    _root = builtRoot;
}
/**
 * Ordering predicate for the priority queue of tree nodes.
 *
 * Returns true when `lhs` is heavier than `rhs`. Used as the comparator of
 * std::priority_queue, this keeps the node with the smallest weight on top.
 */
struct Mycompare
{
    // const-qualified so the comparator can also be invoked through a const
    // object, as comparators are expected to allow.
    bool operator()(const TreeNode *lhs, const TreeNode *rhs) const
    {
        return lhs->weight > rhs->weight;
    }
};
/**
 * Builds a Huffman tree from (value, weight) pairs.
 *
 * One leaf is created per pair. The two lightest nodes are then merged under
 * a new parent, whose value is '\0' and whose weight is their sum, until one
 * node remains.
 *
 * @param vec (value, weight) pairs of the leaves
 * @return the root of the tree, or nullptr when `vec` is empty
 */
TreeNode *HuffmanTree::createTree(std::vector<std::pair<char, int>> &vec)
{
    // Nothing to build. Without this guard top() would be called on an empty
    // queue below, which is undefined behaviour.
    if (vec.empty())
        return nullptr;

    std::priority_queue<TreeNode *, std::vector<TreeNode *>, Mycompare> que;
    for (const auto &entry : vec)
        que.push(new TreeNode(entry.first, entry.second));

    while (que.size() > 1)
    {
        TreeNode *min1 = que.top();
        que.pop();
        TreeNode *min2 = que.top();
        que.pop();

        TreeNode *parent = new TreeNode('\0', min1->weight + min2->weight);
        // The strictly lighter node goes left. On a tie the node popped
        // first goes right, as before.
        if (min1->weight < min2->weight)
        {
            parent->left = min1;
            parent->right = min2;
        }
        else
        {
            parent->right = min1;
            parent->left = min2;
        }
        que.push(parent);
    }

    // Exactly one node is left: the root.
    return que.top();
}
// Pre-order traversal: prints the node's value, then its left subtree, then
// its right subtree. A missing child is printed as "# ".
void HuffmanTree::preTraverse(TreeNode *node)
{
    if (node == nullptr)
    {
        std::cout << "# ";
        return;
    }

    std::cout << node->val << " ";
    preTraverse(node->left);
    preTraverse(node->right);
}
// In-order traversal: prints the left subtree, then the node's value, then
// the right subtree. A missing child is printed as "# ".
void HuffmanTree::inTraverse(TreeNode *node)
{
    if (node == nullptr)
    {
        std::cout << "# ";
        return;
    }

    inTraverse(node->left);
    std::cout << node->val << " ";
    inTraverse(node->right);
}
/**
 * Assigns a Huffman code to every leaf of the subtree rooted at `root`.
 *
 * Going to a left child appends "0" to the code and going to a right child
 * appends "1". Each leaf stores its code, and the code table maps that code
 * back to the leaf's value.
 *
 * @param root root of the subtree to encode; a null pointer is a no-op
 * @param code code prefix accumulated on the way down to `root`
 */
void HuffmanTree::encode(TreeNode *root, std::string code)
{
    if (!root)
        return;

    if (!root->left && !root->right)
    {
        // A tree made of a single leaf would otherwise receive the empty
        // code, which cannot represent a symbol; give it the code "0".
        if (code.empty())
            code = "0";

        // Assign instead of appending, so that encoding the tree a second
        // time does not concatenate the code onto itself.
        root->code = code;
        _codemap[code] = root->val;
        return; // a leaf has no children to visit
    }

    encode(root->left, code + "0");
    encode(root->right, code + "1");
}
// Prints the code table, one "code:value" entry per line, in the map's key
// order.
void HuffmanTree::printCodemap()
{
    for (auto it = _codemap.begin(); it != _codemap.end(); ++it)
    {
        std::cout << it->first << ":" << it->second << std::endl;
    }
}
/**********************************测试函数******************************************/
//HuffmanTree
void test0()
{
vector<pair<char, int>> vec{{'A', 2}, {'B', 3}, {'C', 7}, {'D', 9}, {'E', 18}, {'F', 25}};
HuffmanTree tree(vec);
cout << "Pre:" << endl;
tree.preTraverse(tree.getRoot());
cout << endl;
cout << "In:" << endl;
tree.inTraverse(tree.getRoot());
cout << endl;
}
//HuffmanCode
// Demo: builds a Huffman tree from six (value, weight) pairs, generates the
// codes and prints the resulting code table.
void test1()
{
    // The weights are assumed to have been counted already; build the
    // optimal binary tree from them.
    std::vector<std::pair<char, int>> vec{{'A', 2}, {'B', 3}, {'C', 7}, {'D', 9}, {'E', 18}, {'F', 25}};
    HuffmanTree tree(vec);

    // Encode, starting at the root with an empty code prefix.
    tree.encode(tree.getRoot(), std::string());
    tree.printCodemap();

    // Decode (not implemented in this demo).
}
// Program entry point: runs the test1 Huffman-coding demo and returns 0.
int main()
{
test1();
return 0;
}
本应生成的哈夫曼编码如下:
程序运行结果:
wwx@VM-0-7-ubuntu:~/dataStructure/Tree/HuffmanTree$ cd "/home/wwx/dataStructure/Tree/HuffmanTree/" && g++ main.cpp -o main && "/home/wwx/dataStructure/Tree/HuffmanTree/"main
HuffmanTree()
0:F
10:E
110:D
11100:A
11101:B
1111:C