一、 定义
1、权值:给这棵树的每一个节点赋值时,所赋的值
2、路径:两个相邻节点之间的连线
3、路径数:从一个节点到另一个节点所经过路径的个数
4、节点的带权路径长度:从根节点到该节点的路径数×该节点的权值
5、树的带权路径长度:所有叶子节点的带权路径长度之和
而哈夫曼树就是保证树的带权路径长度最小
二、构建原理
现在给定一个数量为n的节点序列。
1.从节点中选择权值最小和第二小的两个节点,让其组成一棵树,用一个父节点连接,让小的节点在左边,大的节点在右边。父节点的权值为所选两个节点的权值之和。
2. 把刚刚新建的父节点和剩余的节点一起再组成一个节点序列,重复刚刚的操作,从中选取出两个节点,再组合为一棵树。以此类推,直到序列中只剩下一个节点(即根节点)为止。
三、哈夫曼编码
对于哈夫曼树中的节点,权值越大,路径就越短 。
在计算机的文字编码中,不同的字符有不同的使用频率,而哈夫曼树的这个特点,可以让使用频率高的字符编码更短,减少编码和解码所用的时间。因此对每一个节点进行0、1的编码,每个叶子节点上字符的编码就是从根节点到该节点的路径上0、1的组合。
一般左孩子为0,右孩子为1。
则编码为:
d:0 a:100 c:11 d:101(注:此处 d 出现了两次,其中一处应为 b,请对照原图确认)
四、代码
1. 节点和哈夫曼树结构体
// One node of the Huffman tree, stored as an element of HFTree::data.
// Links to other nodes are array indices, not pointers.
struct TreeNode{
// Weight of this node; for merged nodes createHFTree stores the sum of the two children.
int weight;
// Index of the parent node; 0 means "no parent yet" (selectMin treats such nodes as candidates).
int parent;
// Index of the left child; initTree sets -1 for leaves, and preOrder stops at -1.
int lchild;
// Index of the right child; initTree sets -1 for leaves, and preOrder stops at -1.
int rchild;
};
// Huffman tree stored in a flat array of nodes.
struct HFTree{
// Node array; initTree allocates room for 2 * length - 1 nodes.
TreeNode* data;
// Number of nodes currently in use; createHFTree increments it for every merged node.
int length;
};
创建哈夫曼树的原理,是利用线性结构构建哈夫曼树。这里使用的是一个结构体HFTree,包含两个成员,一个是结构体数组,里面存放的就是哈夫曼树节点(此节点也是一个结构体),另一个是整型变量,用于记录这个线性结构(结构体数组)的长度。这里有点类似顺序表的创建。将树状结构转化为线性结构。同时,用每一个节点中保存的数组下标指向其对应的父节点或者子节点,这里就像是线索化二叉树。
对于节点结构体TreeNode,里面除了权值weight,还有parent和lchild,rchild用于指向对应的节点,这样才能进行索引,将其转化为树结构。
2.初始化
/*
 * Allocate a Huffman tree and fill in its leaf nodes.
 *
 * weight : array of at least `length` leaf weights.
 * length : number of leaves.
 *
 * The node array is sized for 2 * length - 1 entries so that createHFTree
 * can append the merged nodes without reallocating.
 *
 * Returns a heap-allocated tree owned by the caller (release T->data and T
 * with free). If `weight` is NULL or `length` is not positive, an empty tree
 * (data == NULL, length == 0) is returned. Returns NULL when an allocation
 * fails.
 */
HFTree* initTree(int* weight, int length){
    HFTree* T = (HFTree*)malloc(sizeof(HFTree));
    if (T == NULL)
    {
        return NULL;
    }
    T->data = NULL;
    T->length = 0;

    // Nothing to store: avoid computing a negative / wrapped allocation size.
    if (weight == NULL || length <= 0)
    {
        return T;
    }

    T->data = (TreeNode*)malloc(sizeof(TreeNode) * (2 * (size_t)length - 1));
    if (T->data == NULL)
    {
        free(T);
        return NULL;
    }

    T->length = length;
    for (int i = 0; i < length; i++)
    {
        T->data[i].weight = weight[i];
        T->data[i].parent = 0;    // 0 marks "no parent yet"
        T->data[i].lchild = -1;   // -1 marks "no child"
        T->data[i].rchild = -1;
    }
    return T;
}
初始化结构体,由数学逻辑可以推出n个节点序列在进行哈夫曼树化的时候,最终需要2n-1个节点,因此这里创建的data数组的大小是2*length-1。
3. 找最小
/*
 * Find the two parentless nodes with the smallest weights.
 *
 * Only nodes whose parent field is 0 (not yet merged) are considered.
 * Ties are resolved in favour of the lower index, and the second result is
 * always a different node from the first.
 *
 * Returns a heap-allocated array of two ints that the caller must free:
 *   res[0] = index of the smallest node, or -1 if there is no candidate;
 *   res[1] = index of the second smallest node, or -1 if there is none.
 * Returns NULL if the result array cannot be allocated.
 */
int* selectMin(HFTree* T)
{
    // Track indices instead of comparing against a fixed "large" sentinel,
    // so arbitrarily large weights are still selected correctly.
    int minIndex = -1;
    int secondIndex = -1;

    for (int i = 0; i < T->length; i++)
    {
        if (T->data[i].parent != 0)
        {
            continue;
        }
        if (minIndex == -1 || T->data[i].weight < T->data[minIndex].weight)
        {
            minIndex = i;
        }
    }

    // Second pass: same search, skipping the node found above.
    for (int i = 0; i < T->length; i++)
    {
        if (T->data[i].parent != 0 || i == minIndex)
        {
            continue;
        }
        if (secondIndex == -1 || T->data[i].weight < T->data[secondIndex].weight)
        {
            secondIndex = i;
        }
    }

    int* res = (int*)malloc(sizeof(int) * 2);
    if (res == NULL)
    {
        return NULL;
    }
    res[0] = minIndex;
    res[1] = secondIndex;
    return res;
}
由创建的原理可知,每一次插入新节点都需要先找到权值最小的两个节点,并返回它们的下标。
4. 创建
/*
 * Build the Huffman tree in place from the leaves stored in T.
 *
 * Repeatedly takes the two smallest parentless nodes reported by selectMin,
 * appends a new node whose weight is their sum, and links the three nodes
 * through their parent / lchild / rchild indices. T->length grows by one for
 * each appended node, so the last node in use ends up being the root.
 *
 * T->data must have room for 2 * (initial length) - 1 nodes.
 */
void createHFTree(HFTree* T)
{
    // Fixed before the loop because T->length changes while merging.
    const int total = (T->length) * 2 - 1;

    for (int i = T->length; i < total; i++)
    {
        int* res = selectMin(T);
        if (res == NULL)
        {
            return;
        }
        const int min = res[0];
        const int secondMin = res[1];
        free(res);   // selectMin hands over ownership of the result array

        // Defensive: stop if two distinct candidates were not found.
        if (min < 0 || secondMin < 0)
        {
            return;
        }

        T->data[i].weight = T->data[min].weight + T->data[secondMin].weight;
        T->data[i].lchild = min;          // smaller weight goes on the left
        T->data[i].rchild = secondMin;
        T->data[i].parent = 0;            // the new node is a root for now
        T->data[min].parent = i;
        T->data[secondMin].parent = i;
        T->length++;
    }
}
循环n-1次(i从n到2n-2),当在data[ i ]处插入新节点的时候,其weight为此时min和secondMin位置的权值之和,lchild和rchild分别为min和secondMin,parent置为0(表示暂时没有父节点);同时把min和secondMin这两个节点的parent设为i。最后更新长度。
5.遍历
/*
 * Print the weights of the subtree rooted at `index` in pre-order
 * (node, left subtree, right subtree), each followed by a space.
 * An index of -1 denotes an absent child and ends the recursion.
 */
void preOrder(HFTree* T, int index)
{
    if (index == -1)
    {
        return;
    }

    cout << T->data[index].weight << " ";

    const int left = T->data[index].lchild;
    const int right = T->data[index].rchild;
    preOrder(T, left);
    preOrder(T, right);
}
preOrder(T, T->length - 1);
6. 测试
/*
 * Demo driver: builds a Huffman tree from four sample weights and prints
 * the node weights in pre-order, starting from the root (the last node
 * appended by createHFTree).
 */
int main()
{
    int weight[4] = { 1,2,3,4 };
    // Alternative sample data: { 5,1,3,6,11,2,4 } with a length of 7.
    HFTree* T = initTree(weight, 4);
    if (T == NULL)
    {
        return 1;
    }

    createHFTree(T);
    preOrder(T, T->length - 1);
    cout << endl;

    // Release the node array and the tree header allocated by initTree.
    free(T->data);
    free(T);
    return 0;
}
五、完整代码框架
#include"iostream"
#include"cstdlib"
using namespace std;
// One node of the Huffman tree, stored as an element of HFTree::data.
// Links to other nodes are array indices, not pointers.
typedef struct TreeNode
{
// Weight of this node; for merged nodes createHFTree stores the sum of the two children.
int weight;
// Index of the parent node; 0 means "no parent yet" (selectMin treats such nodes as candidates).
int parent;
// Index of the left child; initTree sets -1 for leaves, and preOrder stops at -1.
int lchild;
// Index of the right child; initTree sets -1 for leaves, and preOrder stops at -1.
int rchild;
}TreeNode;
// Huffman tree stored in a flat array of nodes.
typedef struct HFTree
{
// Node array; initTree allocates room for 2 * length - 1 nodes.
TreeNode* data;
// Number of nodes currently in use; createHFTree increments it for every merged node.
int length;
}HFTree;
/*
 * Allocate a Huffman tree and fill in its leaf nodes.
 *
 * weight : array of at least `length` leaf weights.
 * length : number of leaves.
 *
 * The node array is sized for 2 * length - 1 entries so that createHFTree
 * can append the merged nodes without reallocating.
 *
 * Returns a heap-allocated tree owned by the caller (release T->data and T
 * with free). If `weight` is NULL or `length` is not positive, an empty tree
 * (data == NULL, length == 0) is returned. Returns NULL when an allocation
 * fails.
 */
HFTree* initTree(int* weight, int length)
{
    HFTree* T = (HFTree*)malloc(sizeof(HFTree));
    if (T == NULL)
    {
        return NULL;
    }
    T->data = NULL;
    T->length = 0;

    // Nothing to store: avoid computing a negative / wrapped allocation size.
    if (weight == NULL || length <= 0)
    {
        return T;
    }

    T->data = (TreeNode*)malloc(sizeof(TreeNode) * (2 * (size_t)length - 1));
    if (T->data == NULL)
    {
        free(T);
        return NULL;
    }

    T->length = length;
    for (int i = 0; i < length; i++)
    {
        // T points at the tree header; T->data is the array of node structs.
        // Each leaf gets its weight from the caller's array and starts
        // without a parent (0) and without children (-1).
        T->data[i].weight = weight[i];
        T->data[i].parent = 0;
        T->data[i].lchild = -1;
        T->data[i].rchild = -1;
    }
    return T;
}
/*
 * Find the two parentless nodes with the smallest weights.
 *
 * Only nodes whose parent field is 0 (not yet merged) are considered.
 * Ties are resolved in favour of the lower index, and the second result is
 * always a different node from the first.
 *
 * Returns a heap-allocated array of two ints that the caller must free:
 *   res[0] = index of the smallest node, or -1 if there is no candidate;
 *   res[1] = index of the second smallest node, or -1 if there is none.
 * Returns NULL if the result array cannot be allocated.
 */
int* selectMin(HFTree* T)
{
    // Track indices instead of comparing against a fixed "large" sentinel,
    // so arbitrarily large weights are still selected correctly.
    int minIndex = -1;
    int secondIndex = -1;

    for (int i = 0; i < T->length; i++)
    {
        if (T->data[i].parent != 0)
        {
            continue;
        }
        if (minIndex == -1 || T->data[i].weight < T->data[minIndex].weight)
        {
            minIndex = i;
        }
    }

    // Second pass: same search, skipping the node found above.
    for (int i = 0; i < T->length; i++)
    {
        if (T->data[i].parent != 0 || i == minIndex)
        {
            continue;
        }
        if (secondIndex == -1 || T->data[i].weight < T->data[secondIndex].weight)
        {
            secondIndex = i;
        }
    }

    int* res = (int*)malloc(sizeof(int) * 2);
    if (res == NULL)
    {
        return NULL;
    }
    res[0] = minIndex;
    res[1] = secondIndex;
    return res;
}
/*
 * Build the Huffman tree in place from the leaves stored in T.
 *
 * Repeatedly takes the two smallest parentless nodes reported by selectMin,
 * appends a new node whose weight is their sum, and links the three nodes
 * through their parent / lchild / rchild indices. T->length grows by one for
 * each appended node, so the last node in use ends up being the root.
 *
 * T->data must have room for 2 * (initial length) - 1 nodes.
 */
void createHFTree(HFTree* T)
{
    // Fixed before the loop because T->length changes while merging.
    const int total = (T->length) * 2 - 1;

    for (int i = T->length; i < total; i++)
    {
        int* res = selectMin(T);
        if (res == NULL)
        {
            return;
        }
        const int min = res[0];
        const int secondMin = res[1];
        free(res);   // selectMin hands over ownership of the result array

        // Defensive: stop if two distinct candidates were not found.
        if (min < 0 || secondMin < 0)
        {
            return;
        }

        T->data[i].weight = T->data[min].weight + T->data[secondMin].weight;
        T->data[i].lchild = min;          // smaller weight goes on the left
        T->data[i].rchild = secondMin;
        T->data[i].parent = 0;            // the new node is a root for now
        T->data[min].parent = i;
        T->data[secondMin].parent = i;
        T->length++;
    }
}
/*
 * Print the weights of the subtree rooted at `index` in pre-order
 * (node, left subtree, right subtree), each followed by a space.
 * An index of -1 denotes an absent child and ends the recursion.
 */
void preOrder(HFTree* T, int index)
{
    if (index == -1)
    {
        return;
    }

    cout << T->data[index].weight << " ";

    const int left = T->data[index].lchild;
    const int right = T->data[index].rchild;
    preOrder(T, left);
    preOrder(T, right);
}
/*
 * Demo driver: builds a Huffman tree from four sample weights and prints
 * the node weights in pre-order, starting from the root (the last node
 * appended by createHFTree).
 */
int main()
{
    int weight[4] = { 1,2,3,4 };
    // Alternative sample data: { 5,1,3,6,11,2,4 } with a length of 7.
    HFTree* T = initTree(weight, 4);
    if (T == NULL)
    {
        return 1;
    }

    createHFTree(T);
    preOrder(T, T->length - 1);
    cout << endl;

    // Release the node array and the tree header allocated by initTree.
    free(T->data);
    free(T);
    return 0;
}