1.定义
假设有 n 个权值 w1, w2, …, wn,构造一棵含有 n 个叶子结点的二叉树,第 i 个叶子结点的权为 wi,其中带权路径长度最短的二叉树称为最优二叉树或者哈夫曼树。
2.哈夫曼树的基本概念
路径:从树中一个结点到另一个结点之间的分支构成这两个结点间的路径。
结点的路径长度:两个结点路径上的分支数。
树的路径长度:从树根到每一个结点的路径长度之和。 记作:TL
结点数目相同的二叉树中,完全二叉树是路径长度最短的二叉树。
权:给树中的结点赋予一个具有某种含义的数值,则这个数值称为该结点的权。
结点的带权路径长度:从根节点到该结点之间的路径长度与该结点的权的乘积。
树的带权路径长度:树中所有的叶子结点的带权路径长度之和。
3.创建哈夫曼树代码
/**
 * Builds a Huffman tree for `length` symbols.
 *
 * The tree is stored in a freshly allocated array of 2*length nodes that is
 * addressed with 1-based indices; slot 0 is never used as a node, so the
 * index 0 can stand for "no parent / no child". Slots 1..length are the
 * leaves (one per symbol), slots length+1..2*length-1 are the internal nodes,
 * and the last slot is the root.
 *
 * @param HT       Output: receives the node array (allocated with new[]).
 *                 Set to nullptr when the function fails.
 * @param length   Number of symbols; must be in the range 2..MaxSize.
 * @param cntarray Symbol/frequency table; entries 0..length-1 are read.
 * @return OK on success, ERROR when `length` is out of range.
 */
Status CreateHuffmanTree(HuffmanTree &HT, int length, NumCount cntarray)
{
    // A tree needs at least two leaves, and cntarray.count holds MaxSize entries.
    if (length <= 1 || length > MaxSize)
    {
        HT = nullptr;   // never hand an indeterminate pointer back to the caller
        return ERROR;
    }

    const int m = length * 2 - 1;   // total number of nodes in the tree

    // Value-initialisation zeroes every field, so parent/lchild/rchild all
    // start at 0 ("none") and slot 0 and the internal nodes' data are defined.
    HT = new HTNode[m + 1]();

    // Leaves: copy each symbol and its frequency.
    for (int i = 1; i <= length; i++)
    {
        HT[i].data = cntarray.count[i - 1].ch;
        HT[i].weight = cntarray.count[i - 1].cnt;
    }

    // Internal nodes: repeatedly join the two lightest parentless nodes.
    for (int i = length + 1; i <= m; i++)
    {
        int s1 = 0;
        int s2 = 0;
        select(HT, i - 1, &s1, &s2);
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }
    return OK;
}//of CreateHuffmanTree
4.创建哈夫曼编码的代码
/**
 * Derives the Huffman code of every leaf by walking from the leaf up to the
 * root and recording '0' for a left branch and '1' for a right branch.
 *
 * @param HT     Huffman tree (1-based node array; leaves are slots 1..length,
 *               a parent index of 0 marks the root).
 * @param HC     Output: receives an array of length+1 entries allocated with
 *               new[]; entries 1..length hold the symbol and its
 *               NUL-terminated code string (each string allocated with new[]).
 *               Entry 0 is zeroed and unused. Set to nullptr on failure.
 * @param length Number of leaves (symbols).
 * @return OK on success, ERROR when HT is null or length is not positive.
 */
Status CreateHuffmanCode(HuffmanTree HT, HuffmanCode &HC, int length)
{
    if (HT == nullptr || length <= 0)
    {
        HC = nullptr;
        return ERROR;
    }

    HC = new HCode[length + 1]();   // value-initialised: slot 0 stays {'\0', nullptr}

    // Scratch buffer filled from the back. Assumes every leaf is at most
    // length-1 branches below the root (true for a binary tree with `length`
    // leaves whose internal nodes all have two children).
    char *cd = new char[length];
    cd[length - 1] = '\0';

    for (int i = 1; i <= length; i++)
    {
        int start = length - 1;
        // Climb towards the root; the code is produced last bit first.
        for (int c = i, f = HT[c].parent; f != 0; c = f, f = HT[c].parent)
        {
            // Store the character '1', not the integer 1.
            cd[--start] = (HT[f].lchild == c) ? '0' : '1';
        }
        HC[i].data = HT[i].data;
        HC[i].str = new char[length - start];   // bits plus terminating NUL
        strcpy(HC[i].str, &cd[start]);
    }

    delete[] cd;   // array form: cd was allocated with new[]
    return OK;
}//of CreateHuffmanCode
5.总代码
#include<iostream>
#include<fstream>
#include<string.h>
using namespace std;
// Size of the input text buffer in main and of the symbol table NumCount::count.
#define MaxSize 1024
// Status codes returned by the functions in this file.
#define OK 1
#define ERROR 0
// Result type of the functions in this file; holds OK or ERROR.
typedef int Status;
// One symbol of the input together with the number of times it was seen.
struct wordcnt
{
    char ch;        // the symbol
    int cnt = 0;    // occurrences counted so far
};
using Count = wordcnt;
// Frequency table: the first `length` entries of `count` are in use.
struct NumCount
{
    Count count[MaxSize];   // one entry per distinct symbol
    int length = 0;         // number of entries currently used
};
// Node of a Huffman tree stored in an array; links are array indices and the
// index 0 means "none".
struct HTree
{
    char data;      // symbol carried by the node
    int weight;     // frequency, or the sum of both children's weights
    int parent;     // index of the parent node
    int lchild;     // index of the left child
    int rchild;     // index of the right child
};
using HTNode = HTree;
using HuffmanTree = HTree*;
// Code table entry: a symbol and its code as a NUL-terminated string.
struct HCode
{
    char data;      // the symbol
    char* str;      // its code string
};
using HuffmanCode = HCode*;
Status ReadData(char *source); // Read the input file
Status WordCount(char *data,NumCount *paraCnt); // Count the occurrences of each character
Status Show(NumCount *paraCnt); // Print the occurrence counts
Status CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray); // Build the Huffman tree
Status select(HuffmanTree HT,int top,int *s1,int *s2); // Select the two nodes with the smallest weights
Status CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length); // Build the Huffman codes
Status Encode(char *data,HuffmanCode HC,int length); // Encode the text that was read and write it to a txt file
Status Decode(HuffmanTree HT,int length); // Read the encoded file and decode it
int main()
{
char data[MaxSize];
NumCount Cntarray;
ReadData(data);
WordCount(data, &Cntarray);
//Show(&Cntarray);
HuffmanTree tree;
CreateHuffmanTree(tree, Cntarray.length, Cntarray);
HuffmanCode code;
CreateHuffmanCode(tree, code, Cntarray.length);
Encode(data, code, Cntarray.length);
Decode(tree, Cntarray.length);
cout<<"Please view the generated TXT file to check the result"<<endl;
return 0;
}
/**
 * Reads the first line of "in.txt" into `source` and echoes it to stdout.
 *
 * @param source Destination buffer; must have room for MaxSize characters.
 *               It always holds a valid (possibly empty) string on return.
 * @return OK when the file could be opened, ERROR when `source` is null or
 *         the file cannot be opened.
 */
Status ReadData(char *source)
{
    if (source == nullptr) return ERROR;
    source[0] = '\0';   // leave an empty string behind if nothing can be read

    ifstream infile;
    infile.open("in.txt");
    if (!infile.is_open())
    {
        cerr<<"cannot open in.txt"<<endl;
        return ERROR;
    }

    cout<<"Reading..."<<endl;
    cout<<"the input file is:"<<endl;
    // Reads at most MaxSize-1 characters and always NUL-terminates the buffer.
    infile.getline(source, MaxSize);
    cout<<source<<endl;
    infile.close();
    cout<<endl;
    return OK;
}//of ReadData
/**
 * Tallies how often each character occurs in the NUL-terminated string `data`.
 *
 * Characters are appended to paraCnt->count in order of first appearance and
 * paraCnt->length grows by one for each new character.
 *
 * @param data    Text to analyse.
 * @param paraCnt Frequency table that is updated in place.
 * @return Always OK.
 */
Status WordCount(char *data, NumCount *paraCnt)
{
    const int total = strlen(data);
    for (int pos = 0; pos < total; ++pos)
    {
        const char symbol = data[pos];

        // Linear search for an existing entry of this symbol.
        int slot = 0;
        while (slot < paraCnt->length && paraCnt->count[slot].ch != symbol)
        {
            ++slot;
        }

        // Not seen before: claim the next free entry.
        if (slot == paraCnt->length)
        {
            paraCnt->count[slot].ch = symbol;
            paraCnt->length++;
        }
        paraCnt->count[slot].cnt++;
    }
    return OK;
}//of WordCount
/**
 * Prints the number of distinct characters followed by one line per character
 * with its occurrence count.
 *
 * @param paraCnt Frequency table to print.
 * @return Always OK.
 */
Status Show(NumCount *paraCnt)
{
    cout << "the length is " << paraCnt->length << endl;

    int idx = 0;
    while (idx < paraCnt->length)
    {
        const Count &entry = paraCnt->count[idx];
        cout << "The character " << entry.ch << " appears " << entry.cnt << endl;
        ++idx;
    }

    cout << endl;
    return OK;
}//of Show
/**
 * Builds a Huffman tree for `length` symbols.
 *
 * The tree is stored in a freshly allocated array of 2*length nodes that is
 * addressed with 1-based indices; slot 0 is never used as a node, so the
 * index 0 can stand for "no parent / no child". Slots 1..length are the
 * leaves (one per symbol), slots length+1..2*length-1 are the internal nodes,
 * and the last slot is the root.
 *
 * @param HT       Output: receives the node array (allocated with new[]).
 *                 Set to nullptr when the function fails.
 * @param length   Number of symbols; must be in the range 2..MaxSize.
 * @param cntarray Symbol/frequency table; entries 0..length-1 are read.
 * @return OK on success, ERROR when `length` is out of range.
 */
Status CreateHuffmanTree(HuffmanTree &HT, int length, NumCount cntarray)
{
    // A tree needs at least two leaves, and cntarray.count holds MaxSize entries.
    if (length <= 1 || length > MaxSize)
    {
        HT = nullptr;   // never hand an indeterminate pointer back to the caller
        return ERROR;
    }

    const int m = length * 2 - 1;   // total number of nodes in the tree

    // Value-initialisation zeroes every field, so parent/lchild/rchild all
    // start at 0 ("none") and slot 0 and the internal nodes' data are defined.
    HT = new HTNode[m + 1]();

    // Leaves: copy each symbol and its frequency.
    for (int i = 1; i <= length; i++)
    {
        HT[i].data = cntarray.count[i - 1].ch;
        HT[i].weight = cntarray.count[i - 1].cnt;
    }

    // Internal nodes: repeatedly join the two lightest parentless nodes.
    for (int i = length + 1; i <= m; i++)
    {
        int s1 = 0;
        int s2 = 0;
        select(HT, i - 1, &s1, &s2);
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }
    return OK;
}//of CreateHuffmanTree
/**
 * Finds the two lightest nodes without a parent among HT[1..top].
 *
 * Ties are resolved in favour of the lower index, and *s1 never has a larger
 * weight than *s2. No sentinel weight is used, so nodes of any weight can be
 * selected.
 *
 * @param HT  Node array (1-based; a parent index of 0 means "no parent").
 * @param top Highest index to inspect.
 * @param s1  Output: index of the lightest candidate, or 0 if there is none.
 * @param s2  Output: index of the second lightest candidate, or 0 if there
 *            are fewer than two candidates.
 * @return OK when two nodes were found, ERROR otherwise.
 */
Status select(HuffmanTree HT, int top, int *s1, int *s2)
{
    int lightest = 0;   // 0 = nothing found yet (index 0 is never a candidate)
    int runnerUp = 0;

    for (int i = 1; i <= top; i++)
    {
        if (HT[i].parent != 0) continue;    // already merged into a subtree

        if (lightest == 0 || HT[i].weight < HT[lightest].weight)
        {
            // New minimum: the previous minimum becomes the runner-up.
            runnerUp = lightest;
            lightest = i;
        }
        else if (runnerUp == 0 || HT[i].weight < HT[runnerUp].weight)
        {
            runnerUp = i;
        }
    }

    *s1 = lightest;
    *s2 = runnerUp;
    return (runnerUp != 0) ? OK : ERROR;
}//of select
/**
 * Derives the Huffman code of every leaf by walking from the leaf up to the
 * root and recording '0' for a left branch and '1' for a right branch.
 *
 * @param HT     Huffman tree (1-based node array; leaves are slots 1..length,
 *               a parent index of 0 marks the root).
 * @param HC     Output: receives an array of length+1 entries allocated with
 *               new[]; entries 1..length hold the symbol and its
 *               NUL-terminated code string (each string allocated with new[]).
 *               Entry 0 is zeroed and unused. Set to nullptr on failure.
 * @param length Number of leaves (symbols).
 * @return OK on success, ERROR when HT is null or length is not positive.
 */
Status CreateHuffmanCode(HuffmanTree HT, HuffmanCode &HC, int length)
{
    if (HT == nullptr || length <= 0)
    {
        HC = nullptr;
        return ERROR;
    }

    HC = new HCode[length + 1]();   // value-initialised: slot 0 stays {'\0', nullptr}

    // Scratch buffer filled from the back. Assumes every leaf is at most
    // length-1 branches below the root (true for a binary tree with `length`
    // leaves whose internal nodes all have two children).
    char *cd = new char[length];
    cd[length - 1] = '\0';

    for (int i = 1; i <= length; i++)
    {
        int start = length - 1;
        // Climb towards the root; the code is produced last bit first.
        for (int c = i, f = HT[c].parent; f != 0; c = f, f = HT[c].parent)
        {
            // Store the character '1', not the integer 1.
            cd[--start] = (HT[f].lchild == c) ? '0' : '1';
        }
        HC[i].data = HT[i].data;
        HC[i].str = new char[length - start];   // bits plus terminating NUL
        strcpy(HC[i].str, &cd[start]);
    }

    delete[] cd;   // array form: cd was allocated with new[]
    return OK;
}//of CreateHuffmanCode
/**
 * Replaces every character of `data` by its Huffman code and writes the
 * resulting '0'/'1' text to "code.txt".
 *
 * Characters that have no entry in the code table are skipped.
 *
 * @param data   NUL-terminated text to encode.
 * @param HC     Code table; entries 1..length are searched.
 * @param length Number of entries in the code table.
 * @return OK on success, ERROR when an argument is null or the output file
 *         cannot be opened.
 */
Status Encode(char *data, HuffmanCode HC, int length)
{
    if (data == nullptr || HC == nullptr) return ERROR;

    ofstream outfile;
    outfile.open("code.txt");
    if (!outfile.is_open()) return ERROR;

    for (const char *p = data; *p != '\0'; ++p)
    {
        // The inner loop must be bounded by its own index j.
        for (int j = 1; j <= length; j++)
        {
            if (*p == HC[j].data)
            {
                outfile<<HC[j].str;
                break;  // WordCount records each symbol once, so stop at the first match
            }
        }
    }

    outfile.close();
    cout<<"the code txt has been written"<<endl;
    cout<<endl;
    return OK;
}//of Encode
/**
 * Decodes the first line of "code.txt" with the given Huffman tree and writes
 * the recovered text to "out.txt".
 *
 * The input is consumed one character at a time, so no buffer whose size
 * depends on `length` is needed. '0' descends to the left child, '1' to the
 * right child, and any other character is ignored. Whenever a leaf is
 * reached its symbol is written and decoding restarts at the root.
 *
 * @param HT     Huffman tree (1-based node array, root at index 2*length-1).
 * @param length Number of leaves of the tree; must be at least 2.
 * @return OK on success, ERROR when the tree is missing, `length` is too
 *         small, or one of the files cannot be opened.
 */
Status Decode(HuffmanTree HT,int length)
{
    if (HT == nullptr || length <= 1) return ERROR;

    ifstream infile;
    infile.open("code.txt");
    if (!infile.is_open()) return ERROR;

    ofstream outfile;
    outfile.open("out.txt");
    if (!outfile.is_open()) return ERROR;

    const int top = 2*length-1;     // index of the root node
    int cur = top;
    char bit;
    // Stop at the end of the first line or at end of file.
    while (infile.get(bit) && bit != '\n')
    {
        if (bit == '0') cur = HT[cur].lchild;
        else if (bit == '1') cur = HT[cur].rchild;
        else continue;              // not a code bit: the position is unchanged

        // A node without children is a leaf: emit its symbol and start over.
        if (HT[cur].lchild == 0 && HT[cur].rchild == 0)
        {
            outfile<<HT[cur].data;
            cur = top;
        }
    }

    infile.close();
    outfile.close();
    cout<<"the output txt has been written"<<endl;
    cout<<endl;
    return OK;
}