一、
Huffman于1952年提出一种编码方法,该方法完全依据字符出现概率来构造异字头的平均长度最短的码字,有时称之为最佳编码,一般称为哈夫曼编码(有时也称为霍夫曼编码)。
二、背景:
1951年,霍夫曼和他在MIT信息论课程的同学需要在完成学期报告和参加期末考试之间做出选择。导师Robert M. Fano给出的学期报告题目是:寻找最有效的二进制编码。由于无法证明哪个已有编码是最有效的,霍夫曼放弃了对已有编码的研究,转向新的探索,最终发现了基于有序频率二叉树编码的想法,并很快证明了这个方法是最有效的。
由于这个算法,学生终于青出于蓝,超过了他那曾经和信息论创立者克劳德·香农共同研究过类似编码的导师。霍夫曼使用自底向上的方法构建二叉树,避免了次优算法香农-范诺编码的最大弊端──自顶向下构建树。
三、
哈夫曼编码是一种统计编码,属于无损压缩编码。它也是一种变长编码,也就是说,对于出现频率高的信息,对应的编码长度较短,反之较长。通过这样的编码处理,表示全部信息所用的总码长通常小于采用等长编码表示同样信息所需的总码长。
四、
哈夫曼树,又称最优二叉树,是指带权路径长度最小的二叉树。树的带权路径长度,就是树中所有的叶结点的权值乘上其到根结点的路径长度的积之和。
五、
哈夫曼树中的权值可以理解为:字符的出现频率
六、如何构造哈夫曼树:
a、给定n个权值为{w1,w2,w3,...,wn},先构造n棵只有根结点(带有相应的权值)的二叉树(其左右子树为空);
b、在森林中选取两棵根结点的权值最小的二叉树,将它们作为左右子树并构造一棵新的二叉树。然后置这棵新的二叉树的根结点的权值为其左右子树的根结点的权值之和;
c、在该森林中删除这两棵二叉树,将该新的二叉树加入该森林中;
d、重复b、c,直到最终森林中只有一棵二叉树为止,这棵二叉树就是哈夫曼树。
七、实现和应用:(一个小型的哈夫曼编码/译码系统)
(1)实现一个含有如下功能项的主菜单:
I-----Initialization(初始化、建立哈夫曼树)
T-----Tree printing(打印哈夫曼树)
C-----Huffman code printing(打印哈夫曼编码)
E-----Encoding(编码)
P-----Print(打印编码文件)
D-----Decoding(译码)
Q-----Quit(退出)
(2)实现所有功能项对应的具体功能:
a、初始化、建立哈夫曼树:从终端读入字符集大小n、n个字符、n个权值,建立起哈夫曼树,并将其存于文件hfmTree.txt中;
b、打印哈夫曼树:将内存中的哈夫曼树以直观的形式(这里使用横向打印的树结构)显示在终端上,同时将该形式的哈夫曼树写入文件TreePrint.txt中;
c、打印哈夫曼编码:将字符集对应的哈夫曼编码显示在终端;
d、编码(有两种方式):
*利用已经建立好的哈夫曼树(如果不在内存中,则从文件hfmTree.txt中读入)对文件ToBeTran.txt中的文本进行编码。然后将结果存入文件CodeFile.txt中;
*利用已经建立好的哈夫曼树(如果不在内存中,则从文件hfmTree.txt中读入)对实时输入的文本进行编码,然后将结果显示在终端上。
e、打印编码文件:将文件CodeFile.txt以紧凑格式显示在终端上,每行50个代码,同时将该字符形式的编码文件写入文件CodePrint.txt中;
f、译码(也有两种方式):
*利用已经建立好的哈夫曼树(如果不在内存中,则从文件hfmTree.txt中读入)对文件CodeFile.txt中的文本进行译码,然后将结果存入文件TextFile.txt中;
*利用已经建立好的哈夫曼树(如果不在内存中,则从文件hfmTree.txt中读入)对实时输入的文本进行译码,然后将结果显示在终端上。
g、退出;
(3)代码实现:
#include<iostream>
#include<fstream>
#include<string>
#include<Windows.h>
using namespace std;
// Node of the Huffman tree. The tree is stored as a flat array and nodes refer
// to each other by array index; index 0 means "no node" (slot 0 of the array
// is never used as a real node).
//
// The member order is part of the on-disk format: init() writes the node array
// to hfmTree.txt as raw bytes and initByFile() reads it back the same way.
struct hfmNode
{
    int leftChild, rightChild, parent;
    double weight;
    // Every link starts at 0 so a freshly constructed node is a detached leaf.
    // Previously leftChild/rightChild were left indeterminate, which made the
    // "child == 0" leaf tests elsewhere unreliable for nodes that were never
    // explicitly filled in.
    hfmNode() :leftChild(0), rightChild(0), parent(0), weight(0.0) {}
};
// Program-wide Huffman state. All arrays are indexed from 1; slot 0 is unused.
int hfmSize = 0; // Size of the Huffman tree, i.e. the number of characters in the character set
char *characters = nullptr; // Character set
double *weight = nullptr; // Weight of each character
string *huffmanCodeStrings = nullptr; // Huffman code of each character in the set
hfmNode* hfmTree = nullptr; // Huffman tree stored as an array of node structs
// Writes a horizontal separator line to the terminal and flushes it.
void printLine()
{
    static const char kSeparator[] = "-----------------------------------------------";
    cout << kSeparator;
    cout << endl;
}
//通过读取已经存在于文件hfmTree.txt中哈夫曼树的相关数据进行初始化
void initByFile()
{
ifstream in("D:\\hfmTree.txt", ios::in | ios::binary);
if (!in.is_open())
{
cerr << "文件打开失败!" << endl;
return;
}
in.read((char*)&hfmSize, sizeof(int));//一定要先从文件中读取哈夫曼树的大小!
//0号单元不用,起始下标从1开始
int m = 2 * hfmSize - 1;
hfmTree = new hfmNode[m + 1];
characters = new char[hfmSize + 1];
weight = new double[hfmSize + 1];
huffmanCodeStrings = new string[hfmSize + 1];
in.read(characters, sizeof(char)*(hfmSize + 1));
in.read((char*)weight, sizeof(double)*(hfmSize + 1));
in.read((char*)huffmanCodeStrings, sizeof(string)*(hfmSize + 1));
in.read((char*)hfmTree, sizeof(hfmTree[0])*(m + 1));
in.close();
}
// Picks the lightest not-yet-used node among hfmTree[1..n] and marks it used.
//
// A node counts as unused while its parent field is 0. The selected node gets
// the placeholder parent 1 so that an immediately following call skips it.
// Ties are resolved in favour of the lowest index.
//
// Returns the index of the selected node, or 0 when no unused node exists
// (in which case nothing is modified).
int minInhuffmanTree(hfmNode hfmTree[], int n)
{
    int minIndex = 0; // 0 = no candidate found yet
    for (int i = 1; i <= n; i++)
    {
        if (hfmTree[i].parent != 0) continue;
        // Compare the double weights directly. The previous int accumulator
        // truncated fractional weights (0.3 became 0), so lighter nodes seen
        // later were never selected.
        if (minIndex == 0 || hfmTree[i].weight < hfmTree[minIndex].weight) minIndex = i;
    }
    if (minIndex != 0) hfmTree[minIndex].parent = 1; // mark as "used"
    return minIndex;
}
// Selects the two lightest unused nodes among hfmTree[1..n] and hands their
// indices back through min1 and min2, with the smaller index in min1.
// Both nodes are marked as used by minInhuffmanTree.
void selectTwoMinsFromhfmTree(hfmNode hfmTree[], int n, int& min1, int& min2)
{
    min1 = minInhuffmanTree(hfmTree, n);
    min2 = minInhuffmanTree(hfmTree, n);
    if (min2 < min1)
    {
        // Order the pair by index, not by weight.
        const int larger = min1;
        min1 = min2;
        min2 = larger;
    }
}
// Builds the Huffman tree for n weighted symbols and derives each symbol's code.
//
// hfmTree            node array with room for indices 0 .. 2*n-1; leaves are
//                    stored at 1..n, internal nodes at n+1..2*n-1, root last
// huffmanCodeStrings output, huffmanCodeStrings[i] receives the code of symbol i
// weight             weight[i] is the weight of symbol i (1-based)
// n                  number of symbols; nothing is done when n <= 0
//
// A single-symbol alphabet has no edges to derive bits from, so its one symbol
// is assigned the code "0" and the leaf itself is the whole tree.
void huffmanCoding(hfmNode hfmTree[], string huffmanCodeStrings[], double weight[], int n)
{
    if (n <= 0) return;

    // Leaves: copy the weights and detach every link.
    for (int i = 1; i <= n; i++)
    {
        hfmTree[i].weight = weight[i];
        hfmTree[i].parent = hfmTree[i].leftChild = hfmTree[i].rightChild = 0;
    }

    if (n == 1)
    {
        huffmanCodeStrings[1] = "0";
        return;
    }

    // Repeatedly merge the two lightest parentless nodes into a new node i.
    const int m = 2 * n - 1;
    for (int i = n + 1; i <= m; i++)
    {
        int min1 = 0, min2 = 0;
        selectTwoMinsFromhfmTree(hfmTree, i - 1, min1, min2);
        hfmTree[i].parent = 0; // the new node is itself a merge candidate
        hfmTree[min1].parent = hfmTree[min2].parent = i;
        hfmTree[i].leftChild = min1;
        hfmTree[i].rightChild = min2;
        hfmTree[i].weight = hfmTree[min1].weight + hfmTree[min2].weight;
    }

    // Walk from each leaf up to the root: a left edge contributes '0', a right
    // edge '1'. Bits arrive leaf-to-root, so they are appended and then
    // reversed once instead of being prepended one at a time.
    for (int i = 1; i <= n; i++)
    {
        string reversedCode;
        int current = i;
        int parent = hfmTree[current].parent;
        while (parent != 0)
        {
            reversedCode += (hfmTree[parent].leftChild == current) ? '0' : '1';
            current = parent;
            parent = hfmTree[current].parent;
        }
        huffmanCodeStrings[i] = string(reversedCode.rbegin(), reversedCode.rend());
    }
}
//I-----Initialization(初始化、建立哈夫曼树)
void init()
{
int sum;
cout << "请输入您所要编码的字符种类总数:";
cin >> sum;
hfmSize = sum;
//0号单元不用,起始下标从1开始
characters = new char[sum + 1];
weight = new double[sum + 1];
huffmanCodeStrings = new string[sum + 1];
int m = 2 * sum - 1;
hfmTree = new hfmNode[m + 1];
cout << endl << "请您按顺序输入每种字符以及其对应的权值:" << endl;
printLine();
cin.get();//吃掉回车
for (int i = 1; i <= sum; i++)
{
cout << "请您输入第 " << i << " 个字符:";
characters[i] = getchar();
cin.get(); //吃掉回车
cout << "请您输入该字符所所应的权值:";
cin >> weight[i];
cin.get(); //吃掉回车
printLine();
}
cout << "字符集为:" << endl;
for (int i = 1; i <= sum; i++)
{
cout << characters[i] << ":" << weight[i] << endl;
}
cout << endl;
huffmanCoding(hfmTree, huffmanCodeStrings, weight, sum);
printLine();
//将各种字符的哈夫曼编码写入文件hfmTree.txt中
cout << "下面将各种字符的哈夫曼编码写入文件hfmTree.txt中......" << endl;
ofstream out("D:\\hfmTree.txt", ios::out | ios::binary);
if (!out.is_open())
{
cerr << "文件打开失败!" << endl;
return;
}
out.write((char*)&hfmSize, sizeof(int));
out.write(characters, sizeof(char)*(hfmSize + 1));
out.write((char*)weight, sizeof(double)*(hfmSize + 1));
out.write((char*)huffmanCodeStrings, sizeof(string)*(hfmSize + 1));
out.write((char*)hfmTree, sizeof(hfmTree[0])*(m + 1));
cout << "写入文件hfmTree.txt成功!" << endl;
out.close();
}
// T ----- Tree printing
// Branch glyphs, indexed by (hasLeftChild << 1) | hasRightChild:
//   0 -> ' '  (leaf)        1 -> '/'  (right child only)
//   2 -> '\\' (left only)   3 -> '<'  (both children)
char branches[] = " /\\<";
// Prints the subtree rooted at index `root` sideways to `out`: the right
// subtree first, then the node itself indented by `height` tabs, then the left
// subtree. Each node shows its weight, leaves additionally show their
// character in parentheses, followed by the branch glyph.
void printHfmTree(int root, int height, ostream& out)
{
    if (root == 0) return; // index 0 marks an absent child

    const bool hasLeft = hfmTree[root].leftChild != 0;
    const bool hasRight = hfmTree[root].rightChild != 0;

    // Right subtree goes above the current node, one level deeper.
    printHfmTree(hfmTree[root].rightChild, height + 1, out);

    // Deeper nodes are pushed further to the right, one tab per level.
    for (int remaining = height; remaining > 0; --remaining) out << "\t";

    out << hfmTree[root].weight;
    if (!hasLeft && !hasRight) out << "(" << characters[root] << ")";

    const int glyph = (hasLeft ? 2 : 0) | (hasRight ? 1 : 0);
    out << branches[glyph];
    out << endl;

    // Left subtree goes below the current node.
    printHfmTree(hfmTree[root].leftChild, height + 1, out);
}
void PrintHfmTree()
{
cout << "该哈夫曼树打印如下(横向打印):" << endl << endl;
printHfmTree(2 * hfmSize - 1, 0, cout);
ofstream out("D:\\TreePrint.txt", ios::out);
if (!out.is_open())
{
cerr << "文件打开失败!" << endl;
exit(1);
}
printHfmTree(2 * hfmSize - 1, 0, out);
cout << "写入文件TreePrint.txt成功!" << endl;
out.close();
}
// C ----- Huffman code printing
// Lists every character of the set on the terminal together with its weight
// and its Huffman code, one character per line.
void printHfmCodeStrings()
{
    cout << "该字符集的编码如下:" << endl << endl;
    int index = 1; // slot 0 is unused
    while (index <= hfmSize)
    {
        cout << "字符 " << characters[index];
        cout << "(权值为" << weight[index] << ")";
        cout << " : " << huffmanCodeStrings[index] << endl;
        ++index;
    }
}
//E-----Encoding(编码),对文本进行编码(支持含空格的文本)
void encodeText()
{
char inputType,ch=' ';
string textToBeEncoded = "", encodeString = "";
cout << "您有如下两种方式提供待编码文本:" << endl << endl;
cout << "1-----读取文件ToBeTran.txt中的待编码文本;" << endl;
cout << "2-----读取实时输入的待编码文本;" << endl;
printLine();
cout << "您选择方式: " << endl;
cin >> inputType;
if (inputType == '1')
{
ifstream in("D:\\ToBeTran.txt", ios::in);
if (!in.is_open())
{
cerr << "文件打开失败!" << endl;
exit(1);
}
ofstream out("D:\\CodeFile.txt", ios::out);
if (!out.is_open())
{
cerr << "文件打开失败!" << endl;
exit(1);
}
cin.get();//吃掉回车
while ((ch = in.get()) != EOF)
{
textToBeEncoded = textToBeEncoded + ch;
}
for (int i = 0; i < textToBeEncoded.length(); i++)
{
for (int j = 1; j <= hfmSize; j++)
{
if (characters[j] == textToBeEncoded[i]) encodeString = encodeString + huffmanCodeStrings[j];
}
}
out << encodeString;
cout << "该段文本被编码后写入文件CodeFile.txt成功!" << endl;
in.close();
out.close();
}
else if (inputType == '2')
{
cout << "请您输入待编码文本:" << endl;
cin.get();//吃掉回车
while (cin.get(ch) && ch != '\n')
{
textToBeEncoded = textToBeEncoded + ch;
}
for (int i = 0; i < textToBeEncoded.length(); i++)
{
for (int j = 1; j <= hfmSize; j++)
{
if (characters[j] == textToBeEncoded[i]) encodeString = encodeString + huffmanCodeStrings[j];
}
}
cout << "该段文本被编码为如下:" << endl;
cout << encodeString << endl;
}
}
// P ----- Print: shows the content of D:\CodeFile.txt on the terminal in a
// compact layout of 50 code characters per line and writes the same layout to
// D:\CodePrint.txt. Only the first whitespace-delimited token of the code file
// is read. The program exits with status 1 if either file cannot be opened.
void printEncodeFile()
{
    ifstream codeFile("D:\\CodeFile.txt", ios::in);
    if (!codeFile.is_open())
    {
        cerr << "文件打开失败!" << endl;
        exit(1);
    }
    ofstream printFile("D:\\CodePrint.txt", ios::out);
    if (!printFile.is_open())
    {
        cerr << "文件打开失败!" << endl;
        exit(1);
    }

    string bits;
    codeFile >> bits;

    int columnsUsed = 0;
    for (const char bit : bits)
    {
        cout << bit;
        printFile << bit;
        ++columnsUsed;
        // Break the line after every 50th code character.
        if (columnsUsed == 50)
        {
            cout << endl;
            printFile << endl;
            columnsUsed = 0;
        }
    }

    cout << endl;
    cout << "写入文件CodePrint.txt成功!" << endl;
    codeFile.close();
    printFile.close();
}
// Decodes a string of '0'/'1' characters by walking the global Huffman tree
// from the root: '0' follows the left child, '1' the right child, and reaching
// a leaf emits its character and restarts at the root.
// Characters other than '0' and '1' are ignored. A trailing incomplete code
// produces no output. When the tree consists of a single leaf, every '0'
// decodes to that one character.
static string decodeBitString(const string& bits)
{
    string decoded;
    const int root = 2 * hfmSize - 1;
    const bool rootIsLeaf = hfmTree[root].leftChild == 0 && hfmTree[root].rightChild == 0;
    int node = root;
    for (char bit : bits)
    {
        if (bit != '0' && bit != '1') continue;
        if (rootIsLeaf)
        {
            if (bit == '0') decoded += characters[root];
            continue;
        }
        const int next = (bit == '0') ? hfmTree[node].leftChild : hfmTree[node].rightChild;
        if (next == 0)
        {
            // No such child: restart at the root instead of emitting slot 0,
            // which holds no character.
            node = root;
            continue;
        }
        node = next;
        if (hfmTree[node].leftChild == 0)
        {
            // Reached a leaf.
            decoded += characters[node];
            node = root;
        }
    }
    return decoded;
}

// D ----- Decoding: decodes a bit string (the decoded text may contain spaces)
// with the current Huffman tree. The user chooses the source of the bits:
//   1  read D:\CodeFile.txt and write the result to D:\TextFile.txt
//   2  read one line from the terminal and print the result
// Any other choice does nothing. Reports an error and returns when no Huffman
// tree is available. The program exits with status 1 if a file cannot be opened.
void decodeText()
{
    if (hfmTree == nullptr || hfmSize <= 0)
    {
        cerr << "哈夫曼树尚未建立,无法译码!" << endl;
        return;
    }

    char inputType;
    cout << "您有如下两种方式提供待译码文本:" << endl << endl;
    cout << "1-----读取文件CodeFile.txt中的待译码文本;" << endl;
    cout << "2-----读取实时输入的待译码文本;" << endl;
    printLine();
    cout << "您选择方式: " << endl;
    cin >> inputType;
    if (inputType == '1')
    {
        ifstream in("D:\\CodeFile.txt", ios::in);
        if (!in.is_open())
        {
            cerr << "文件打开失败!" << endl;
            exit(1);
        }
        ofstream out("D:\\TextFile.txt", ios::out);
        if (!out.is_open())
        {
            cerr << "文件打开失败!" << endl;
            exit(1);
        }
        cin.get(); // consume the newline after the menu choice
        // get(char&) reports end of file through the stream state; comparing
        // a char against EOF is not reliable.
        string textToBeDecoded;
        char ch;
        while (in.get(ch))
        {
            textToBeDecoded += ch;
        }
        out << decodeBitString(textToBeDecoded);
        cout << "该段文本被译码后写入文件TextFile.txt成功!" << endl;
        in.close();
        out.close();
    }
    else if (inputType == '2')
    {
        cout << "请您输入待译码文本:" << endl;
        cin.get(); // consume the newline after the menu choice
        string textToBeDecoded;
        char ch;
        while (cin.get(ch) && ch != '\n')
        {
            textToBeDecoded += ch;
        }
        cout << "该段文本被译码为如下:" << endl;
        cout << decodeBitString(textToBeDecoded) << endl;
    }
}
// Makes sure a Huffman tree is in memory, loading it from hfmTree.txt when
// necessary. Returns false (after reporting it) when no tree is available.
static bool ensureTreeLoaded()
{
    if (hfmTree == nullptr) initByFile();
    if (hfmTree != nullptr) return true;
    cerr << "哈夫曼树不可用,请先执行初始化(I)!" << endl;
    return false;
}

// Asks whether to go back to the main menu. Returns true only for 'Y' or 'y';
// any other answer, or end of input, means "leave the program".
static bool askReturnToMenu()
{
    char choice = 'N';
    cout << endl;
    cout << "是否返回主菜单? Y/N" << endl;
    if (!(cin >> choice)) return false;
    return choice == 'Y' || choice == 'y';
}

// Entry point: shows the main menu in a loop and dispatches the chosen action.
// Menu letters are accepted in upper or lower case; unknown input simply
// redisplays the menu. The program ends with status 0 on 'Q', when the user
// declines to return to the menu, or when the input stream ends (which
// previously made the loop spin forever).
int main(void)
{
    for (;;)
    {
        system("cls");
        cout << "********Welcome to use the Huffman Encoding System!!!********" << endl << endl;
        cout << "\t" << "I-----Initialization(初始化、建立哈夫曼树)" << endl << endl;
        cout << "\t" << "T-----Tree printing(打印哈夫曼树)" << endl << endl;
        cout << "\t" << "C-----Huffman code printing(打印哈夫曼编码)" << endl << endl;
        cout << "\t" << "E-----Encoding(编码)" << endl << endl;
        cout << "\t" << "P-----Print(打印编码文件)" << endl << endl;
        cout << "\t" << "D-----Decoding(译码)" << endl << endl;
        cout << "\t" << "Q-----Quit(退出)" << endl << endl;
        cout << endl;
        cout << "请输入您想进行的操作: ";

        char handle = '\0';
        if (!(cin >> handle)) return 0; // input exhausted: nothing more to do

        bool handled = true;
        switch (handle)
        {
        case 'I': case 'i':
            system("cls");
            init();
            break;
        case 'T': case 't':
            system("cls");
            if (ensureTreeLoaded()) PrintHfmTree();
            break;
        case 'C': case 'c':
            system("cls");
            if (ensureTreeLoaded()) printHfmCodeStrings();
            break;
        case 'E': case 'e':
            system("cls");
            if (ensureTreeLoaded()) encodeText();
            break;
        case 'P': case 'p':
            // Printing the code file does not need the tree.
            system("cls");
            printEncodeFile();
            break;
        case 'D': case 'd':
            system("cls");
            if (ensureTreeLoaded()) decodeText();
            break;
        case 'Q': case 'q':
            system("cls");
            return 0;
        default:
            handled = false;
            break;
        }

        if (handled && !askReturnToMenu()) return 0;
    }
}
(经测试,以上代码可以正常运行)