Huffman树又称最优二叉树,是一个带权路径长度最短的树。
算法思想是:
1.将所有权值分别构建为一个只有一个节点的二叉树结构,将这些节点加入集合A中(Huffman森林)
2.检查集合A成员的个数,如果为1,则算法结束,集合A中唯一的节点为Huffman树的根
3.从集合A中取出根节点权值最小的两棵树a,b,集合中不再保留这两棵树
4.由a,b分别为左右子树构建一个新的二叉树r,令r的权值等于a,b权值之和
5.将r加入集合A中
6.从第2步开始重复步骤
前缀编码:若要设计长短不等的编码,则必须是任意一个字符的编码都不是另一个字符编码的前缀,这种编码就是前缀编码。
树的带权路径长度(WPL),就是树中所有叶节点的权值乘上其到根节点的路径长度之和;树的路径长度则是从树根到每一个节点的路径长度之和。
Huffman的WPL是最小的。
下面参考网上一些代码写了一个Huffman树的简单操作,并且获得Huffman编码。关于编码的获得,我是用了STL的map和list作为辅助操作,可能不是很好的方式,但是也能达到效果。还有,代码里不知道哪里内存泄漏了(该delete的地方我都delete了,不知道哪里又泄漏了,希望你们帮我看一下)。
//HuffmanTree.h
#pragma once
#include <cstddef>
#include <map>
#include <string>
#include <list>
#include <iostream>

// NOTE(review): a using-directive in a header leaks into every includer.
// It is kept because the accompanying .cpp files rely on it for map/list/cout.
using namespace std;

// Huffman node type.
// A node serves two roles: it is a binary-tree node (lNode / rNode) and an
// element of the singly linked forest list (nextNode).
struct HuffmanNode
{
    char nodedata;          // symbol stored in the node
    int nodepower;          // weight, i.e. how often the symbol occurs
    HuffmanNode *lNode;     // left child
    HuffmanNode *rNode;     // right child
    HuffmanNode *nextNode;  // next tree in the forest list

    // Zero-initialise every field so that `new HuffmanNode` never yields a
    // node with indeterminate data or dangling links.
    HuffmanNode()
        : nodedata(0), nodepower(0), lNode(NULL), rNode(NULL), nextNode(NULL)
    {
    }
};

// Builds a Huffman tree from (symbol, weight) pairs and derives the code of
// each symbol.
class SomeClass
{
public:
    SomeClass();
    ~SomeClass();

    // Build the Huffman forest: one single-node tree per (chdata[i], nodepower[i])
    // pair, i in [0, n), linked into the list that hangs off GetRoot().
    void CreateHuffmanForest(int n, char chdata[], int nodepower[]);

    // Build the forest and merge it into one Huffman tree.
    // Returns the tree root, or NULL when the forest is empty.
    HuffmanNode* CreateHuffmanTree(int n, char chdata[], int nodepower[]);

    // Insert newnode into the list that starts after head, keeping the list
    // ordered by ascending weight.
    void AddNode(HuffmanNode *head, HuffmanNode *newnode);

    // Weighted path length of the tree rooted at head; treelevel is the depth
    // of head (pass 0 for the tree root).
    int GetPWL(HuffmanNode *head, int treelevel);

    // Walk the tree rooted at head and record codes into the map returned by
    // GetMap(); a left branch contributes 0, a right branch 1.
    //void GetHuffmanCodes(HuffmanNode *head, char *code);
    void GetHuffmanCodes(HuffmanNode *head);

    // Delete head and everything reachable through lNode / rNode.
    void FreeNodes(HuffmanNode *head);

    // Head of the forest list. This is a placeholder node, not the tree root:
    // the first tree is GetRoot()->nextNode.
    HuffmanNode* GetRoot() { return m_root; }

    // Symbol -> code (sequence of 0/1) table filled by GetHuffmanCodes().
    map<char, list<int> >& GetMap() { return m_code; }

    // Release the tree nodes and the list head.
    void CleanHuffmanTree();

private:
    // The class owns raw node pointers, so a member-wise copy would make two
    // objects delete the same nodes. Copying is disabled (declared, never
    // defined).
    SomeClass(const SomeClass&);
    SomeClass& operator=(const SomeClass&);

    // Head node of the forest list
    HuffmanNode *m_root;
    // Huffman code of each symbol
    map<char, list<int> > m_code;
    // Branch bits collected on the way down during GetHuffmanCodes()
    list<int> tmpList;
};
//HuffmanTree.cpp
#include "HuffmanTree.h"

// Allocates the list head. m_root is a placeholder node that carries no
// symbol; the forest (and later the finished tree) hangs off m_root->nextNode.
SomeClass::SomeClass() : m_root(NULL)
{
    HuffmanNode *root = new HuffmanNode;
    root->nodedata = 0;
    root->nodepower = 0;
    root->lNode = NULL;
    root->rNode = NULL;
    root->nextNode = NULL;

    m_root = root;
}

// Releases whatever is still owned. CleanHuffmanTree() is idempotent, so an
// explicit call by the user followed by destruction is safe.
SomeClass::~SomeClass()
{
    CleanHuffmanTree();
}

// Build the Huffman forest.
// Turns every (chdata[i], nodepower[i]) pair, i in [0, n), into a single-node
// tree and stores it in the weight-ordered list behind m_root.
// Does nothing when the arrays are NULL or the object was already cleaned.
void SomeClass::CreateHuffmanForest(int n, char chdata[], int nodepower[])
{
    if (m_root == NULL || chdata == NULL || nodepower == NULL)
    {
        return;
    }

    for (int i = 0; i < n; i++)
    {
        HuffmanNode *newNode = new HuffmanNode;
        newNode->nodedata = chdata[i];
        newNode->nodepower = nodepower[i];
        newNode->lNode = NULL;
        newNode->rNode = NULL;
        newNode->nextNode = NULL;

        AddNode(m_root, newNode);
    }
}

// Build the Huffman tree.
// Creates the forest from the given pairs, then repeatedly replaces the two
// lightest trees by one tree whose weight is their sum until one tree is left.
// Returns that tree (also reachable as GetRoot()->nextNode), or NULL when the
// forest is empty.
HuffmanNode* SomeClass::CreateHuffmanTree(int n, char chdata[], int nodepower[])
{
    // Build the Huffman forest
    CreateHuffmanForest(n, chdata, nodepower);

    HuffmanNode *head = m_root;
    if (head == NULL)
    {
        return NULL;
    }

    while (head->nextNode)
    {
        // Only one tree left: this is the real Huffman root
        if (head->nextNode->nextNode == NULL)
        {
            return head->nextNode;
        }

        // The list is ordered by weight, so the first two entries are the
        // two lightest trees. Unlink both from the list.
        HuffmanNode *pNode1 = head->nextNode;
        HuffmanNode *pNode2 = pNode1->nextNode;
        head->nextNode = pNode2->nextNode;

        // They are tree children from now on; drop the stale list links.
        pNode1->nextNode = NULL;
        pNode2->nextNode = NULL;

        HuffmanNode *pRoot = new HuffmanNode;
        pRoot->nodedata = 0;    // internal nodes carry no symbol
        pRoot->nodepower = pNode1->nodepower + pNode2->nodepower;
        pRoot->lNode = pNode1;
        pRoot->rNode = pNode2;
        pRoot->nextNode = NULL;

        // Put the merged tree back into the forest
        AddNode(head, pRoot);
    }

    return NULL;
}

// Add a node.
// Inserts newnode into the list that starts after head, in front of the first
// entry whose weight is not smaller than newnode's (or at the end), so the
// list stays ordered by ascending weight.
void SomeClass::AddNode(HuffmanNode *head, HuffmanNode *newnode)
{
    if (head == NULL || newnode == NULL)
    {
        return;
    }

    HuffmanNode *tmpNode = head;
    while (tmpNode->nextNode && (tmpNode->nextNode->nodepower) < (newnode->nodepower))
    {
        tmpNode = tmpNode->nextNode;
    }

    // At the end of the list tmpNode->nextNode is NULL, which also covers
    // the empty-list case.
    newnode->nextNode = tmpNode->nextNode;
    tmpNode->nextNode = newnode;
}

// Earlier char*-based variant, kept commented out for reference.
////Get the Huffman codes
//void SomeClass::GetHuffmanCodes(HuffmanNode *head, char *code)
//{
//    //leaf node: store the code
//    if (head->lNode==NULL && head->rNode==NULL)
//    {
//        int tsize = strlen(code);
//        char *tmpchar = new char[tsize+2];
//        s_new++;
//        strcpy(tmpchar, code);
//        //tmpchar[tsize+1] = '/0';
//        codemap[head->nodedata] = tmpchar;
//
//        return ;
//    }
//
//    int len = strlen(code);
//    char *newcode = new char[len+2];
//    s_new++;
//    strcpy(newcode, code);
//    newcode[len+1] = '/0';
//
//    if (head->lNode) //left branch gets '0'
//    {
//        newcode[len] = '0';
//        GetHuffmanCodes(head->lNode, newcode);
//    }
//    if (head->rNode) //right branch gets '1'
//    {
//        newcode[len] = '1';
//        GetHuffmanCodes(head->rNode, newcode);
//    }
//    delete[] newcode;
//    s_delete++;
//}

// Get the Huffman codes.
// Walks the tree rooted at head; going left appends 0 and going right appends
// 1 to tmpList, and the bit is removed again when the walk comes back up.
// At every leaf the current tmpList is stored in m_code under the leaf's
// symbol. Internal nodes carry no symbol and are not recorded.
// A tree that consists of a single leaf yields an empty code.
void SomeClass::GetHuffmanCodes(HuffmanNode *head)
{
    if (NULL == head)
    {
        return;
    }

    if (NULL == head->lNode && NULL == head->rNode)
    {
        m_code[head->nodedata] = tmpList;
        return;
    }

    if (NULL != head->lNode)
    {
        tmpList.push_back(0);
        GetHuffmanCodes(head->lNode);
        tmpList.pop_back();
    }

    if (NULL != head->rNode)
    {
        tmpList.push_back(1);
        GetHuffmanCodes(head->rNode);
        tmpList.pop_back();
    }
}

// Compute the weighted path length.
// Sums weight * depth over all leaves below head; treelevel is the depth of
// head itself (0 for the tree root). Returns 0 for a NULL head.
int SomeClass::GetPWL(HuffmanNode *head, int treelevel)
{
    if (head == NULL)
    {
        return 0;
    }

    if (head->lNode == NULL && head->rNode == NULL)
    {
        return (head->nodepower) * treelevel;
    }

    return GetPWL(head->lNode, treelevel + 1) + GetPWL(head->rNode, treelevel + 1);
}

// Free nodes.
// Post-order: both children are released before the node itself.
// A NULL head is a no-op.
void SomeClass::FreeNodes(HuffmanNode *head)
{
    if (NULL == head)
    {
        return;
    }

    FreeNodes(head->lNode);
    FreeNodes(head->rNode);
    delete head;
}

// Release resources.
// Frees every tree still linked behind m_root (a finished Huffman tree or an
// unmerged forest), then the list head itself, and sets m_root to NULL.
// Safe to call more than once.
void SomeClass::CleanHuffmanTree()
{
    if (m_root == NULL)
    {
        return;
    }

    HuffmanNode *tree = m_root->nextNode;
    while (tree != NULL)
    {
        // Read the list link before the node is deleted.
        HuffmanNode *next = tree->nextNode;
        FreeNodes(tree);
        tree = next;
    }

    delete m_root;
    m_root = NULL;
}
//Main.cpp
#include "HuffmanTree.h"
#include <cstdio>
#include <vector>
#ifdef _MSC_VER
#include <crtdbg.h>
#endif

// Reads symbols from standard input, builds the Huffman tree and prints the
// weighted path length and the code of every symbol.
// This lives in its own function so that every container and the SomeClass
// object are destroyed before main() asks the CRT for a leak report;
// otherwise their still-live allocations are reported as leaks.
static void RunHuffmanDemo()
{
    map<char, int> m_Record;    // symbol frequencies

    // Read characters and count them
    cout<<"输入字符,当输入数字0时,退出输入."<<endl;
    // cin.get() returns an int so that EOF can be told apart from real
    // characters; storing it in a char would loop forever at end of input.
    // As before, a NUL byte or the character '0' also ends the input.
    int input;
    while ((input = cin.get()) != EOF && input != '\0' && input != '0')
    {
        m_Record[static_cast<char>(input)]++;
    }

    if (m_Record.empty())
    {
        // No symbols: there is no tree to build or traverse.
        cout<<"没有输入任何字符."<<endl;
        return;
    }

    vector<char> charArray;
    vector<int> powerArray;
    charArray.reserve(m_Record.size());
    powerArray.reserve(m_Record.size());
    for (map<char, int>::iterator iter = m_Record.begin(); iter != m_Record.end(); ++iter)
    {
        charArray.push_back(iter->first);
        powerArray.push_back(iter->second);
    }

    SomeClass m_SC;
    // Build the Huffman tree. The vectors are non-empty here, so taking the
    // address of element 0 is valid.
    HuffmanNode *tree = m_SC.CreateHuffmanTree(static_cast<int>(charArray.size()),
                                               &charArray[0], &powerArray[0]);
    cout<<"Huffman树的带权路径长度PWL:"<<m_SC.GetPWL(tree, 0)<<endl;

    // Derive the Huffman codes
    m_SC.GetHuffmanCodes(tree);
    // Borrow the code table instead of copying it
    map<char, list<int> > &m_CodeMap = m_SC.GetMap();

    // Print each symbol followed by its code
    cout<<"输出Huffman编码:"<<endl;
    for (map<char, list<int> >::iterator it = m_CodeMap.begin(); it != m_CodeMap.end(); ++it)
    {
        cout<<it->first<<": ";
        for (list<int>::iterator listIT = (it->second).begin(); listIT != (it->second).end(); ++listIT)
        {
            cout<<*listIT<<" ";
        }
        cout<<endl;
    }
    cout<<endl;

    // Release the tree
    m_SC.CleanHuffmanTree();
}

int main()
{
    RunHuffmanDemo();

#ifdef _MSC_VER
    // Every object of the demo is gone at this point, so anything reported
    // here is a real leak.
    _CrtDumpMemoryLeaks();
#endif

    getchar();
    return 0;
}
运行结果如图:
额,忘了把字符显示了,呵呵。0为a, 10为b,110为d, 111为c