Huffman编码实现

最新推荐文章于 2022-11-17 17:56:39 发布

wteo

最新推荐文章于 2022-11-17 17:56:39 发布

阅读量1k

点赞数

分类专栏：算法至上文章标签： huffman 算法导论算法

本文链接：https://blog.csdn.net/smileteo/article/details/18185917

版权

算法至上专栏收录该内容

16 篇文章 0 订阅

订阅专栏

算法思想：

哈夫曼编码是一种基于贪心策略的编码算法，它按照从叶子节点到根节点的方向（自底向上）构造编码树并得到编码。

一．为每个符号建立一个叶子节点，并加上其相应的发生频率。

二．当有一个以上的节点存在时，进行下列循环:

把这些节点作为带权值的二叉树的根节点，左右子树为空
选择两棵根结点权值最小的树作为左右子树构造一棵新的二叉树，且置新的二叉树的根结点的权值为其左右子树上根结点的权值之和。
把权值最小的两个根节点移除
将新的二叉树加入队列中.

三．最后剩下的节点即为根节点，此时二叉树已经构造完成。

示例：

符号	A	B	C	D	E
计数	15	7	6	6	5

其中huffman树构造过程如下图所示：

源码：

#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <list>
#include <new>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

// One node of the Huffman tree.
// Leaves carry the character they stand for in `tag`; nodes created by
// buildTree when merging two subtrees carry tag -1.
struct node{
	int  tag;               // character value (leaf) or -1 (merged node)
	int frequency;          // occurrence count; for a merged node, the sum of its children
	struct node * lchild;   // left subtree, NULL for a leaf
	struct node * rchild;   // right subtree, NULL for a leaf
	struct node * parent;   // enclosing node, NULL for a root
};

typedef struct node Node;        // a tree node by value
typedef struct node *TreeNode;   // pointer to a tree node

// Builds the initial forest for Huffman coding: one leaf per distinct
// character of `str`, carrying the number of times that character occurs.
//
// str      - text whose characters are counted (taken by reference, not copied).
// listnode - receives the leaves. If a node whose tag equals a character of
//            `str` is already in the list, its frequency is incremented;
//            otherwise a new leaf with frequency 1 is appended, so new leaves
//            appear in order of first occurrence.
//
// Nodes are allocated with malloc and are not released by this function.
// Throws std::bad_alloc if an allocation fails.
void buildList(const string &str, list <TreeNode> &listnode){
	for (string::size_type is = 0; is != str.size(); ++is){
		const char symbol = str[is];

		// Linear search for an existing leaf of this symbol.
		list <TreeNode>::iterator ip = listnode.begin();
		while (ip != listnode.end() && (*ip)->tag != symbol)
			++ip;

		if (ip != listnode.end()){
			(*ip)->frequency++;
			continue;
		}

		// First occurrence: create a fresh leaf.
		TreeNode pnew = static_cast<TreeNode>(malloc(sizeof(Node)));
		if (pnew == NULL)
			throw std::bad_alloc();   // never dereference a failed allocation
		pnew->tag = symbol;
		pnew->frequency = 1;
		pnew->lchild = NULL;
		pnew->rchild = NULL;
		pnew->parent = NULL;
		listnode.push_back(pnew);
	}
}


// Ordering predicate used when sorting the forest: returns true when the
// node behind p1 has a strictly smaller frequency than the node behind p2.
bool comp(const TreeNode &p1, const TreeNode &p2) {
	const int lhs = p1->frequency;
	const int rhs = p2->frequency;
	return rhs > lhs;
}


// Collapses the forest in `listnode` into a single Huffman tree.
//
// While more than one tree remains, the list is sorted by frequency (comp),
// the two lowest-frequency roots are removed and become the left and right
// children of a new node (tag -1, frequency = sum of both), and the new node
// is appended to the list. On return the list holds the root only; a list
// that is empty or already holds one node is left untouched.
//
// Nodes are allocated with malloc and are not released by this function.
// Throws std::bad_alloc if an allocation fails; the list is unchanged by the
// failing iteration apart from having been sorted.
void buildTree(list <TreeNode> &listnode){
	// "> 1" rather than "!= 1": an empty list must not enter the loop, since
	// there would be no front element to take.
	while (listnode.size() > 1){
		listnode.sort(comp);

		// Allocate before detaching anything so a failure loses no nodes.
		TreeNode pnew = static_cast<TreeNode>(malloc(sizeof(Node)));
		if (pnew == NULL)
			throw std::bad_alloc();

		TreeNode p1 = listnode.front();   // lowest frequency
		listnode.pop_front();
		TreeNode p2 = listnode.front();   // second lowest frequency
		listnode.pop_front();

		pnew->tag = -1;                   // marks a merged (non-leaf) node
		pnew->frequency = p1->frequency + p2->frequency;
		pnew->lchild = p1;
		pnew->rchild = p2;
		pnew->parent = NULL;
		p1->parent = pnew;
		p2->parent = pnew;
		listnode.push_back(pnew);
	}
}

void locate(TreeNode T, char tag, TreeNode &p){  
	
	if (T != NULL){
		if (T->tag == tag)
			p = T;			
		locate(T->lchild, tag, p);
		locate(T->rchild, tag, p);
	}	
}

// Reverses `str` in place. An empty or one-character string is left as is.
// Delegates to std::reverse, so the string length is never narrowed to int.
void reverse(std::string &str){
	std::reverse(str.begin(), str.end());
}

string huffman(list <TreeNode> listnode, string str){
	string str_huffman("");
	list <TreeNode>::iterator ip = listnode.begin();
	for (string::size_type is = 0; is != str.size(); ++is){
		TreeNode p;
		string str1("");
		locate(*ip, str[is], p);
		for (; p->parent != NULL; p = p->parent){
			if (p == p->parent->lchild)
				str1 += "0";
			else if (p == p->parent->rchild)
				str1 += "1";
		}
		reverse(str1);
		str_huffman += str1;
	}
	return str_huffman;
}

void main(){
	string str;   //= "abcddbacca";
	cout << "input the plaintext:" << endl;
	getline(cin,str);
	//cout << str;
	list <TreeNode> listnode;
	buildList(str, listnode);
	/*
	listnode.sort(comp);
	list <TreeNode>::iterator ip = listnode.begin();
	for (; ip != listnode.end(); ++ip){
		cout << (*ip)->tag << "    " << (*ip)->frequency << endl;
	}//ifor
	*/
	buildTree(listnode);
	/*
	list <TreeNode>::iterator ip1 = listnode.begin();
	for (; ip1 != listnode.end(); ++ip1){
		cout << (*ip1)->tag << "    " << (*ip1)->frequency << endl;
	}//ifor
	*/
	cout << endl;
	cout << "huffman code:"<<huffman(listnode, str)<<endl;
	//cout << "huffman code:" << "0011111010110001001010101100111110001001";
	system("pause");
}

于是，对于我们的原始字符串 beep boop beer!

其对应的二进制为 : 0110 0010 0110 0101 0110 0101 0111 0000 0010 0000 0110 0010 0110 1111 0110 1111 0111 0000 0010 0000 0110 0010 0110 0101 0110 0101 0111 0010 0010 0001

我们的Huffman编码为： 0001 0110 0101 0011 0110 1001 0100 0101 1110 1111

从上面的例子中，我们可以看到：原始数据共 15 个字符、120 位，而 Huffman 编码后只有 40 位，被压缩的比例还是很可观的。

wteo

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Huffman编码实现

算法思想：哈夫曼编码算法基于贪心算法从叶子节点到根节点的方向编码的。一．为每个符号建立一个叶子节点，并加上其相应的发生频率。二．当有一个以上的节点存在时，进行下列循环:把这些节点作为带权值的二叉树的根节点，左右子树为空选择两棵根结点权值最小的树作为左右子树构造一棵新的二叉树，且至新的二叉树的根结点的权值为其左右子树上根结点的权值之和。把权值最小的两个根节点移除将新的二叉树加
复制链接

扫一扫

专栏目录