关于赫夫曼编码和赫夫曼树的相关知识可参考之前两篇文章(由二叉树构造赫夫曼树、赫夫曼编码)。本文介绍另一种构建赫夫曼树的方式,采用优先队列.
1.首先我们需要统计不同字符出现的次数。一个字符出现的次数越多,说明其优先级越高,其赫夫曼编码应该越短;
2.将待编码的字符(即带权节点)存入优先级队列,优先级即字符出现的次数;
3.不断迭代队列,直到队列中剩下一个元素(即根节点)。每次从队列中取出两个优先级最小的元素(优先级队列中的元素是按照节点优先级顺序排列的),然后生成一个新的赫夫曼树节点,节点左右孩子分别指向这两个元素,节点权值为这两个元素权值之和,将新节点插入队列;
4.根据赫夫曼树生成赫夫曼编码表。递归遍历赫夫曼树,对每个叶子节点进行编码(左0右1);
5.编码过程即对每个字符查编码表的过程;
6.解码过程即根据编码串访问赫夫曼树的过程.
实现:
/*****************************************************
赫夫曼编码优先队列版
by Rowandjj
2014/6/17
*****************************************************/
#include<iostream>
using namespace std;
typedef struct _HTNODE_//赫夫曼树节点的存储结构
{
char symbol;//符号(ASC码值)
struct _HTNODE_ *left;//左孩子
struct _HTNODE_ *right;//右孩子
}HtNode;
typedef struct _HTREE_//赫夫曼树
{
HtNode *root;
}HTree;
typedef struct _HLNODE_//赫夫曼码表节点
{
char symbol;//ASC码值
char *code;//对应的编码
struct _HLNODE_ *next;
}HlNode;
typedef struct _HLTABLE_//赫夫曼码表(以单链表形式存储不同ASC码及与之对应的编码表)
{
HlNode *first;
HlNode *last;
}HlTable;
//--------------------------------------------------------
#define MAX_SZ 256//队列最大长度
#define TYPE HtNode*//优先队列的节点定义
typedef struct _PQUEUENODE_//队列节点定义
{
TYPE val;//节点值
unsigned int priority;//优先级
struct _PQUEUENODE_ *next;
}pQueueNode;
typedef struct _PQUEUE_//优先队列定义
{
unsigned int size;//队列长度
pQueueNode *first;//首节点
}pQueue;
//-----------优先队列操作定义------------------------------
void InitQueue(pQueue **queue);//队列初始化
void AddQueue(pQueue **queue,TYPE val,unsigned int priority);//将一个带优先级的节点插入队列
TYPE GetQueue(pQueue **queue);//队列节点按照优先级顺序出队
void TraverseQueue(pQueue *queue);//遍历队列
//-----------赫夫曼编码操作定义----------------------------
HTree* BuildTree(char *inputString);//创建赫夫曼树
HlTable* BuildTable(HTree *huffmanTree);//构建赫夫曼码表(出现频率最高的ASC码将使用最短的编码)
void encode(HlTable *table,char *stringToEncode);//编码
void decode(HTree *tree,char *stringToDecode);//解码
void TraverseTable(HlTable *table);//遍历赫夫曼编码表
//---------------------------------------------------------
void InitQueue(pQueue **queue)
{
*queue = (pQueue *)malloc(sizeof(pQueue));
(*queue)->first = NULL;
(*queue)->size = 0;
}
void AddQueue(pQueue **queue,TYPE val,unsigned int priority)
{
if((*queue)->size == MAX_SZ)
{
cout<<"queue is full!";
return;
}
pQueueNode *aux = (pQueueNode*)malloc(sizeof(pQueueNode));
aux->priority = priority;
aux->val = val;
aux->next = NULL;
if((*queue)->first == NULL)//生成队列首节点
{
(*queue)->first = aux;
(*queue)->size++;
}else
{
if(priority <= (*queue)->first->priority)//插到首节点位置
{
aux->next = (*queue)->first;
(*queue)->first = aux;
(*queue)->size++;
}else//遍历队列插到合适的位置
{
pQueueNode *iterator = (*queue)->first;
while(iterator->next != NULL && priority > iterator->next->priority)
{
iterator = iterator->next;
}
aux->next = iterator->next;
iterator->next = aux;
(*queue)->size++;
}
}
}
TYPE GetQueue(pQueue **queue)
{
TYPE returnVal = NULL;
if ((*queue)->size > 0)
{
pQueueNode *nodeToDel = (*queue)->first;
(*queue)->first = (*queue)->first->next;
returnVal = nodeToDel->val;
free(nodeToDel);
(*queue)->size--;
}else
{
cout<<"queue is empty"<<endl;
}
return returnVal;
}
//-------------------------------------------------------------
HTree* BuildTree(char *inputString)
{
//统计字符出现次数
int *probability = (int*)malloc(sizeof(int)*256);
int i;
for(i = 0; i < 256;i++)
{
probability[i] = 0;
}
for(i = 0; inputString[i] != '\0';i++)
{
probability[(unsigned char)inputString[i]]++;
}
/* for(i = 0; i < 256; i++)//打印字符出现的次数(也即频率)
{
if(probability[i] != 0)
cout<<(char)i<<":"<<probability[i]<<endl;
}
*/
//根据每个字符出现的频率生成赫夫曼树
pQueue* huffmanQueue;
InitQueue(&huffmanQueue);
for(i = 0; i < 256; i++)
{
//1 现将带权节点加到队列中
if(probability[i] != 0)//只考虑带权节点,也就是说只对带权节点编码
{
HtNode *aux = (HtNode*)malloc(sizeof(HtNode));
aux->symbol = (char)i;
aux->left = NULL;
aux->right = NULL;
AddQueue(&huffmanQueue,aux,probability[i]);
}
}
cout<<"字符及出现频次:"<<endl;
TraverseQueue(huffmanQueue);
free(probability);
//2 再构建赫夫曼树
while(huffmanQueue->size != 1)
{
int priority = huffmanQueue->first->priority;
priority += huffmanQueue->first->next->priority;
//从队列中获取两个优先级最低的节点
HtNode *left = GetQueue(&huffmanQueue);
HtNode *right = GetQueue(&huffmanQueue);
//生成其父节点,父节点的权值为两个子节点的权值之和
HtNode *newNode = (HtNode*)malloc(sizeof(HtNode));
newNode->left = left;
newNode->right = right;
//将生成的父节点插入到优先队列的合适位置
AddQueue(&huffmanQueue,newNode,priority);
}
HTree *tree = (HTree*)malloc(sizeof(HTree));
tree->root = GetQueue(&huffmanQueue);
return tree;
}
void travelseTree(HtNode *treeNode,HlTable **table,int k,char code[256])
{
if(treeNode->left == NULL && treeNode->right == NULL)//走到叶子节点
{
code[k] = '\0';
HlNode *aux = (HlNode*)malloc(sizeof(HlNode));
aux->symbol = treeNode->symbol;
aux->code = (char *)malloc(sizeof(char)*(strlen(code)+1));
strcpy(aux->code,code);
aux->next = NULL;
if((*table)->first == NULL)//连接
{
(*table)->first = (*table)->last = aux;
}else
{
(*table)->last->next = aux;
(*table)->last = aux;
}
}
if(treeNode->left != NULL)
{
code[k] = '0';
travelseTree(treeNode->left,table,k+1,code);
}
if(treeNode->right != NULL)
{
code[k] = '1';
travelseTree(treeNode->right,table,k+1,code);
}
}
HlTable* BuildTable(HTree *huffmanTree)//由赫夫曼树构建赫夫曼编码表
{
HlTable *table = (HlTable*)malloc(sizeof(HlTable));
table->first = NULL;
table->last = NULL;
char code[256];
int k = 0;
travelseTree(huffmanTree->root,&table,k,code);
return table;
}
void encode(HlTable *table,char *stringToEncode)
{
HlNode *travel;
for(int i = 0; stringToEncode[i] != '\0'; i++)
{
travel = table->first;
while(travel->symbol != stringToEncode[i])
{
travel = travel->next;
}
cout<<travel->code<<" ";
}
}
void decode(HTree *tree,char *stringToDecode)
{
HtNode *travel = tree->root;
for(int i = 0;stringToDecode[i] != '\0'; i++)
{
if(travel->left == NULL && travel->right == NULL)
{
cout<<travel->symbol;
travel = tree->root;
}
if(stringToDecode[i] == '0')
{
travel = travel->left;
}
if(stringToDecode[i] == '1')
{
travel = travel->right;
}
if(stringToDecode[i] != '0' && stringToDecode[i] != '1')
{
cout<<"\ndecode error..."<<endl;
}
}
if(travel->left == NULL && travel->right == NULL)
{
cout<<travel->symbol;
travel = tree->root;
}
}
void TraverseQueue(pQueue *queue)
{
pQueueNode *iterator = queue->first;
while(iterator != NULL)
{
cout<<iterator->val->symbol<<":"<<iterator->priority<<endl;
iterator = iterator->next;
}
}
void TraverseTable(HlTable *table)
{
HlNode *iterator = table->first;
cout<<"字符"<<":"<<"编码"<<endl;
while(iterator != NULL)
{
cout<<iterator->symbol<<":"<<iterator->code<<endl;
iterator = iterator->next;
}
cout<<endl;
}
int main()
{
HTree *codeTree = BuildTree("abbccccddddddd");
HlTable *codeTable = BuildTable(codeTree);
TraverseTable(codeTable);
cout<<"编码开始:"<<endl;
encode(codeTable,"abcd");
cout<<"\n解码开始:"<<endl;
decode(codeTree,"10100100000101");
cout<<endl;
return 0;
}
测试结果:
优先队列变化过程:
生成的赫夫曼树:
注:这里的构建赫夫曼树的约定和之前的两篇不太一样,这里约定一个节点的左节点的权值小于右节点的权值.