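//---------------------------------------HuffmanTree------------------------------------//
// Huffman coding is an entropy-coding (weight-based) algorithm used for lossless data compression.
// It encodes source symbols (e.g. a letter in a file) with a variable-length code table that is
// derived from an estimate of how likely each symbol is to occur: frequent symbols get shorter
// codes and rare symbols get longer ones, which lowers the average (expected) length of the
// encoded string and thereby compresses the data without loss.
// A Huffman tree, also called an optimal binary tree, is the binary tree with the minimum
// weighted path length. The weighted path length of a tree is the sum, over all leaves, of the
// leaf's weight multiplied by its path length to the root (with the root at level 0, a leaf's
// path length to the root equals the leaf's level).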
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Boolean values and status codes returned by the functions in this file.
#define TRUE 1
#define FALSE 0
#define OK 1
#define ERROR 0
#define OVERFLOW (-2) // NOTE: the original author remarks that "math.h" may also provide this name

// Result code of a function: one of the status macros above.
typedef int Status;

// One node of a Huffman tree that is stored in an array.
// parent, lchild and rchild are indices into that array; index 0 is
// never used for a node, so the value 0 means "no such link".
typedef struct
{
    unsigned int weight;
    unsigned int parent, lchild, rchild;
} HTNode, *HuffmanTree; // dynamically allocated array that stores the Huffman tree

// Dynamically allocated array that stores the Huffman code table.
// Each symbol's code is a string of '0'/'1' characters, hence the pointer to pointer.
typedef char **HuffmanCode;
//------------------------Basic Function----------------------------//
#define MAX 100 // legacy "maximum weight" constant; kept for compatibility, weights are no longer limited by it

// Returns the index of the node with the smallest weight among T[1..i]
// that has no parent yet (parent == 0), and marks that node as taken by
// setting its parent to 1 so that the next call skips it. Among equal
// weights the lowest index wins.
// Returns 0 (and marks nothing) when every node in T[1..i] already has a parent.
int Min(HuffmanTree T, int i)
{
    int best = 0; // 0 = nothing found yet (index 0 is never a real node)
    int j;

    for (j = 1; j <= i; ++j) {
        if (T[j].parent != 0) {
            continue; // a non-zero parent excludes nodes that were already chosen
        }
        if (best == 0 || T[j].weight < T[best].weight) {
            best = j;
        }
    }
    if (best != 0) {
        T[best].parent = 1; // only a "visited" marker; the caller stores the real parent
    }
    return best;
}

// Picks, among T[1..i], the two parentless nodes with the smallest weights
// and stores their indices in *Sa and *Sb with *Sa < *Sb.
// Both picked nodes are left marked (parent == 1); the caller is expected
// to overwrite parent with the real parent index.
// Returns OK on success, or ERROR when fewer than two parentless nodes exist
// (in which case any temporary mark is undone).
Status Select(HuffmanTree T, int i, int *Sa, int *Sb)
{
    int first = Min(T, i);
    int second = Min(T, i);

    if (first == 0 || second == 0) {
        if (first != 0) {
            T[first].parent = 0; // undo the marker so the tree is left unchanged
        }
        *Sa = first;
        *Sb = second;
        return ERROR;
    }
    if (first > second) { // *Sa is the lower index of the two
        int tmp = first;
        first = second;
        second = tmp;
    }
    *Sa = first;
    *Sb = second;
    return OK;
}

// Builds the Huffman tree *HT for the n weights w[0..n-1] (all expected to be > 0)
// and derives the code of each of the n symbols into *HC, walking from each
// leaf up to the root.
// On success *HT has 2n-1 nodes in slots 1..2n-1 (slot 0 unused) and
// (*HC)[1..n] are newly allocated NUL-terminated '0'/'1' strings
// ((*HC)[0] is NULL); the caller owns and must free all of them.
// Returns OK on success; ERROR when n <= 1 (outputs untouched) or when an
// allocation or selection fails (everything is released and *HT, *HC are NULL).
Status HuffmanCoding(HuffmanTree *HT, HuffmanCode *HC, int *w, int n)
{
    int m, i, start;
    unsigned int c, f;
    int sa, sb;
    char *cd = NULL;

    if (n <= 1) {
        return ERROR;
    }
    m = 2 * n - 1; // n leaves, m nodes in total

    *HC = NULL;
    // calloc zeroes parent/lchild/rchild/weight of every slot, including unused slot 0
    *HT = calloc((size_t)m + 1, sizeof **HT);
    if (*HT == NULL) {
        return ERROR;
    }

    //------------------- Initialize the leaves ---------------------//
    for (i = 1; i <= n; ++i) {
        (*HT)[i].weight = (unsigned int)w[i - 1];
    }

    //----------------- Build the Huffman tree ----------------------//
    for (i = n + 1; i <= m; ++i) {
        // choose the two lightest parentless nodes among HT[1..i-1]
        if (Select(*HT, i - 1, &sa, &sb) != OK) {
            goto fail;
        }
        (*HT)[sa].parent = (unsigned int)i;
        (*HT)[sb].parent = (unsigned int)i;
        (*HT)[i].lchild = (unsigned int)sa;
        (*HT)[i].rchild = (unsigned int)sb;
        (*HT)[i].weight = (*HT)[sa].weight + (*HT)[sb].weight;
    }

    //------- Derive each symbol's code from leaf to root --------//
    *HC = calloc((size_t)n + 1, sizeof **HC); // entries start as NULL
    cd = malloc((size_t)n);                   // work buffer: a code has at most n-1 digits plus NUL
    if (*HC == NULL || cd == NULL) {
        goto fail;
    }
    cd[n - 1] = '\0';
    for (i = 1; i <= n; ++i) {
        start = n - 1; // position of the terminator; digits are written backwards from here
        for (c = (unsigned int)i, f = (*HT)[i].parent; f != 0; c = f, f = (*HT)[f].parent) {
            cd[--start] = ((*HT)[f].lchild == c) ? '0' : '1';
        }
        (*HC)[i] = malloc((size_t)(n - start)); // digits plus terminator
        if ((*HC)[i] == NULL) {
            goto fail;
        }
        strcpy((*HC)[i], &cd[start]);
    }
    free(cd);
    return OK;

fail:
    free(cd);
    if (*HC != NULL) {
        for (i = 1; i <= n; ++i) {
            free((*HC)[i]);
        }
        free(*HC);
        *HC = NULL;
    }
    free(*HT);
    *HT = NULL;
    return ERROR;
}
//--------- Stack-free, non-recursive traversal of the Huffman tree to derive the codes (from the root to the leaves) ---------------//
// Builds the Huffman tree *HT for the n weights w[0..n-1] (all expected to be > 0)
// and derives the code of each of the n symbols into *HC by walking the tree
// from the root down to the leaves without a stack or recursion.
// On success *HT has 2n-1 nodes in slots 1..2n-1 (slot 0 unused) with their
// weights intact, and (*HC)[1..n] are newly allocated NUL-terminated '0'/'1'
// strings ((*HC)[0] is NULL); the caller owns and must free all of them.
// Returns OK on success; ERROR when n <= 1 (outputs untouched) or when an
// allocation or selection fails (everything is released and *HT, *HC are NULL).
Status HuffmanCodingB(HuffmanTree *HT, HuffmanCode *HC, int *w, int n)
{
    int m, i;
    unsigned int c, cdlen;
    int sa, sb;
    char *cd = NULL;
    unsigned char *state = NULL; // per-node traversal state: 0 = new, 1 = left done, 2 = right done

    if (n <= 1) {
        return ERROR;
    }
    m = 2 * n - 1; // n leaves, m nodes in total

    *HC = NULL;
    // calloc zeroes parent/lchild/rchild/weight of every slot, including unused slot 0
    *HT = calloc((size_t)m + 1, sizeof **HT);
    if (*HT == NULL) {
        return ERROR;
    }

    //------------------- Initialize the leaves ---------------------//
    for (i = 1; i <= n; ++i) {
        (*HT)[i].weight = (unsigned int)w[i - 1];
    }

    //----------------- Build the Huffman tree ----------------------//
    for (i = n + 1; i <= m; ++i) {
        // choose the two lightest parentless nodes among HT[1..i-1]
        if (Select(*HT, i - 1, &sa, &sb) != OK) {
            goto fail;
        }
        (*HT)[sa].parent = (unsigned int)i;
        (*HT)[sb].parent = (unsigned int)i;
        (*HT)[i].lchild = (unsigned int)sa;
        (*HT)[i].rchild = (unsigned int)sb;
        (*HT)[i].weight = (*HT)[sa].weight + (*HT)[sb].weight;
    }

    //---------------- Traverse from the root to the leaves -------------------//
    *HC = calloc((size_t)n + 1, sizeof **HC); // entries start as NULL
    cd = malloc((size_t)n);                   // a code has at most n-1 digits plus NUL
    // The state lives in its own array so the node weights survive the traversal.
    state = calloc((size_t)m + 1, sizeof *state);
    if (*HC == NULL || cd == NULL || state == NULL) {
        goto fail;
    }

    c = (unsigned int)m; // start at the root
    cdlen = 0;
    while (c != 0) {
        if (state[c] == 0) { // first visit: go left
            state[c] = 1;
            if ((*HT)[c].lchild != 0) { // has a child, so not a leaf
                c = (*HT)[c].lchild;
                cd[cdlen++] = '0';
            } else if ((*HT)[c].rchild == 0) { // reached a leaf: record its code
                (*HC)[c] = malloc((size_t)cdlen + 1);
                if ((*HC)[c] == NULL) {
                    goto fail;
                }
                cd[cdlen] = '\0';
                strcpy((*HC)[c], cd);
            }
        } else if (state[c] == 1) { // second visit: go right
            state[c] = 2;
            if ((*HT)[c].rchild != 0) {
                c = (*HT)[c].rchild;
                cd[cdlen++] = '1';
            }
        } else { // both sides done: step back to the parent
            state[c] = 0;
            c = (*HT)[c].parent;
            if (cdlen > 0) { // leaving the root must not wrap the unsigned length
                --cdlen;
            }
        }
    }
    free(state);
    free(cd);
    return OK;

fail:
    free(state);
    free(cd);
    if (*HC != NULL) {
        for (i = 1; i <= n; ++i) {
            free((*HC)[i]);
        }
        free(*HC);
        *HC = NULL;
    }
    free(*HT);
    *HT = NULL;
    return ERROR;
}

// Prints one table row per symbol 1..n: its number, the weight stored in
// HT[i] and its code string HC[i]. Always returns OK.
Status OutputHuffmanCode(HuffmanTree HT, HuffmanCode HC, int n)
{
    int i;

    printf("\nnumber---weight---huffman code\n");
    for (i = 1; i <= n; i++) {
        // weight is unsigned int, so it needs %u rather than %d
        printf("%6d %6u %10s\n", i, HT[i].weight, HC[i]);
    }
    return OK;
}
//--------------------------------Main Function-------------------------------------//
int main ( )
{
HuffmanTree HT ;
HuffmanCode HC ;
int *w , n , i ;
printf( "Input the number of Node:" ) ;
scanf( "%d" , &n ) ;
w = ( int * ) malloc( n * sizeof( int ) ) ;
printf( "Input %d Nodes' weight( int ): " , n ) ;
for( i = 0 ; i < n ; i ++ )
{
scanf( "%d" , w + i ) ;
}
HuffmanCodingB( &HT , &HC , w , n ) ;
// for( i = 1 ; i <= n ; i ++ )
// {
// puts( HC[ i ] ) ;
// printf( "%6s " , HC[ i ] ) ;
// }
// printf( "\n" ) ;
OutputHuffmanCode( HT , HC , n ) ;return 0 ;
}
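// Chapter 6 (7): Huffman trees and their applications
// (Residue from the source web page: "latest recommended article published 2022-11-15 20:00:08")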