哈夫曼编/译码器
建立哈夫曼树:读入文件(*.source),统计文件中字符出现的频度,并以这些字符的频度作为权值,建立哈夫曼树。 编码:利用已建立好的哈夫曼树,获得各个字符的哈夫曼编码,并对正文进行编码,然后输出编码结果,并存入文件(*.code)中。 译码:利用已建立好的哈夫曼树将文件(*.code)中的代码进行译码,并输出译码结果,并存入文件(*.decode)中。 以下代码可以实现对大部分中文和英文的编码和译码。
代码如下:
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
/* Capacity constant used to size the Huffman tables declared below. */
# define N 100
/* Node count of a Huffman tree with N leaves. The expansion is parenthesized
   so the macro stays correct inside larger expressions such as M * 2. */
# define M (2 * N - 1)
/*
 * One node of the Huffman tree, kept in a flat table (HuffmanTree).
 * Parent/Lchild/Rchild are indices into that same table; the code in this
 * file stores real nodes from index 1 and uses 0 to mean "no link".
 */
typedef struct
{
char ch[ 3 ] ; /* symbol bytes of a leaf; copied from the weighting table */
int weight; /* occurrence count (leaf) or sum of the two children (internal) */
int Parent, Lchild, Rchild; /* table indices, 0 = none */
} HTNode, HuffmanTree[ M + 1 ] ;
/*
 * One entry of the symbol/frequency table.
 * Tables of this type are used 1-based; element 0's WEI field carries the
 * number of entries in the table.
 */
typedef struct
{
char ch[ 3 ] ; /* symbol bytes (one byte, or a two-byte pair) */
int WEI; /* occurrence count of the symbol */
} weighting;
/*
 * One encoded symbol as produced by Coding.
 * Coding keeps an array of these and uses element 0's len field as the
 * number of entries stored in the array.
 */
typedef struct
{
char * s; /* points at the code string of one symbol (not owned) */
int len; /* entry count; only meaningful in element 0 */
} HString;
/* Table of per-symbol code strings ("0"/"1" text); CrtHuffmanCode fills it
   starting at index 1, so index 0 is unused. */
typedef char * HuffmanCode[ N] ;
/* Reads a file name from stdin and loads that file's text into str. */
void read ( FILE* fp, char str[ ] ) ;
/* Builds the symbol/frequency table for str; element 0's WEI of the returned
   table holds the number of distinct symbols. */
weighting* getweight ( char str[ ] , weighting w[ ] ) ;
/* Prints the symbol/frequency table w. */
void Printvalue ( weighting w[ ] ) ;
/* Builds the Huffman tree ht from entries 1..n of w and prints it. */
void CrtHuffmanTree ( HuffmanTree ht, weighting w[ ] , int n) ;
/* Picks two parentless nodes of smallest weight among ht[1..k]. */
void select ( HuffmanTree ht, int k, int * s1, int * s2) ;
/* Prints nodes 1..n of the tree table. */
void PrintTree ( HuffmanTree ht, int n) ;
/* Derives the code string of each of the n leaves into hc[1..n]. */
void CrtHuffmanCode ( HuffmanTree ht, HuffmanCode hc, int n) ;
/* Prints the code strings hc[1..n], one per line. */
void PrintCode ( HuffmanCode hc, int n) ;
/* Encodes str with table w and codes hc, prints the result and writes it to
   a file whose name is read from stdin. */
HString* Coding ( char str[ ] , weighting w[ ] , HuffmanCode hc) ;
/* Decodes the '0'/'1' text in code by walking ht from root index n; the
   result is printed and written to Decode.txt. */
void Decoding ( char code[ ] , HuffmanTree ht, int n) ;
int main ( )
{
HuffmanTree H;
HuffmanCode T;
weighting WEI[ 10000 ] ;
HString* STR;
FILE* fp = NULL ;
char str[ 10000 ] ;
char code[ 10000 ] ;
weighting* W;
printf ( "请输入获取电文的文件:" ) ;
read ( fp, str) ;
W = getweight ( str, WEI) ;
int len = W[ 0 ] . WEI;
CrtHuffmanTree ( H, W, len) ;
CrtHuffmanCode ( H, T, len) ;
PrintCode ( T, len) ;
STR = Coding ( str, W, T) ;
printf ( "请输入获取编码的文件:" ) ;
read ( fp, code) ;
Decoding ( code, H, 2 * len - 1 ) ;
return 0 ;
}
/*
 * Reads one line from stdin as a file name, loads that text file into str as
 * a NUL-terminated string, echoes the contents and closes the file.
 *
 * fp  - ignored on entry; the stream is opened locally (the parameter is kept
 *       only so existing callers keep compiling).
 * str - destination buffer. The callers in this file pass 10000-byte arrays,
 *       so at most 9999 bytes are stored; longer files are truncated.
 *
 * Exits with a failure status if the name cannot be read, the file cannot be
 * opened, or the file is empty.
 */
void read ( FILE* fp, char str[ ] )
{
    enum { READ_CAPACITY = 10000 };   /* size of the callers' buffers */
    char filename[40];
    int used = 0;
    int c;   /* int, not char, so EOF is distinct from every byte value */

    /* gets_s requires a size argument in C; fgets is the portable form. */
    if (fgets(filename, sizeof filename, stdin) == NULL) {
        printf ( "\nERROR!\n" ) ;
        exit(EXIT_FAILURE);
    }
    filename[strcspn(filename, "\r\n")] = '\0';

    fp = fopen(filename, "r");
    if (fp == NULL) {
        printf ( "\nERROR!\n" ) ;
        exit(EXIT_FAILURE);
    }

    /* Stops on EOF or read error alike, and never overruns the buffer. */
    while (used < READ_CAPACITY - 1 && (c = fgetc(fp)) != EOF) {
        str[used++] = (char)c;
    }
    str[used] = '\0';

    if (used == 0) {
        fclose(fp);
        printf ( "文件为空!!!" ) ;
        exit(EXIT_FAILURE);
    }

    printf ( "文件中读出内容为:\n" ) ;
    puts(str);
    fclose(fp);
}
/*
 * Builds a Huffman tree in the flat table ht.
 *
 * ht - node table; entries 1..n become the leaves and n+1..2n-1 the internal
 *      nodes, with the root ending up at index 2n-1.
 * w  - symbol/frequency table; entries 1..n supply the leaf symbols/weights.
 * n  - number of leaves.
 *
 * The finished tree is printed with PrintTree.
 */
void CrtHuffmanTree ( HuffmanTree ht, weighting w[ ] , int n)
{
    int m = 2 * n - 1;
    int i;

    for (i = 1; i <= n; i++) {
        /* Copy the whole fixed-size symbol field: it is not guaranteed to be
           NUL-terminated, so strcpy could run past its three bytes. */
        memcpy(ht[i].ch, w[i].ch, sizeof ht[i].ch);
        ht[i].weight = w[i].WEI;
        ht[i].Parent = 0;
        ht[i].Lchild = 0;
        ht[i].Rchild = 0;
    }

    for (i = n + 1; i <= m; i++) {
        /* Internal nodes carry no symbol; an all-zero field marks that. */
        memset(ht[i].ch, 0, sizeof ht[i].ch);
        ht[i].weight = 0;
        ht[i].Parent = 0;
        ht[i].Lchild = 0;
        ht[i].Rchild = 0;
    }

    /* Repeatedly join the two lightest parentless nodes under a new node. */
    for (i = n + 1; i <= m; i++) {
        int s1;
        int s2;

        select(ht, i - 1, &s1, &s2);
        ht[i].weight = ht[s1].weight + ht[s2].weight;
        ht[i].Lchild = s1;
        ht[i].Rchild = s2;
        ht[s1].Parent = i;
        ht[s2].Parent = i;
    }

    /* Print exactly the nodes that were built. */
    PrintTree(ht, m);
}
/*
 * Finds the two parentless nodes of smallest weight among ht[1..k].
 *
 * ht - node table.
 * k  - highest index to consider.
 * s1 - receives the index of the lightest parentless node.
 * s2 - receives the index of the lightest parentless node other than *s1.
 *
 * Among equal weights the highest index wins (the comparison is <=).
 * An output is set to 0 when no candidate exists for it.
 */
void select ( HuffmanTree ht, int k, int * s1, int * s2)
{
    int first = 0;    /* 0 = nothing chosen yet; real nodes start at 1 */
    int second = 0;
    int i;

    /* Compare against the current best instead of a fixed sentinel, so the
       search works for any weight, not only those up to some constant. */
    for (i = 1; i <= k; i++) {
        if (ht[i].Parent != 0) {
            continue;
        }
        if (first == 0 || ht[i].weight <= ht[first].weight) {
            first = i;
        }
    }

    for (i = 1; i <= k; i++) {
        if (i == first || ht[i].Parent != 0) {
            continue;
        }
        if (second == 0 || ht[i].weight <= ht[second].weight) {
            second = i;
        }
    }

    *s1 = first;
    *s2 = second;
}
/*
 * Derives the Huffman code of every leaf by walking from the leaf up to the
 * root and writing the bits back-to-front.
 *
 * ht - finished Huffman tree; leaves are entries 1..n.
 * hc - receives one heap-allocated "0"/"1" string per leaf in hc[1..n];
 *      the caller owns these strings and releases them with free().
 * n  - number of leaves. Nothing is done when n <= 0.
 *
 * Exits with a failure status if memory cannot be allocated.
 */
void CrtHuffmanCode ( HuffmanTree ht, HuffmanCode hc, int n)
{
    char *cd;
    int i;

    if (n <= 0) {
        return;
    }

    /* Scratch buffer: a code for n leaves has at most n - 1 bits. */
    cd = malloc((size_t)n);
    if (cd == NULL) {
        printf ( "\nERROR!\n" ) ;
        exit(EXIT_FAILURE);
    }
    cd[n - 1] = '\0';

    for (i = 1; i <= n; i++) {
        int start = n - 1;
        int c = i;
        int p = ht[i].Parent;

        while (p != 0) {
            start--;
            /* Right edge contributes '1', left edge '0'. */
            cd[start] = (ht[p].Rchild == c) ? '1' : '0';
            c = p;
            p = ht[p].Parent;
        }

        hc[i] = malloc((size_t)(n - start));
        if (hc[i] == NULL) {
            free(cd);
            printf ( "\nERROR!\n" ) ;
            exit(EXIT_FAILURE);
        }
        strcpy(hc[i], &cd[start]);
    }

    free(cd);
}
/*
 * Prints nodes 1..n of the tree table, one row per node: a symbol label
 * followed by weight, parent, left child and right child.
 *
 * Label rules, checked in this order on the first symbol byte:
 *   newline -> \n, NUL -> NULL, space -> ' ',
 *   otherwise the symbol as a string when its third byte is NUL,
 *   else just its first byte.
 */
void PrintTree ( HuffmanTree ht, int n)
{
    int i;

    printf ( "\nPrinthuffmantree:\n" ) ;
    printf ( "字符\tweight\tParent\tLchild\tRchild\n" ) ;

    for (i = 1; i <= n; i++) {
        const HTNode *node = &ht[i];

        /* Compare against the character '\0'; NULL is a pointer constant. */
        if (node->ch[0] == '\n') {
            printf("\\n");
        } else if (node->ch[0] == '\0') {
            printf("NULL");
        } else if (node->ch[0] == ' ') {
            printf("' '");
        } else if (node->ch[2] == '\0') {
            printf("%s", node->ch);
        } else {
            printf("%c", node->ch[0]);
        }

        printf("\t%d\t%d\t%d\t%d\n",
               node->weight, node->Parent, node->Lchild, node->Rchild);
    }

    printf ( "\n\n" ) ;
}
/*
 * Writes the code strings hc[1..n] to stdout, one per line, framed by a
 * "Printcode:" heading and two trailing blank lines.
 */
void PrintCode ( HuffmanCode hc, int n)
{
    int idx = 1;

    printf ( "\nPrintcode:\n" ) ;
    while (idx <= n) {
        printf("%s\n", hc[idx]);
        idx++;
    }
    printf ( "\n\n" ) ;
}
/* Returns non-zero when c starts a two-byte symbol: anything that is not an
   ASCII letter, a space, a newline or the terminating NUL. */
static int gw_starts_pair(char c)
{
    int is_letter = (c >= 65 && c <= 90) || (c >= 97 && c <= 122);

    return !is_letter && c != ' ' && c != '\n' && c != '\0';
}

/*
 * Builds the symbol/frequency table of str.
 *
 * str - NUL-terminated text. ASCII letters, spaces and newlines are one-byte
 *       symbols; every other byte is grouped with the byte that follows it
 *       into a two-byte symbol.
 * w   - caller-provided table that receives the result. Entries 1..count
 *       hold the distinct symbols in order of first appearance, each fully
 *       NUL-padded in ch, and w[0].WEI holds count. It must have room for
 *       count + 1 entries (never more than strlen(str) + 1).
 *
 * Returns w, which stays valid for as long as the caller's array does.
 * The table is also printed with Printvalue.
 */
weighting* getweight ( char str[ ] , weighting* w)
{
    int len = (int)strlen(str);
    int count = 0;
    int i;

    for (i = 0; i < len; i++) {
        char sym[3] = { '\0', '\0', '\0' };
        int slot;

        sym[0] = str[i];
        if (gw_starts_pair(str[i])) {
            /* At the very end of the text this picks up the terminating
               NUL, which simply leaves a one-byte symbol. */
            sym[1] = str[i + 1];
            i++;
        }

        /* Look the symbol up among those already seen. */
        for (slot = 1; slot <= count; slot++) {
            if (memcmp(w[slot].ch, sym, sizeof sym) == 0) {
                break;
            }
        }
        if (slot > count) {
            count = slot;
            memcpy(w[slot].ch, sym, sizeof sym);
            w[slot].WEI = 0;
        }
        w[slot].WEI++;
    }

    w[0].WEI = count;
    Printvalue(w);
    return w;
}
/*
 * Prints the symbol/frequency table: a heading, then one "symbol<TAB>count"
 * row for each of entries 1..w[0].WEI.
 *
 * A newline symbol is shown as \n and a space as ' '. Any other symbol is
 * printed as a string when its third byte is NUL, otherwise only its first
 * byte is printed.
 */
void Printvalue ( weighting w[ ] )
{
    const int count = w[0].WEI;
    int idx;

    printf ( "\nPrintvalue:\n" ) ;
    printf ( "字符\tweight\n" ) ;

    for (idx = 1; idx <= count; idx++) {
        const weighting *entry = &w[idx];
        const char lead = entry->ch[0];

        if (lead == '\n') {
            printf("\\n\t%d\n", entry->WEI);
            continue;
        }
        if (lead == ' ') {
            printf("' '\t%d\n", entry->WEI);
            continue;
        }
        if (entry->ch[2] != '\0') {
            printf("%c\t%d\n", lead, entry->WEI);
        } else {
            printf("%s\t%d\n", entry->ch, entry->WEI);
        }
    }

    printf ( "\n\n" ) ;
}
/*
 * Encodes str with the given code table, prints the encoded text, and saves
 * it to a file whose name is read from stdin.
 *
 * str - NUL-terminated text to encode; split into one- and two-byte symbols
 *       with the same rule the frequency table uses (ASCII letters, spaces
 *       and newlines are single bytes, anything else starts a pair).
 * w   - symbol table; entries 1..w[0].WEI are searched for each symbol.
 * hc  - code strings, hc[j] belonging to w[j].
 *
 * Returns an array with one entry per encoded symbol (entry k's s points at
 * the symbol's code string); element 0's len holds the entry count. The
 * array is owned by this function and is replaced on the next call, so the
 * caller must not free it. Symbols missing from w are skipped.
 *
 * Exits with a failure status on allocation, input or file errors.
 */
HString* Coding ( char str[ ] , weighting w[ ] , HuffmanCode hc)
{
    static HString *HS = NULL;   /* outlives the call so it can be returned */
    FILE *fp;
    char filename[40];
    int len = (int)strlen(str);
    int length = w[0].WEI;
    int count = 0;
    int i;

    /* One slot per input byte is always enough; +1 keeps HS[0] valid for
       empty input. */
    free(HS);
    HS = calloc((size_t)len + 1, sizeof *HS);
    if (HS == NULL) {
        printf ( "\nERROR!\n" ) ;
        exit(EXIT_FAILURE);
    }

    for (i = 0; i < len; i++) {
        char s[3] = { '\0', '\0', '\0' };
        int is_letter = (str[i] >= 65 && str[i] <= 90) || (str[i] >= 97 && str[i] <= 122);
        int is_pair = !is_letter && str[i] != ' ' && str[i] != '\n' && str[i] != '\0';
        int j;

        s[0] = str[i];
        if (is_pair) {
            s[1] = str[i + 1];
            i++;
        }

        for (j = 1; j <= length; j++) {
            /* Pairs are matched as strings, single bytes on the first byte
               only; the pair buffer is never read for a single byte. */
            int hit = is_pair ? (strcmp(s, w[j].ch) == 0)
                              : (w[j].ch[0] == s[0]);
            if (hit) {
                HS[count].s = hc[j];
                count++;
                break;
            }
        }
    }
    HS[0].len = count;

    printf ( "编码结果:" ) ;
    for (i = 0; i < count; i++) {
        printf("%s", HS[i].s);
    }
    printf ( "\n" ) ;

    printf ( "请输入要保存编码的文件:" ) ;
    /* gets_s requires a size argument in C; fgets is the portable form. */
    if (fgets(filename, sizeof filename, stdin) == NULL) {
        printf ( "\nERROR!\n" ) ;
        exit(EXIT_FAILURE);
    }
    filename[strcspn(filename, "\r\n")] = '\0';
    printf ( "\n" ) ;

    fp = fopen(filename, "w");
    if (fp == NULL) {
        printf ( "\nERROR!\n" ) ;
        exit(EXIT_FAILURE);
    }
    for (i = 0; i < count; i++) {
        fprintf(fp, "%s", HS[i].s);
    }
    fclose(fp);

    return HS;
}
/*
 * Decodes a string of '0'/'1' characters with the Huffman tree and writes
 * the decoded symbols both to stdout and to Decode.txt.
 *
 * code - NUL-terminated bit text; characters other than '0' and '1' are
 *        ignored, as are trailing bits that do not reach a leaf.
 * ht   - Huffman tree table.
 * n    - index of the root node in ht.
 *
 * Exits with a failure status if Decode.txt cannot be opened.
 */
void Decoding ( char code[ ] , HuffmanTree ht, int n)
{
    FILE *fp;
    int node = n;   /* current position in the tree, starting at the root */
    size_t i;

    printf ( "译码结果将保存至Decode.txt文件\n" ) ;
    fp = fopen("Decode.txt", "w");
    if (fp == NULL) {
        printf ( "\nERROR!\n" ) ;
        exit(EXIT_FAILURE);
    }

    for (i = 0; code[i] != '\0'; i++) {
        int next;

        if (code[i] == '1') {
            next = ht[node].Rchild;
        } else if (code[i] == '0') {
            next = ht[node].Lchild;
        } else {
            continue;
        }

        /* A node without that child (e.g. a one-node tree) cannot consume
           the bit; skip it rather than scanning past the end of the table. */
        if (next == 0) {
            continue;
        }
        node = next;

        /* Reached a leaf: emit its symbol and restart from the root. */
        if (ht[node].Lchild == 0) {
            if (ht[node].ch[2] == '\0') {
                printf("%s", ht[node].ch);
                fprintf(fp, "%s", ht[node].ch);
            } else {
                printf("%c", ht[node].ch[0]);
                fprintf(fp, "%c", ht[node].ch[0]);
            }
            node = n;
        }
    }

    fclose(fp);
}