哈夫曼编码与压缩效率分析

一、实验原理 
1、本实验中的Huffman编码算法
(1)将文件以ASCII字符流的形式读入,统计每个符号的发生频率。
(2)将文件中出现过的所有字符按照频率从小到大的顺序排列。
(3)每一次选出频率最小的两个值,作为二叉树的两个叶子节点,将二者之和作为它们的根节点;这两个叶子节点不再参与比较,新的根节点参与比较。
(4)重复步骤(3),直到最后得到和为1的根节点。
(5)将形成的二叉树的左节点标0,右节点标1,把从最上面的根节点到最下面的叶子节点途中遇到的01序列串起来,就得到了各个字符的编码表示。
2、Huffman编码的数据结构设计:在程序实现中使用二叉树这种数据结构实现Huffman编码。
(1)哈夫曼节点结构
/*
 * Node of the Huffman binary tree.
 *
 * A node carries either a symbol or a pair of child pointers; the two
 * alternatives share storage through the anonymous union.
 */
typedef struct huffman_node_tag
{
   unsigned char isLeaf;            /* whether this node is a leaf */
   unsigned long count;             /* weighted sum of the symbols this node represents */
   struct huffman_node_tag *parent; /* pointer to the parent node */
   union
   {
       struct
       {
           /* child pointers for the 0 branch and the 1 branch */
           struct huffman_node_tag *zero, *one;
       };
       unsigned char symbol;        /* symbol represented by this node */
   };
} huffman_node;

(2)哈夫曼码结构

/*
 * A Huffman code word: a bit string together with its length in bits.
 */
typedef struct huffman_code_tag
{
    unsigned long numbits; /* number of bits used by this code */
    unsigned char *bits;   /* pointer to the bit string of this code */
} huffman_code;

 二、主函数代码

/*
 *  huffman - Encode/Decode files using Huffman encoding.
 *  http://huffman.sourceforge.net
 *  Copyright (C) 2003  Douglas Ryan Richardson; Gauss Interprise, Inc
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "huffman.h"


#define DEBUG true


#ifdef WIN32
#include <winsock2.h>
#include <malloc.h>
#define alloca _alloca
#else
#include <netinet/in.h>
#endif




/*
 * One node of the Huffman tree.
 */
typedef struct huffman_node_tag
{
    /* Non-zero when this node is a leaf. */
    unsigned char isLeaf;

    /* How many times the symbol occurred in the information source. */
    unsigned long count;

    /* Link to the parent node. */
    struct huffman_node_tag *parent;

    /*
     * Storage shared between the two node flavours to save space:
     * a leaf uses `symbol`, any other node uses the two child links.
     */
    union
    {
        struct
        {
            struct huffman_node_tag *zero; /* child on the 0 branch */
            struct huffman_node_tag *one;  /* child on the 1 branch */
        };
        unsigned char symbol;
    };
} huffman_node;


/*
 * A Huffman code word stored as a packed bit string.
 */
typedef struct huffman_code_tag
{
    /* Length of the code, counted in bits. */
    unsigned long numbits;

    /*
     * Packed code bits, filled from position 0 of each byte upward:
     *   code bit 1 -> position 0 of bits[0]
     *   code bit 2 -> position 1 of bits[0]
     *   code bit 8 -> position 7 of bits[0]
     *   code bit 9 -> position 0 of bits[1]
     */
    unsigned char *bits;
} huffman_code;


//step2:add by yzhang for huffman statistics
/*
 * Statistics table with 256 slots per field.
 * NOTE(review): presumably one slot per possible byte value - confirm
 * against the code that fills this structure.
 */
typedef struct huffman_statistics_result
{
    float         freq[256];      /* per-slot frequency value */
    unsigned long numbits[256];   /* per-slot code length */
    unsigned char bits[256][100]; /* per-slot code storage, 100 bytes each */
} huffman_stat;


/*huffman_stat *init_huffstatistics()
{ huffman_stat *p;
    int i;
p = (huffman_stat*)malloc(sizeof(huffman_stat));
p->freq = (float *)malloc(sizeof(float)*256 );
p->numbits = (unsigned long *) malloc(sizeof(unsigned long)*256);
    for (i=0 ; i<256;i++)
p->bits[i] = (unsigned char *)malloc(sizeof(unsigned char)*100); 
return p;
}*/
//end by yzhang




/*
 * numbytes_from_numbits returns ceil(numbits / 8): the number of bytes
 * needed to hold a code that is numbits bits long.
 */
static unsigned long
numbytes_from_numbits(unsigned long numbits)
{
    const unsigned long whole_bytes = numbits >> 3;
    const unsigned long leftover_bits = numbits & 7UL;

    /* A partially filled trailing byte still occupies a full byte. */
    if (leftover_bits != 0)
    {
        return whole_bytes + 1;
    }
    return whole_bytes;
}


/*
 * get_bit extracts bit number i from the packed array `bits`.
 * Bit i is stored in byte i / 8 at position i % 8. The result is
 * 0 or 1, placed in the lowest position of the return value.
 */
static unsigned char
get_bit(unsigned char* bits, unsigned long i)
{
    const unsigned long byte_index = i / 8;
    const unsigned int shift = (unsigned int)(i % 8);
    const unsigned int shifted = (unsigned int)bits[byte_index] >> shift;

    return (unsigned char)(shifted & 1u);
}
/*
 * reverse_bits reverses the order of the first numbits bits of the
 * packed array `bits`, so that bit i and bit (numbits - 1 - i) trade
 * places. Bit i lives in byte i / 8 at position i % 8.
 *
 * The reversal is done in place by swapping bits from both ends
 * toward the middle. No temporary buffer is needed, so there is no
 * alloca() call whose size depends on the code length.
 *
 * Any unused high positions of the last byte are cleared, so the
 * result is identical to building the reversed code in a zeroed
 * buffer and copying it back.
 *
 * A NULL array or a zero bit count is a no-op.
 */
static void
reverse_bits(unsigned char* bits, unsigned long numbits)
{
    unsigned long lo;
    unsigned long hi;

    if(bits == NULL || numbits == 0)
    {
        return;
    }

    for(lo = 0, hi = numbits - 1; lo < hi; ++lo, --hi)
    {
        const unsigned char lo_mask = (unsigned char)(1u << (lo % 8));
        const unsigned char hi_mask = (unsigned char)(1u << (hi % 8));
        const int lo_set = (bits[lo / 8] & lo_mask) != 0;
        const int hi_set = (bits[hi / 8] & hi_mask) != 0;

        /* Swapping two different bits is the same as toggling both. */
        if(lo_set != hi_set)
        {
            bits[lo / 8] ^= lo_mask;
            bits[hi / 8] ^= hi_mask;
        }
    }

    /* Zero the padding positions above the last valid bit. */
    if(numbits % 8 != 0)
    {
        bits[(numbits - 1) / 8] &= (unsigned char)((1u << (numbits % 8)) - 1u);
    }
}


/*
 * new_code builds a huffman_code from a leaf in
 * a Huffman tree.
 */
static huffman_code*
new_code(const huffman_node* leaf)
{
/* Build the huffman code by walking up to
* the root node and then reversing the bits,
* since the Huffman code is calculated by
* walking down the tree. */
unsigned long numbits = 0;/*code length*/
unsigned char* bits = NULL;/*the first address of the code*/
huffman_code *p;
/*
*leaf!=NULL: 
   The current huffman_node exists,which needs to be encoded.
*leaf->parent!=NULL:
   The current huffman_node has parent,indicating that
the encoding process of the current node
(backtracking from leaf to root)
has not yet completed.
*/
while(leaf && leaf->parent)
{
huffman_node *parent = leaf->parent;
unsigned char cur_bit = (unsigned char)(numbits % 8);
/*cur_bit:position of the encoding bit in the current byte,
ranging from 0 to 7.*/
unsigned long cur_byte = numbits / 8;
/*cur_byte:
Currently,how many complete bytes has been encoded.*/


/* cur_bit==0:The encoding bit is the first bit of a byte,
and the last byte has completely encoded, 
so it needs to build a new coding byte.
  If we need another byte to hold the code,
  then allocate it. ==> newSize=cur_byte+1
  */
if(cur_bit == 0)
{
size_t newSize = cur_byte + 1;
bits = (char*)realloc(bits, newSize);
bits[newSize - 1] = 0; /* Initialize the new byte. */
/*
Function"realloc" is different from function "malloc".
"Realloc" can reallocate new space in the case of
keeping the original data unchanged.
The original data lies in the front of new space.
(The space's address may be changed.)
*/
}


/* If a "one" must be added then or it in. If a zero
* must be added then do nothing, since the byte
* was initialized to zero. */
if(leaf == parent->one)
bits[cur_byte] |= 1 << cur_bit;
/*Shift 1 left to the encoding bit.*/
++numbits;
leaf = parent;/*backtracking*/
}


if(bits)
reverse_bits(bits, numbits);
/*From the above it can be seen that the encoding process
is from leaf backtracking to root.(Leaf lies in the low bit,and root in the high.)
So the bit order of code is inverted.

  • 3
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值