霍夫曼压缩和解压缩C语言实现

  最近一段时间抽空看了霍夫曼压缩和解压缩的算法,觉得挺巧妙的,就决心自己动手用C实现。没想到算法看起来挺简单,实际做起来却处处都需要仔细考虑,折腾了一周左右终于调试通过了。收获挺大的,付出总是有回报的!

  闲话少说,下面就贴上我的实现代码。霍夫曼压缩解压缩算法需要用到位操作和堆的相关函数。堆的实现在我前面的博客里已经给出实现代码了,这里就不重复贴了。

  1.位实现代码(转载的,感谢!)

/*bit.c  位操作的实现*/
#include <stdlib.h>


/*
 * bit_get - read one bit out of a byte buffer.
 *
 * bits: buffer to read from.
 * pos:  bit index; bit 0 is the most significant bit of bits[0],
 *       bit 8 is the most significant bit of bits[1], and so on.
 *
 * Returns 1 when the addressed bit is set, 0 otherwise.
 */
int bit_get(const unsigned char *bits, int pos)
{
    int           offset = pos % 8;
    unsigned char mask = 0x80;

    /* Slide the mask from the MSB to the bit's place inside its byte. */
    if(offset > 0)
        mask = (unsigned char)(mask >> offset);

    return (bits[pos / 8] & mask) ? 1 : 0;
}

/*
 * bit_set - force one bit of a byte buffer to a given state.
 *
 * bits:  buffer to modify.
 * pos:   bit index; bit 0 is the most significant bit of bits[0].
 * state: non-zero sets the bit, zero clears it.
 */
void bit_set(unsigned char *bits, int pos, int state)
{
    int            offset = pos % 8;
    unsigned char  mask = 0x80;
    unsigned char *cell = &bits[pos / 8];

    /* Slide the mask from the MSB to the bit's place inside its byte. */
    if(offset > 0)
        mask = (unsigned char)(mask >> offset);

    if(state)
        *cell = (unsigned char)(*cell | mask);
    else
        *cell = (unsigned char)(*cell & ~mask);
}
/*
 * bit_xor - bitwise exclusive-or of two bit buffers.
 *
 * bits1, bits2: input buffers.
 * bitsx:        output buffer; only its first `size` bits are written.
 * size:         number of bits (not bytes) to process.
 */
void bit_xor(const unsigned char *bits1,const unsigned char *bits2,unsigned char *bitsx,int size)
{
    int pos;

    /* A result bit is 1 exactly when the two source bits differ. */
    for(pos = 0; pos < size; pos++)
        bit_set(bitsx,pos,bit_get(bits1,pos) != bit_get(bits2,pos));
}
/*
 * bit_rot_left - rotate a bit buffer to the left.
 *
 * bits:  buffer holding the bits, most significant bit of bits[0] first.
 * size:  number of bits in the buffer; nothing happens when size <= 0.
 * count: number of single-bit rotations to perform.
 *
 * Each rotation shifts every byte left by one, carries the top bit of a
 * byte into the bottom bit of the byte before it, and wraps the top bit of
 * the first byte around to position size-1.
 */
void bit_rot_left(unsigned char *bits,int size,int count)
{
    int last_byte,round,idx;
    int carry,wrapped = 0;

    if(size <= 0)
        return;

    last_byte = (size-1)/8;

    for(round = 0; round < count; round++)
    {
        for(idx = 0; idx <= last_byte; idx++)
        {
            /* Top bit that is about to be shifted out of this byte. */
            carry = bit_get(&bits[idx],0);

            if(idx == 0)
                wrapped = carry;                  /* re-enters at the far end */
            else
                bit_set(&bits[idx-1],7,carry);    /* lands in the previous byte */

            bits[idx] = bits[idx] << 1;
        }

        /* Close the rotation: the bit that left byte 0 becomes the last bit. */
        bit_set(bits,size-1,wrapped);
    }
}

/*
 * bit_print - dump the first bit_size bits of a buffer to stdout as '0'/'1'.
 *
 * A newline is emitted after every bit whose index is a non-zero multiple
 * of 5, and the dump is terminated with a tab followed by a newline.
 */
void bit_print(unsigned char *bits,int bit_size)
{
    int idx = 0;

    while(idx < bit_size)
    {
        printf("%d",bit_get(bits,idx));

        if(idx > 0 && idx % 5 == 0)
            printf("\n");

        idx++;
    }

    printf("\t\n");
}

霍夫曼相关数据结构:

/* Value stored in the `version` field of the compressed-file header. */
#define HUFFMAN_VER         0xAA
/* Size of the symbol table: one slot per possible byte value. */
#define HUFFMAN_MAX_SYM 256
/* HUFFMAN_NODE.type tags: a leaf carries an input symbol, a trunk is an
   internal node produced by merging two subtrees. */
#define HUFFMAN_LEAF_NODE   1
#define HUFFMAN_TRUNK_NODE  2

/* One node of the Huffman tree; also the element type stored in the heap
   that is used while the tree is being built. */
typedef struct _huffman_node
{
    HEAP_NODE heap_node;            /* heap bookkeeping; heap_node.pri holds the node weight */
    char sym;                       /* symbol for a leaf; '@' for trunk nodes */
    int  freq;                      /* occurrence count; filled in for leaves only */
    int  type;                      /* HUFFMAN_LEAF_NODE or HUFFMAN_TRUNK_NODE */
    unsigned short bits;            /* code bits on the path from the root, MSB-first via bit_set() */
    int bits_size;                  /* number of valid bits in `bits` (depth of the node) */
    struct _huffman_node *left;     /* child reached by a 0 bit */
    struct _huffman_node *right;    /* child reached by a 1 bit */
    struct _huffman_node *parent;   /* node this one was merged under, if any */
} HUFFMAN_NODE;

/* In-memory symbol-table entry: frequency and assigned code for one byte value. */
typedef struct _huffman_sym_entry
{
    char                sym;        /* the byte value this entry describes */
    int                 freq;       /* occurrences seen; 0 means the symbol is unused */
    unsigned short      code;       /* Huffman code bits, laid out as bit_set() writes them */
    int                 code_size;  /* number of valid bits in `code` */
} HUFFMAN_SYM_ENTRY;

/* On-disk symbol-table entry: only the symbol and its frequency are saved;
   the decoder rebuilds the tree (and therefore the codes) from them. */
typedef struct _huffman_sym_save_entry
{
    char    sym;            /* byte value */
    char    reserverd[3];   /* not referenced by the code shown; presumably alignment padding */
    int     freq;           /* occurrence count of `sym` in the original data */
}HUFFMAN_SYM_SAVE_ENTRY;


/*
 * Header written at the start of a compressed file.  It is followed
 * directly by sym_tbl_entry_num symbol entries and then by the packed
 * code bits.
 *
 * The variable-length table is a C99 flexible array member and therefore
 * has to be the last member.  Declaring it as a zero-length array in the
 * middle of the struct (a GNU extension) made its entries overlap
 * `reserved`.  sizeof(HUFFMAN_FILE_HEADER) still covers only the fixed
 * part, so the size computation
 *   sizeof(HUFFMAN_FILE_HEADER) + n * sizeof(HUFFMAN_SYM_SAVE_ENTRY)
 * used by the encoder and decoder remains valid.
 *
 * NOTE: this places the table after `reserved` in the file, so files
 * produced with the previous member order are not readable any more.
 */
typedef struct _huffman_file_header
{
    int version;                /* HUFFMAN_VER */
    int bit_size;               /* number of valid bits in the encoded payload */
    int sym_tbl_entry_num;      /* number of entries in sym_save_tbl */
    char reserved[8];           /* unused, kept for future extensions */
    HUFFMAN_SYM_SAVE_ENTRY  sym_save_tbl[];
}HUFFMAN_FILE_HEADER;


void    huffman_encode(char *file);
void    huffman_decode(char *huffman_file,char *decoded_file);
void    huffman_test(void);

霍夫曼实现代码:

/*
    This file implement Huffman compress and decompress algorithm
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <assert.h>
#include "define.h"
#include "data_struct.h"

/* Symbol table indexed by byte value: frequency and assigned code per symbol. */
static HUFFMAN_SYM_ENTRY huffman_symtbl[HUFFMAN_MAX_SYM] = {0};
/* Min-heap of HUFFMAN_NODE used while the tree is being built. */
static HEAP *huffman_heap = NULL;
/* Root node of the Huffman tree, filled in by huffman_tree_build(). */
static HUFFMAN_NODE huffman_tree;
/* Number of distinct symbols (table entries with a non-zero frequency). */
static int huffman_sym_num;


/*
 * huffman_code_print - print a code's length followed by its bits.
 *
 * code: code word whose bits were stored with bit_set(), i.e. bit 0 is the
 *       most significant bit of the first byte of the object in memory.
 * size: number of valid bits in `code`.
 */
static void huffman_code_print(unsigned short code,int size)
{
    /* bit_get() works on bytes; view the code word through an explicit
       unsigned char pointer rather than passing an unsigned short *,
       which is an incompatible pointer type. */
    const unsigned char *code_bytes = (const unsigned char *)&code;
    int bit;

    printf("size=%d\t",size);

    for(bit = 0; bit < size; bit++)
    {
        printf("%d",bit_get(code_bytes,bit));
    }

    printf("\n");

    return;
}

/*
 * huffman_node_print - debug dump of one node: symbol, weight, node type
 * and, on a second line, the node's code.
 */
static void huffman_node_print(HUFFMAN_NODE *node)
{
    const HEAP_NODE *weight = &node->heap_node;

    printf("sym:%c,NF=%d,type=%d\n",node->sym,weight->pri,node->type);

    huffman_code_print(node->bits,node->bits_size);
}

/*
 * huffman_tree_print - debug dump of every leaf in the tree rooted at
 * `root`, visiting the left subtree before the right one.
 *
 * For each leaf the symbol, its parent's symbol (or "ROOT" when it has no
 * parent), its weight and its code are printed.  A NULL root is ignored.
 */
static void huffman_tree_print(HUFFMAN_NODE *root)
{
    if(!root)
        return;

    if(root->type == HUFFMAN_LEAF_NODE)
    {
        if(root->parent)
            printf("%c(%c),NF=%d,",root->sym,root->parent->sym,root->heap_node.pri);
        else
            printf("%c(ROOT),NF=%d,",root->sym,root->heap_node.pri);

        huffman_code_print(root->bits,root->bits_size);
        /* bit_print() takes a byte pointer; `bits` is an unsigned short, so
           the byte view has to be requested explicitly. */
        bit_print((unsigned char *)&root->bits,root->bits_size);
    }

    if(root->left)
        huffman_tree_print(root->left);

    if(root->right)
        huffman_tree_print(root->right);

    return;
}

/*
 * huffman_symtbl_print - debug dump of the symbol table.
 *
 * Prints the number of distinct symbols, then one line plus a bit dump of
 * the code for every table entry with a non-zero frequency.
 */
static void huffman_symtbl_print(void)
{
    int i;

    printf("total sym num=%d\n",huffman_sym_num);

    for(i = 0; i < HUFFMAN_MAX_SYM; i++)
    {
        if(huffman_symtbl[i].freq)
        {
            printf("%c(%x),freq=%d,code=%x,code_size=%d\n",huffman_symtbl[i].sym,(unsigned char)huffman_symtbl[i].sym,huffman_symtbl[i].freq,huffman_symtbl[i].code,huffman_symtbl[i].code_size);
            /* bit_print() takes a byte pointer; `code` is an unsigned short,
               so the byte view has to be requested explicitly. */
            bit_print((unsigned char *)&huffman_symtbl[i].code,huffman_symtbl[i].code_size);
        }
    }

    return;
}

/*
 * huffman_node_new - allocate a zero-filled tree node.
 *
 * Returns a pointer to the new node, owned by the caller.  Allocation
 * failure aborts through assert(), matching how the rest of this file
 * treats out-of-memory; previously a NULL result was handed straight to
 * memset().
 */
static HUFFMAN_NODE * huffman_node_new(void)
{
    HUFFMAN_NODE *node = calloc(1,sizeof(*node));

    assert(node);

    return node;
}


static void huffman_symtbl_build(char *buf)
{
    char *p = buf;

    while(*p)
    {
        unsigned char ch = (unsigned char)*p;

        if(huffman_symtbl[ch].freq == 0)huffman_sym_num++;

        huffman_symtbl[ch].sym = *p;
        huffman_symtbl[ch].freq++;
        p++;
    }

    return;
}

/*
 * huffman_heap_init - create the build heap and seed it with one leaf node
 * per symbol that has a non-zero frequency in huffman_symtbl.
 *
 * The node weight (heap_node.pri) is the symbol's frequency.  Each leaf is
 * fully zeroed before the relevant fields are filled in: the heap stores a
 * copy of the whole node, so uninitialised parent/bits/bits_size members
 * would otherwise travel into the tree as garbage (a lone leaf that ends
 * up as the root would carry a wild parent pointer).
 */
void huffman_heap_init(void)
{
    int i;

    huffman_heap = heap_init(HEAP_SMALL,sizeof(HUFFMAN_NODE),512);
    assert(huffman_heap);

    for(i = 0; i < HUFFMAN_MAX_SYM; i++)
    {
        if(huffman_symtbl[i].freq)
        {
            HUFFMAN_NODE node;

            memset(&node,0,sizeof(node));

            node.sym = huffman_symtbl[i].sym;
            node.freq = node.heap_node.pri = huffman_symtbl[i].freq;
            node.type = HUFFMAN_LEAF_NODE;
            node.left = node.right = NULL;

            heap_insert(huffman_heap,&node);
        }
    }

    return;
}

/*
 * huffman_tree_merge - join two subtrees under a fresh trunk node.
 *
 * tree_parent: node to (re)initialise as the trunk; it is cleared first.
 * tree_l:      becomes the left child.
 * tree_r:      becomes the right child.
 *
 * The trunk gets the placeholder symbol '@' and a weight equal to the sum
 * of both children's weights; both children have their parent pointer set
 * to tree_parent.
 */
static void  huffman_tree_merge(HUFFMAN_NODE *tree_parent,HUFFMAN_NODE *tree_l,HUFFMAN_NODE *tree_r)
{
    const char trunk_sym = '@';

    memset(tree_parent,0,sizeof(*tree_parent));

    tree_parent->heap_node.pri = tree_l->heap_node.pri + tree_r->heap_node.pri;
    tree_parent->sym = trunk_sym;
    tree_parent->type = HUFFMAN_TRUNK_NODE;

    tree_parent->left = tree_l;
    tree_l->parent = tree_parent;

    tree_parent->right = tree_r;
    tree_r->parent = tree_parent;
}

/*
 * huffman_tree_build - build the Huffman tree from the current symbol table.
 *
 * Seeds the heap through huffman_heap_init(), then repeatedly removes the
 * two lightest nodes, merges them under a new trunk node and re-inserts
 * the trunk, until fewer than two nodes remain.  The last node is
 * extracted into the global huffman_tree as the root.
 *
 * NOTE(review): the heap appears to store nodes by value, so the trunk
 * that later ends up in the tree is a different object from the
 * node_parent its children point back to - confirm against the heap
 * implementation.
 */
static void huffman_tree_build(void)
{
    int status;
    HUFFMAN_NODE *lighter,*heavier,*trunk;

    /* 1. seed the heap with one leaf per symbol */
    huffman_heap_init();

    /* 2. merge the two lightest subtrees until one tree is left */
    while(HEAP_NODE_NUM(huffman_heap) >= 2)
    {
        lighter = huffman_node_new();
        heavier = huffman_node_new();
        trunk = huffman_node_new();

        status = heap_get(huffman_heap,lighter);
        status = heap_get(huffman_heap,heavier);

        huffman_tree_merge(trunk,lighter,heavier);

        heap_insert(huffman_heap,trunk);
    }

    /* 3. whatever is left in the heap is the root */
    status = heap_get(huffman_heap,&huffman_tree);
    assert(status == 0);
}

/*
 * huffman_symtbl_code - assign codes to every node below (and including) `node`.
 *
 * node:      subtree root; must not be NULL.
 * bits:      code accumulated on the way from the tree root to `node`.
 * bits_size: number of valid bits in `bits` (the depth of `node`).
 *
 * Every node stores its own code; leaves additionally publish it in
 * huffman_symtbl.  A left edge appends a 0 bit, a right edge a 1 bit.
 *
 * Fixes over the previous version:
 *  - the table is indexed with the unsigned byte value, so symbols
 *    >= 0x80 no longer produce a negative index on signed-char platforms;
 *  - only leaves write to the table, so trunk nodes (which all carry the
 *    placeholder '@') can no longer overwrite the code of a real '@';
 *  - descending below the capacity of the code word is caught by an
 *    assert instead of writing past the local variable;
 *  - bit_set() receives an explicit byte pointer.
 */
static void   huffman_symtbl_code(HUFFMAN_NODE *node,unsigned short bits,int bits_size)
{
    assert(node);

    node->bits = bits;
    node->bits_size = bits_size;

    if(node->type == HUFFMAN_LEAF_NODE)
    {
        HUFFMAN_SYM_ENTRY *entry = &huffman_symtbl[(unsigned char)node->sym];

        /* each symbol must receive its code exactly once */
        assert(entry->code == 0);

        entry->code = bits;
        entry->code_size = bits_size;
    }

    /* going one level deeper needs room for one more bit in the code word */
    if(node->left || node->right)
        assert(bits_size < (int)(sizeof(bits) * 8));

    if(node->left)
    {
        unsigned short l_bits = bits;
        bit_set((unsigned char *)&l_bits,bits_size,0);
        huffman_symtbl_code(node->left,l_bits,bits_size + 1);
    }

    if(node->right)
    {
        unsigned short r_bits = bits;
        bit_set((unsigned char *)&r_bits,bits_size,1);
        huffman_symtbl_code(node->right,r_bits,bits_size + 1);
    }

    return;
}

/*
 * huffman_char2code - append one code to the output bit stream.
 *
 * p:         output buffer.
 * start_bit: bit position in `p` at which the code starts.
 * code:      code bits as stored by bit_set().
 * code_size: number of valid bits in `code`.
 */
static void  huffman_char2code(unsigned char *p,int start_bit,unsigned short code,int code_size)
{
    /* bit_get() works on bytes; view the code word through an explicit
       unsigned char pointer rather than passing an unsigned short *,
       which is an incompatible pointer type. */
    const unsigned char *code_bytes = (const unsigned char *)&code;
    int bit;

    for(bit = 0;bit < code_size;bit++)
    {
        bit_set(p,bit+start_bit,bit_get(code_bytes,bit));
    }

    return;
}


unsigned char* huffman_buf_encode(char *buf,int size,int *buf_bit_size)
{
    assert(size > 0);

    unsigned char *buf_encoded = malloc(size);
    int start_bit = 0;
    int i;

    for(i = 0;i < size;i++)
    {
        char ch = buf[i];
        huffman_char2code(buf_encoded,start_bit,huffman_symtbl[ch].code,huffman_symtbl[ch].code_size);
        start_bit += huffman_symtbl[ch].code_size;
    }

    *buf_bit_size = start_bit;

    return buf_encoded;
}

/*
 * huffman_file_create - write the compressed file "huffman_file".
 *
 * buf:      packed code bits.
 * bit_size: number of valid bits in `buf`; must be > 0.
 *
 * The file consists of a HUFFMAN_FILE_HEADER, one HUFFMAN_SYM_SAVE_ENTRY
 * per used symbol, and the encoded bytes (bit_size rounded up to whole
 * bytes).  The header is written with file_write() and the payload added
 * with file_append().
 *
 * The header is allocated zero-filled so reserved/padding bytes written to
 * disk are deterministic, and it is released before returning (it used to
 * leak).
 */
static  void huffman_file_create(unsigned char *buf,int bit_size)
{
    int encoded_size;
    HUFFMAN_FILE_HEADER *file_header = NULL;
    int header_size = (int)(sizeof(HUFFMAN_FILE_HEADER) + huffman_sym_num*sizeof(HUFFMAN_SYM_SAVE_ENTRY));

    assert(bit_size > 0);

    /* round the bit count up to whole bytes */
    encoded_size = (bit_size + 7)/8;

    file_header = calloc(1,(size_t)header_size);
    assert(file_header);

    file_header->version = HUFFMAN_VER;
    file_header->bit_size = bit_size;

    printf("version=%x,bit size=%d\n",HUFFMAN_VER,bit_size);

    file_header->sym_tbl_entry_num = huffman_sym_num;
    int j = 0;
    for(int i = 0;i < HUFFMAN_MAX_SYM;i++)
    {
        if(huffman_symtbl[i].freq == 0)continue;

        file_header->sym_save_tbl[j].sym = huffman_symtbl[i].sym;
        file_header->sym_save_tbl[j].freq = huffman_symtbl[i].freq;
        j++;

        printf("encoded sym=%c,freq=%d\n",huffman_symtbl[i].sym,huffman_symtbl[i].freq);
    }

    /* the saved table must hold exactly the symbols that were counted */
    assert(j == huffman_sym_num);

    file_write("huffman_file",file_header,header_size);
    file_append("huffman_file",buf,encoded_size);

    free(file_header);

    return;
}



/*
 * huffman_encode - compress `file` into "huffman_file".
 *
 * Steps: read the file, count symbol frequencies, build the Huffman tree,
 * assign a code to every symbol, encode the data and write header plus
 * payload.  The resulting tree is dumped to stdout afterwards.
 *
 * Both the input buffer and the encoded buffer are released before
 * returning; the encoded buffer used to leak.
 */
void huffman_encode(char *file)
{
    int size = 0;
    int total_bit_size = 0;
    unsigned char *buf_encoded = NULL;

    /* 1. read file into buf */
    char *buf = file_read(file,&size);
    assert(buf);
    /* 2. scan buf into huffman_symtbl */
    huffman_symtbl_build(buf);
    /* 3. build huffman tree */
    huffman_tree_build();
    /* 4. set code for each sym, starting from an empty code at the root */
    unsigned short bits = 0;
    huffman_symtbl_code(&huffman_tree,bits,0);
    /* 5. compress buf with the codes */
    buf_encoded = huffman_buf_encode(buf,size,&total_bit_size);
    /* 6. write encoded buf into HUFFMAN file */
    huffman_file_create(buf_encoded,total_bit_size);

    huffman_tree_print(&huffman_tree);

    free(buf_encoded);
    free(buf);

    return;
}

/*
 * huffman_decode_char - decode one symbol from the bit stream.
 *
 * node:      tree node to start from (the root for a fresh symbol).
 * pbits:     encoded bit stream.
 * pos:       position of the next unread bit in `pbits`.
 * char_bits: receives the bits_size recorded in the leaf that was reached,
 *            i.e. the length of the code just consumed.
 *
 * Walks down the tree, taking the right child for a 1 bit and the left
 * child for a 0 bit, until a leaf is reached.  Returns that leaf's symbol.
 */
static char huffman_decode_char(HUFFMAN_NODE *node,char *pbits,int pos,int *char_bits)
{
    HUFFMAN_NODE *cur = node;

    assert(cur);

    while(cur->type != HUFFMAN_LEAF_NODE)
    {
        int go_right = bit_get((const unsigned char *)pbits,pos);

        pos++;
        cur = go_right ? cur->right : cur->left;

        /* a trunk node is expected to have both children */
        assert(cur);
    }

    *char_bits = cur->bits_size;

    return cur->sym;
}

/*
 * huffman_symtbl_decoded - load the symbol table stored in a file header.
 *
 * file_header: header read from a compressed file; must not be NULL.
 *
 * Sets huffman_sym_num and fills huffman_symtbl with the saved symbols and
 * frequencies.
 *
 * Fixes over the previous version:
 *  - the table is cleared first, so frequencies and codes left over from
 *    earlier work in the same process cannot leak into the rebuilt tree;
 *  - the entry count read from the file is range-checked before use;
 *  - the table is indexed with the unsigned byte value, so symbols
 *    >= 0x80 no longer produce a negative index on signed-char platforms.
 */
static  void huffman_symtbl_decoded(HUFFMAN_FILE_HEADER *file_header)
{
    assert(file_header);
    assert(file_header->sym_tbl_entry_num >= 0 && file_header->sym_tbl_entry_num <= HUFFMAN_MAX_SYM);

    memset(huffman_symtbl,0,sizeof(huffman_symtbl));

    huffman_sym_num = file_header->sym_tbl_entry_num;

    for(int i = 0;i < huffman_sym_num;i++)
    {
        unsigned char sym = (unsigned char)file_header->sym_save_tbl[i].sym;

        printf("decoded sym=%c,freq=%d\n",file_header->sym_save_tbl[i].sym,file_header->sym_save_tbl[i].freq);
        huffman_symtbl[sym].sym = file_header->sym_save_tbl[i].sym;
        huffman_symtbl[sym].freq = file_header->sym_save_tbl[i].freq;
    }

    return;
}


void huffman_decode(char *huffman_file,char *decoded_file)
{
    int buf_size,bit_size;
    int decoded_buf_size;

    char *buf = file_read(huffman_file,&buf_size);
    assert(buf && buf_size > 0);

    HUFFMAN_FILE_HEADER *file_header = (HUFFMAN_FILE_HEADER *)buf;

    huffman_symtbl_decoded(file_header);

    huffman_tree_build();

    int header_size = sizeof(HUFFMAN_FILE_HEADER) + huffman_sym_num*sizeof(HUFFMAN_SYM_SAVE_ENTRY);

    unsigned short bits = 0;
    huffman_symtbl_code(&huffman_tree,bits,0);

    huffman_tree_print(&huffman_tree);

    bit_size = file_header->bit_size;
    decoded_buf_size = bit_size/8 *10; //suppose compressed at max 10%

    printf("decoded bit size=%d\n",bit_size);

    char *decoded_buf = malloc(decoded_buf_size);
    assert(decoded_buf);

    char *pbits = buf + header_size;

    int bit_pos = 0;
    int decoded_char_num = 0;

    bit_print(pbits,120);

    while(bit_size > 0)
    {
        int char_bits = 0;

        assert(decoded_char_num < decoded_buf_size-1);

        decoded_buf[decoded_char_num] = huffman_decode_char(&huffman_tree,pbits,bit_pos,&char_bits);

        printf("decoded char=%c\n",decoded_buf[decoded_char_num]);

        decoded_char_num++;
        bit_size -= char_bits;
        bit_pos += char_bits;
    }

    decoded_buf[decoded_char_num] = '\0';

    printf("decode finished!decoded file size=%d,mallocd size=%d\n",decoded_char_num,decoded_buf_size);

    /* write to file */
    file_write(decoded_file,decoded_buf,decoded_char_num);

    free(decoded_buf);

    return;
}

 

  • 2
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
综合实验: 1. 问题描述 利用哈夫曼编码进行通信可以大大提高信道利用率,缩短信息传输时间,降低传输成本。这要求在发送端通过一个编码系统对待传输数据预先编码,在接收端将传来的数据进行译码(复原)。对于双工信道(即可以双向传输信息的信道),每端都需要一个完整的编/译码系统。试为这样的信息收发站编写一个哈夫曼码的编/译码系统。 2. 基本要求 一个完整的系统应具有以下功能: (1) I:初始化(Initialization)。从终端读入字符集大小n,以及n个字符和n个权值,建立哈夫曼树,并将它存于文件hfmTree中。 (2) E:编码(Encoding)。利用已建好的哈夫曼树(如不在内存,则从文件hfmTree中读入),对文件ToBeTran中的正文进行编码,然后将结果存入文件CodeFile中。 (3) D:译码(Decoding)。利用已建好的哈夫曼树将文件CodeFile中的代码进行译码,结果存入文件Textfile中。 (4) P:印代码文件(Print)。将文件CodeFile以紧凑格式显示在终端上,每行50个代码。同时将此字符形式的编码文件写入文件CodePrin中。 (5) T:印哈夫曼树(Tree printing)。将已在内存中的哈夫曼树以直观的方式(比如树)显示在终端上,同时将此字符形式的哈夫曼树写入文件TreePrint 中。 3. 测试数据 用下表给出的字符集和频度的实际统计数据建立哈夫曼树,并实现以下报文的编码和译码:“THIS PROGRAME IS MY FAVORITE”。 字符 A B C D E F G H I J K L M 频度 186 64 13 22 32 103 21 15 47 57 1 5 32 20 字符 N O P Q R S T U V W X Y Z 频度 57 63 15 1 48 51 80 23 8 18 1 16 1
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值