学习笔记:数据结构(四)哈弗曼编码

实现简单的压缩解压

huffman.h头文件

#pragma once
#include "list.h"

/* Index of a node inside struct huffman's tab[] array. */
typedef unsigned short ui16_t;
/* Bit pattern of one symbol's Huffman code. */
typedef unsigned short code_t;
/* Number of valid bits held in a code_t. */
typedef unsigned char code_len_t;

/* Number of leaf slots: one per possible byte value. */
#define LEAF_NUM 256
/* Number of internal (non-leaf) slots in the node table. */
#define UNLEAF_NUM (LEAF_NUM - 1)
/* One past the last valid table index; used as the "no node" marker. */
#define UNUSED_INDEX (LEAF_NUM + UNLEAF_NUM)
/*
 * Value stored in huff_head_info.magic.
 * No trailing semicolon, so the macro can be used inside expressions
 * (comparisons, initializers) as well as in plain assignments.
 */
#define MAGIC_NUM 0xFF
/* Value stored in huff_head_info.type. */
#define HF_ZIP_TYPE 0x01
/* Byte-pack the structures that follow (no padding between members). */
#pragma pack(1)
/*
 * Tree links of one node, expressed as indices into struct huffman's tab[].
 * huff_table_node_init() sets all three to UNUSED_INDEX.
 */
struct huff_tree_node {
	ui16_t parent;	/* index of the parent node */
	ui16_t lchild;	/* index of the left child */
	ui16_t rchild;	/* index of the right child */
};

/*
 * One entry of the Huffman table: a leaf (index < LEAF_NUM, one per byte
 * value) or an internal node created while the tree is built.
 */
struct huff_table_node {
	size_t weight;			/* occurrence count; for internal nodes, the sum of both children */
	code_t code;			/* code bits, accumulated while walking from the leaf up to the root */
	code_len_t code_len;		/* number of valid bits in code */
	struct list_head list;		/* link used while the node sits in the weight-sorted list */
	struct huff_tree_node tree;	/* parent/child indices */
};

/*
 * Header written at the start of a compressed file, immediately followed
 * by the node table (entries 0..root of struct huffman's tab[]).
 */
struct huff_head_info {
	unsigned char magic;	/* MAGIC_NUM */
	unsigned char type;	/* HF_ZIP_TYPE */
	size_t file_size;	/* ftell() of the source stream when the header was built */
	ui16_t root;		/* table index of the tree root */
	size_t offset;		/* size in bytes of the node table that follows */
	char tab[1];		/* first byte of the node table; the rest follows in the same buffer */
};
/*
 * Restore the default packing. The empty form is the documented reset;
 * pack(0) is not a valid value on every compiler and may be ignored,
 * which would leave every later structure byte-packed.
 */
#pragma pack()
/*
 * State of one compressor/decompressor instance.
 * NOTE(review): FILE is used here but this header does not include
 * <stdio.h>; includers must pull it in first (huffman.c does).
 */
struct huffman {
	struct huff_table_node tab[LEAF_NUM + UNLEAF_NUM];	/* leaves first, then internal nodes */
	struct list_head *phead;	/* head of the weight-sorted node list; allocated by huffman_init() */
	ui16_t root;			/* table index of the tree root; UNUSED_INDEX until a tree exists */
	FILE *fp_src;			/* input stream of the current operation */
	FILE *fp_dest;			/* output stream of the current operation */
	/* Compress file `origin` into file `zip`; returns 0 on success, negative on failure. */
	int (*compress)(struct huffman *phf, const char *origin, const char *zip);
	/* Decompress file `zip` into file `origin`; returns 0 on success, negative on failure. */
	int (*decompress)(struct huffman *phf, const char *zip, const char *origin);

};

/*
 * Prepare *phf for use: reset every table node, allocate the list head,
 * clear the stream pointers and install the compress/decompress callbacks.
 * Returns 0 on success.
 */
int huffman_init(struct huffman *phf);
/*
 * Close the source and destination streams held by *phf.
 * Returns 0 on success.
 */
int huffman_destroy(struct huffman *phf);

static inline void huff_table_node_init(struct huff_table_node *phftn)
{
	phftn->weight = 0;
	phftn->code = 0;
	phftn->code_len = 0;
	list_head_init(&phftn->list);
	phftn->tree.parent = UNUSED_INDEX;
	phftn->tree.lchild = UNUSED_INDEX;
	phftn->tree.rchild = UNUSED_INDEX;
}

/*
 * Field accessors for node n of the table.
 * They expand to lvalues and require a variable named `phf`
 * (struct huffman *) to be in scope at the point of use.
 */
#define WEIGHT(n)	 (phf->tab[n].weight)
#define LIST(n)  	 (phf->tab[n].list)
#define PARENT(n)	 (phf->tab[n].tree.parent)
#define LCHILD(n)	 (phf->tab[n].tree.lchild)
#define RCHILD(n)	 (phf->tab[n].tree.rchild)
#define CODE(n)  	 (phf->tab[n].code)
#define CODE_LEN(n)	 (phf->tab[n].code_len)




/*
 * Child accessors for the node table stored right after a
 * struct huff_head_info header. They require a variable named `pheadinfo`
 * (struct huff_head_info *) in scope. The header declares tab as a char
 * array, so it is viewed as an array of table nodes before indexing;
 * the node type is byte-packed, so the cast adds no alignment requirement.
 */
#define DELCHILD(n)	 (((struct huff_table_node *)pheadinfo->tab)[n].tree.lchild)
#define DERCHILD(n)	 (((struct huff_table_node *)pheadinfo->tab)[n].tree.rchild)

huffman.c

#include <stdio.h>
#include "huffman.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
	
static int weight_cmp(const struct list_head *a, const struct list_head *b)
{
	struct huff_table_node *pa = container_of(a, struct huff_table_node, list);
	struct huff_table_node *pb = container_of(b, struct huff_table_node, list);
	return pa->weight - pb->weight;
}
/*
 * Count how often each byte value occurs in phf->fp_src.
 *
 * Reads the stream one byte at a time from its current position until
 * fread() stops delivering data, incrementing the weight of the matching
 * leaf for every byte.
 *
 * Returns 0 on success, -1 if the stream reports a read error.
 */
static int huff_get_weight(struct huffman *phf)
{
	assert(phf != NULL);
	unsigned char buf = 0;

	while (fread(&buf, 1, 1, phf->fp_src) == 1) {
		WEIGHT(buf) += 1;
	}
	/* A short read is either end-of-file or an error; only the former is fine. */
	if (ferror(phf->fp_src)) {
		return -1;
	}
#ifdef _DEBUG
	{
		int i;
		for (i = 0; i < LEAF_NUM; i++) {
			if (WEIGHT(i) > 0) {
				/* weight is a size_t, hence %zu */
				printf("[DEBUG] tab[%d] %c weigh: %zu\n", i, i, WEIGHT(i));
			}
		}
		printf("\n");
	}
#endif

	return 0;
}
/*
 * Put every leaf that occurs at least once on the list headed by
 * phf->phead and sort that list with weight_cmp.
 *
 * Always returns 0.
 */
static int huff_build_list_with_sort(struct huffman *phf)
{
	assert(phf != NULL);
	int i;

	for (i = 0; i < LEAF_NUM; i++) {
		if (WEIGHT(i) > 0) {
			list_add(&LIST(i), phf->phead);
		}
	}
	list_insert_sort(phf->phead, weight_cmp);
#ifdef _DEBUG
	{
		struct huff_table_node *pa;
		list_for_each_entry(pa, phf->phead, list) {
			/* pointer difference is ptrdiff_t (%td), weight is size_t (%zu) */
			printf("[DEBUG] tab[%td] weight %zu --> ", pa - phf->tab, pa->weight);
		}
		printf("\n");
	}
#endif
	return 0;
}
/*
 * Build the Huffman tree from the weight-sorted list.
 *
 * Repeatedly removes the first two nodes of the list, joins them under a
 * new internal node (taken from tab[LEAF_NUM...]) whose weight is the sum
 * of both, inserts that node into the list and re-sorts, until one node is
 * left. That node's index is stored in phf->root.
 *
 * Special cases:
 *  - exactly one distinct symbol: the lone leaf gets an internal parent
 *    with the leaf as both children, so the symbol receives a 1-bit code
 *    and the root is a real internal node. Without this the root would be
 *    left at LEAF_NUM - 1 and the leaf would have no path to it.
 *  - empty list (empty input): no node is created and root ends up as
 *    LEAF_NUM - 1, exactly as before.
 *
 * Always returns 0.
 */
static int huff_build_huff_tree(struct huffman *phf)
{
	assert(phf != NULL);
	struct list_head *a, *b;
	ui16_t index = LEAF_NUM, ia, ib;

	/* Single distinct symbol: give the lone leaf a parent. */
	if (phf->phead->next != phf->phead &&
	    phf->phead->next->next == phf->phead) {
		a = phf->phead->next;
		ia = container_of(a, struct huff_table_node, list) - phf->tab;

		WEIGHT(index) = WEIGHT(ia);
		PARENT(ia) = index;
		LCHILD(index) = ia;
		RCHILD(index) = ia;

		node_del_init(a);
		list_add(&LIST(index), phf->phead);
		index++;
	}

	while (phf->phead->next->next != phf->phead) {
		a = phf->phead->next;
		b = a->next;
		/* table indices of the two lightest nodes */
		ia = container_of(a, struct huff_table_node, list) - phf->tab;
		ib = container_of(b, struct huff_table_node, list) - phf->tab;

		WEIGHT(index) = WEIGHT(ia) + WEIGHT(ib);
		PARENT(ia) = index;
		PARENT(ib) = index;
		LCHILD(index) = ia;
		RCHILD(index) = ib;

		/* take a and b off the list */
		node_del_init(a);
		node_del_init(b);

		/* put the new internal node on the list and restore the order */
		list_add(&LIST(index), phf->phead);
		list_insert_sort(phf->phead, weight_cmp);
#ifdef _DEBUG
		{
			/* weights are size_t, hence %zu */
			printf("[DEBUG] M: tab[%d] weight %zu  L: tab[%d] weight %zu  R: tab[%d] weight %zu\n", index, WEIGHT(index), ia, WEIGHT(ia), ib, WEIGHT(ib));
		}
#endif

		index++;
	}
	phf->root = index - 1;
	return 0;
}

static int huff_fill_code(struct huffman *phf)
{
	assert(phf != NULL);
	ui16_t i, cur;
	for (i = 0; i < LEAF_NUM; i++) {   
		if (WEIGHT(i) > 0) {
			cur = i;
			while(cur != phf->root) {/*从叶子节点往root遍历,获得反转了的编码*/
				CODE(i) <<= 1;
				CODE_LEN(i)++;
				if (RCHILD(PARENT(cur)) == cur) {
					CODE(i) |= 1;
				}
				cur = PARENT(cur);
			}
#ifdef _DEBUG
{
	printf("[DEBUG] code %x  code_len %d\n", CODE(i), CODE_LEN(i));
}
#endif
		}
	}
	return 0;
}

static int huff_prepare(struct huffman *phf)
{
	/* 1.遍历文件,获得字符出现的频率 */
	if (huff_get_weight(phf) < 0) {
		fprintf(stderr, "huff_get_weight failed.\n");
		return -1;
	}

	/* 2.构建有序序列 */
	if (huff_build_list_with_sort(phf) < 0) {
		fprintf(stderr, "huff_build_list_with_sort failed.\n");
		return -2;
	}

	/* 3.构建最优二叉树 */
	if (huff_build_huff_tree(phf) < 0) {
		fprintf(stderr, "huff_build_huff_tree failed.\n");
		return -3;
	}
	/* 4.填充字符编码 */
	if (huff_fill_code(phf) < 0) {
		fprintf(stderr, "huff_fill_code failed.\n");
		return -4;
	}
	return 0;
}
static int huff_make_head_info(struct huffman *phf)
{
	size_t huff_table_size = sizeof(struct huff_table_node) * (phf->root + 1);
	struct huff_head_info *pheadinfo = (struct huff_head_info *)malloc(huff_table_size + sizeof(*pheadinfo));
	assert(pheadinfo != NULL);

	pheadinfo->magic = MAGIC_NUM;
	pheadinfo->type = HF_ZIP_TYPE;
	pheadinfo->file_size = ftell(phf->fp_src);
	pheadinfo->root = phf->root;
	pheadinfo->offset = huff_table_size;
	memcpy(pheadinfo->tab, phf->tab, huff_table_size);

	fwrite(pheadinfo, huff_table_size + sizeof(*pheadinfo), 1, phf->fp_dest);

	free(pheadinfo);
	return 0;
}
/*
 * Compress file `origin` into file `zip`.
 *
 * Opens both files (binary mode, so no newline translation alters the
 * data), builds the code table from the input, writes the header, then
 * re-reads the input and emits each byte's code, most significant output
 * bit first. A final partial byte is padded with zero bits.
 *
 * The streams stay open in phf->fp_src / phf->fp_dest on return, on both
 * success and failure; huffman_destroy() closes them.
 *
 * Returns 0 on success, -1 / -2 if the input / output file cannot be
 * opened, -3 if preparing the table fails, -4 if writing the header fails,
 * -5 on an I/O error while reading the input or writing the output.
 */
static int huffman_compress(struct huffman *phf, const char *origin, const char *zip)
{
	assert(phf != NULL);
	if ((phf->fp_src = fopen(origin, "rb")) == NULL) {
		fprintf(stderr, "fopen failed.\n");
		return -1;
	}
	if ((phf->fp_dest = fopen(zip, "wb")) == NULL) {
		fprintf(stderr, "fopen failed.\n");
		return -2;
	}

	if (huff_prepare(phf) < 0) {
		fprintf(stderr, "huff_prepare faild\n");
		return -3;
	}

	/* write the header */
	if (huff_make_head_info(phf) < 0) {
		fprintf(stderr, "huff_make_head_info failed\n");
		return -4;
	}

	/*
	 * Emit the codes in root-to-leaf order. The stored code has the bit
	 * nearest the root in bit 0, so reading it from bit 0 upwards gives
	 * the forward order.
	 *
	 * Both buffers are single bytes: one byte is read and written at a
	 * time, and going through a wider integer would make the result depend
	 * on the host's byte order.
	 */
	rewind(phf->fp_src);
	unsigned char rbuf = 0;
	unsigned char wbuf = 0;
	code_len_t i = 0;
	unsigned char count = 0;
	while (fread(&rbuf, 1, 1, phf->fp_src) == 1) {
		for (i = 0; i < CODE_LEN(rbuf); i++) {
			wbuf = (unsigned char)(wbuf << 1);
			count++;
			if ((CODE(rbuf) >> i) & 1) {
				wbuf |= 1;
			}
			if (count == 8) {  /* a full byte has been collected */
				if (fwrite(&wbuf, 1, 1, phf->fp_dest) != 1) {
					fprintf(stderr, "fwrite failed.\n");
					return -5;
				}
#ifdef _DEBUG
				{
					printf(" %x ", wbuf);
				}
#endif
				count = 0;
				wbuf = 0;
			}
		}
	}
	if (ferror(phf->fp_src)) {
		fprintf(stderr, "fread failed.\n");
		return -5;
	}
	/* flush the remaining bits, padded with zeros on the right */
	if (count != 0) {
		wbuf = (unsigned char)(wbuf << (8 - count));
		if (fwrite(&wbuf, 1, 1, phf->fp_dest) != 1) {
			fprintf(stderr, "fwrite failed.\n");
			return -5;
		}
#ifdef _DEBUG
		{
			printf(" %x \n", wbuf);
		}
#endif
	}
	if (fflush(phf->fp_dest) != 0) {
		fprintf(stderr, "fwrite failed.\n");
		return -5;
	}
	return 0;
}

static int huff_get_head_info(struct huffman *phf)
{
	struct huff_head_info *pheadinfo = (struct huff_head_info *)malloc(sizeof(*pheadinfo));
	assert(pheadinfo != NULL);
	fread(pheadinfo, sizeof(*pheadinfo) - 1, 1, phf->fp_src);
#ifdef _DEBUG
{
	printf("after pheadinfo... offset %d\n", ftell(phf->fp_src));
}
#endif
	size_t huff_table_size = sizeof(struct huff_table_node) * (pheadinfo->root + 1);
	fread(&phf->tab, huff_table_size, 1, phf->fp_src);
	phf->root = pheadinfo->root;	
#ifdef _DEBUG
{
	printf("after huff_table... offset %d\n", ftell(phf->fp_src));
}
#endif


#ifdef _DEBUG
{
	printf("[DEBUG] pheadinfo->magic = %x\n", pheadinfo->magic);
	printf("[DEBUG] pheadinfo->type = %x\n", pheadinfo->type);
	printf("[DEBUG] pheadinfo->file_size = %d\n", pheadinfo->file_size);
	printf("[DEBUG] pheadinfo->root = %d\n", pheadinfo->root);
	printf("[DEBUG] pheadinfo->offset = %d\n", pheadinfo->offset);
	printf("[DEBUG] huff_table_size = %d\n", huff_table_size);
}
#endif
	return 0;
}
static int huffman_decompress(struct huffman *phf, const char *zip, const char *origin)
{
	assert(phf != NULL);

	if ((phf->fp_src = fopen(zip, "r")) == NULL) {
		fprintf(stderr, "fopen failed.\n");
		return -1;
	}
	if ((phf->fp_dest = fopen(origin, "w")) == NULL) {
		fprintf(stderr, "fopen failed.\n");
		return -2;
	}
	
	/*获取文件头部信息*/
	if (huff_get_head_info(phf) < 0) {
		fprintf(stderr, "huff_prepare faild\n");
		return -3;
	}

#if 1 
	unsigned short i = phf->root;
	char buf = 0;
	char count = 0;
	while ((fread(&buf, 1, 1, phf->fp_src))) {
		for (count = 0; count < 8; count++) {
			if (i < LEAF_NUM) {  
				fwrite(&i, 1, 1, phf->fp_dest);
				i = phf->root;
			}
			if ((buf >> (7 - count)) & 1)	{
				i = RCHILD(i);
			} else {
				i = LCHILD(i);
			}	
		}
//		printf("enter while %d\n",  ftell(phf->fp_src));
	}
	
#endif
	return 0;
}


/*
 * Prepare *phf for use.
 *
 * Resets every table node, clears the root index and the stream pointers,
 * installs the compress/decompress callbacks and allocates the head of the
 * sorted list.
 *
 * Returns 0 on success, -1 if the list head cannot be allocated. The check
 * is an explicit test rather than an assert so it still works when
 * assertions are compiled out with NDEBUG.
 */
int huffman_init(struct huffman *phf)
{
	assert(phf != NULL);
	int i;

	for (i = 0; i < LEAF_NUM + UNLEAF_NUM; i++) {
		huff_table_node_init(phf->tab + i);
	}
	phf->root = UNUSED_INDEX;
	phf->fp_src = NULL;
	phf->fp_dest = NULL;
	phf->compress = huffman_compress;
	phf->decompress = huffman_decompress;

	phf->phead = malloc(sizeof(*phf->phead));
	if (phf->phead == NULL) {
		return -1;
	}
	list_head_init(phf->phead);
	return 0;
}

/*
 * Release everything *phf owns.
 *
 * Closes the source and destination streams if they are open and frees the
 * list head allocated by huffman_init(). Each pointer is cleared afterwards,
 * so calling this on an instance whose files were never opened, or calling
 * it twice, is harmless. Passing a null stream to fclose() is not allowed,
 * hence the checks.
 *
 * Returns 0 on success, -1 if closing either stream fails (for the output
 * stream this can mean buffered data was not written).
 */
int huffman_destroy(struct huffman *phf)
{
	assert(phf != NULL);
	int rc = 0;

	if (phf->fp_src != NULL) {
		if (fclose(phf->fp_src) != 0) {
			rc = -1;
		}
		phf->fp_src = NULL;
	}
	if (phf->fp_dest != NULL) {
		if (fclose(phf->fp_dest) != 0) {
			rc = -1;
		}
		phf->fp_dest = NULL;
	}

	free(phf->phead);
	phf->phead = NULL;
	return rc;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值