使用哈夫曼树将字符重新编码(压缩数据)

1 篇文章 0 订阅
1 篇文章 0 订阅

huffmanTree.c

#include "huffmanTree.h"


/**
 * Allocates a Huffman tree node that stores `data`.
 *
 * Every field is initialised: links are NULL, weight and code are zero and
 * is_data is false (the original left is_data indeterminate, so any reader
 * that ran before the caller set it invoked undefined behaviour).
 *
 * @param data  value stored in the node.
 * @return      the new node; never NULL. On allocation failure the function
 *              prints a message and terminates the process with a failure
 *              status. The caller owns the node and must free() it.
 */
HuffmanTree new_huffman_tree_node(data_size data)
{
    HuffmanTree tmp = malloc(sizeof *tmp);
    if (tmp == NULL)
    {
        printf("申请失败\n");
        /* A failed allocation is an error: do not report success to the shell. */
        exit(EXIT_FAILURE);
    }

    tmp->is_data = false;
    tmp->parentNode = NULL;
    tmp->leftSubtree = NULL;
    tmp->rightSubtree = NULL;
    tmp->data = data;
    tmp->weight = 0;
    tmp->HuffCode.code = 0;
    tmp->HuffCode.codelength = 0;
    return tmp;
}

/* Attaches leftSubtreeNode as the left child of parentNode.
 * Only the parent's link is written; the child's parent pointer is untouched. */
void add_leftSubtree(HuffmanTree parentNode, HuffmanTree leftSubtreeNode)
{
    HuffmanTree child = leftSubtreeNode;

    parentNode->leftSubtree = child;
}

/* Attaches rightSubtreeNode as the right child of parentNode.
 * Only the parent's link is written; the child's parent pointer is untouched. */
void add_rightSubtree(HuffmanTree parentNode, HuffmanTree rightSubtreeNode)
{
    HuffmanTree child = rightSubtreeNode;

    parentNode->rightSubtree = child;
}

/* Records parentNode as the parent of node.
 * The parent's child links are not modified here. */
void add_parentNode(HuffmanTree node, HuffmanTree parentNode)
{
    HuffmanTree parent = parentNode;

    node->parentNode = parent;
}

/*
 * Recursive worker for code_huffman_tree().
 *
 * prefix - bits accumulated on the path above `node`
 * depth  - number of edges between the starting node and `node`
 *
 * The state travels in parameters instead of function-local statics, so the
 * routine is reentrant and a deep excursion can no longer damage the prefix
 * that sibling subtrees see (the original shifted a 16-bit static left and
 * then right again, losing bits once the path grew past the field width).
 */
static void assign_huffman_codes(HuffmanTree node, bool bit,
                                 unsigned short prefix, unsigned short depth)
{
    if (node == NULL)
        return;

    /* Append this node's branch bit (0 = left, 1 = right) to the path. */
    unsigned short code = (unsigned short)((prefix << 1) | (bit ? 1 : 0));

    if (node->is_data)
    {
        node->HuffCode.codelength = depth;
        node->HuffCode.code = code;
    }

    assign_huffman_codes(node->leftSubtree, 0, code, (unsigned short)(depth + 1));
    assign_huffman_codes(node->rightSubtree, 1, code, (unsigned short)(depth + 1));
}

/**
 * Assigns a code and code length to every data node below `node`.
 *
 * Left edges contribute a 0 bit and right edges a 1 bit. `bit` is the bit
 * shifted in for the starting node itself; it ends up above the low
 * `codelength` bits of each code and is not counted in codelength, so a
 * data node that is the starting node gets codelength 0.
 *
 * NOTE: codes are stored in a 16-bit field; paths longer than that are
 * truncated, so callers must reject trees deeper than 16 levels.
 *
 * @param node  root of the (sub)tree to label; NULL is a no-op.
 * @param bit   bit recorded for the starting node.
 */
void code_huffman_tree(HuffmanTree node, bool bit)
{
    assign_huffman_codes(node, bit, 0, 0);
}

/**
 * Searches the tree rooted at `node` for a data node whose value is `data`.
 *
 * The whole tree is always visited in pre-order; every match overwrites
 * *getnode, so the last match in pre-order wins. *getnode is left untouched
 * when nothing matches.
 */
void find_huffman_tree(HuffmanTree node, data_size data, HuffmanTree *getnode)
{
    if (!node)
    {
        return;
    }

    if (node->is_data && node->data == data)
    {
        *getnode = node;
    }

    find_huffman_tree(node->leftSubtree, data, getnode);
    find_huffman_tree(node->rightSubtree, data, getnode);
}

/* Follows one edge from `root`: the right child when `is` is true,
 * otherwise the left child. `root` must not be NULL. */
HuffmanTree ret_node(HuffmanTree root, bool is)
{
    return is ? root->rightSubtree : root->leftSubtree;
}

/**
 * Counts the data nodes (is_data set) in the tree rooted at `node`.
 *
 * The original accumulated into a function-local static and had no return
 * statement on the non-NULL path, so the result was undefined and grew on
 * every further call. The count is now computed purely from the subtree.
 *
 * @param node  root of the (sub)tree; NULL yields 0.
 * @return      number of data nodes in the subtree.
 */
int count_huffman_tree(HuffmanTree node)
{
    if (node == NULL)
        return 0;

    return (node->is_data ? 1 : 0)
         + count_huffman_tree(node->leftSubtree)
         + count_huffman_tree(node->rightSubtree);
}

/**
 * Returns the sum of codelength * weight over all data nodes below `node`,
 * i.e. the encoded size in bits according to the stored weights.
 *
 * The original had no return statement on the non-NULL path (undefined
 * behaviour when the caller uses the value) and accumulated into a static,
 * so a second call returned a doubled total. The product is widened before
 * multiplying so that two 16-bit operands cannot overflow int.
 *
 * @param node  root of the (sub)tree; NULL yields 0.
 * @return      total number of code bits for the subtree.
 */
unsigned long long get_huffman_tree(HuffmanTree node)
{
    if (node == NULL)
        return 0;

    unsigned long long bits = 0;
    if (node->is_data)
        bits = (unsigned long long)node->HuffCode.codelength * node->weight;

    return bits
         + get_huffman_tree(node->leftSubtree)
         + get_huffman_tree(node->rightSubtree);
}

/**
 * Prints every data node of the tree in pre-order: its flag, value and
 * weight, the 16 bits of its code (most significant first, in groups of
 * four) and its code length.
 */
void show_huffman_tree(HuffmanTree node)
{
    if (node == NULL)
        return;

    if (node->is_data == 1)
    {
        printf("bool:%d, data:%d, 权:%d\n", node->is_data, node->data, node->weight);

        /* Bits 15..0, a space after each nibble except the last one. */
        for (int bit = 15; bit >= 0; bit--)
        {
            printf("%d", is_bool(node->HuffCode.code, bit));
            if (bit == 0)
                putchar('\n');
            else if (bit % 4 == 0)
                putchar(' ');
        }

        printf("code_len:%d\n\n", node->HuffCode.codelength);
    }

    show_huffman_tree(node->leftSubtree);
    show_huffman_tree(node->rightSubtree);
}

huffmanTree.h

#ifndef HUFFMANTREE_H
#define HUFFMANTREE_H

#ifdef __cplusplus
extern "C"
{
#endif

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>

/*
 * Single-bit helpers. `x` is an lvalue, `y` a bit index counted from the
 * least significant bit. Every argument is parenthesised so that compound
 * expressions such as `a | b` or `n - 1` expand as one operand.
 */
#define Clear(x, y) ((x) &= (~(1 << (y))))   /* clear bit y of x   */
#define Set(x, y) ((x) |= (1 << (y)))        /* set bit y of x     */
#define is_bool(x, y) (((x) >> (y)) & 1)     /* 1 if bit y is set  */


/*
 * Field types of the Huffman structures below, spelled as macros so that
 * each width can be changed in one place. code, codelength, weight and data
 * are all unsigned short; the three link types are pointers to
 * struct HuffmanTreeNode.
 */
#define is_data_size bool
#define code_size unsigned short
#define codelength_size unsigned short
#define weight_size unsigned short
#define data_size unsigned short
#define parentNode_size struct HuffmanTreeNode *
#define leftSubtree_size struct HuffmanTreeNode *
#define rightSubtree_size struct HuffmanTreeNode *

/* Code assigned to a data node: the bit pattern and how many of its low
 * bits are significant. */
typedef struct
{
	code_size code;             // code bits, last path bit in bit 0
	codelength_size codelength; // number of significant bits in code
} HuffmanTreeCode;

/* One node of the Huffman tree. HuffmanTree is a pointer to such a node. */
typedef struct HuffmanTreeNode
{
    is_data_size is_data;           // true when the node carries a data value
    weight_size weight;             // weight (occurrence count)
    data_size data;                 // data value
	parentNode_size parentNode;     // parent node
    leftSubtree_size leftSubtree;   // left subtree
    rightSubtree_size rightSubtree; // right subtree
    HuffmanTreeCode HuffCode;       // code assigned by code_huffman_tree()
} HuffmanTreeNode, *HuffmanTree;    // Huffman node

/* Allocates a node storing `data`; exits the process if allocation fails. */
HuffmanTree new_huffman_tree_node(data_size data);
/* Sets parentNode->leftSubtree. */
void add_leftSubtree(HuffmanTree parentNode, HuffmanTree leftSubtreeNode);
/* Sets parentNode->rightSubtree. */
void add_rightSubtree(HuffmanTree parentNode, HuffmanTree rightSubtreeNode);
/* Sets node->parentNode. */
void add_parentNode(HuffmanTree node, HuffmanTree parentNode);
/* Walks the tree and fills HuffCode of every data node (left = 0, right = 1);
 * `bit` is the bit recorded for the starting node. */
void code_huffman_tree(HuffmanTree node, bool bit); 
/* Walks the tree accumulating codelength * weight over the data nodes. */
unsigned long long get_huffman_tree(HuffmanTree node);
/* Stores in *getnode the data node whose value equals `data`, if any. */
void find_huffman_tree(HuffmanTree node, data_size data, HuffmanTree *getnode);
/* Walks the tree counting nodes whose is_data is set. */
int count_huffman_tree(HuffmanTree node);
/* Prints value, weight, code bits and code length of every data node. */
void show_huffman_tree(HuffmanTree node);
/* Returns root's right child when `is` is true, otherwise its left child. */
HuffmanTree ret_node(HuffmanTree root, bool is);
#ifdef __cplusplus
}
#endif

#endif

seekList.c

#include "seekList.h"

linklist init_list()
{
    linklist p = (linklist)malloc(sizeof(listnode));
    if(p != NULL)
    {
        p->prev = p;
        p->next = p;
    }
    return p;
}

/**
 * Allocates a list entry that refers to the Huffman node `node`.
 * The entry starts out detached, linked only to itself.
 *
 * @return the new entry, or NULL if allocation fails.
 */
linklist new_node(HuffmanTree node)
{
    linklist entry = (linklist)malloc(sizeof(listnode));

    if (entry == NULL)
        return entry;

    /* Self-links first, then the payload; the stores are independent. */
    entry->prev = entry;
    entry->next = entry;
    entry->node = node;
    return entry;
}

/* Inserts entry `p` just before the sentinel `head`, i.e. at the tail. */
void list_add_tail(linklist head, linklist p)
{
    linklist tail = head->prev;   /* current last entry (head itself if empty) */

    tail->next = p;
    p->prev = tail;
    p->next = head;
    head->prev = p;
}

/**
 * Returns the first list entry whose Huffman node stores `data`,
 * or NULL when no entry matches. The sentinel itself is never inspected.
 */
linklist find_node(linklist head, data_size data)
{
    for (linklist cur = head->next; cur != head; cur = cur->next)
    {
        if (cur->node->data == data)
        {
            return cur;
        }
    }
    return NULL;
}

/**
 * Finds the list entry whose Huffman node has the smallest weight.
 * When several entries share the minimum, the one closest to the head wins.
 *
 * @param head  list sentinel.
 * @return      the minimum-weight entry, or NULL if the list is empty.
 *              (The original dereferenced the sentinel's unset `node` in
 *              that case, so the NULL checks in callers could never fire.)
 */
linklist find_min_weight(linklist head)
{
    linklist best = head->next;
    if (best == head)
        return NULL;

    for (linklist cur = best->next; cur != head; cur = cur->next)
    {
        if (cur->node->weight < best->node->weight)
            best = cur;
    }
    return best;
}


/* Unlinks entry `p` from its list and leaves it linked to itself.
 * `head` is not used; the entry is not freed. */
void list_del(linklist head, linklist p)
{
    linklist before = p->prev;
    linklist after = p->next;

    before->next = after;
    after->prev = before;

    p->prev = p;
    p->next = p;
}

/* Prints one numbered line per list entry (starting at 1) with the value
 * and weight of the entry's Huffman node. */
void list_show(linklist head)
{
    int index = 0;
    linklist cur = head->next;

    while (cur != head)
    {
        index++;
        printf("%d,数据:%d, 权:%d\n", index, cur->node->data, cur->node->weight);
        cur = cur->next;
    }
}

seekList.h

#ifndef SEEKLIST_H
#define SEEKLIST_H

#ifdef __cplusplus
extern "C"
{
#endif

#include "huffmanTree.h"

/* Entry of a circular doubly linked list whose payload is a Huffman tree
 * node. linklist is a pointer to such an entry; the list head is a sentinel
 * entry of the same type. */
typedef struct node
{
    HuffmanTree node;   // Huffman tree node this entry refers to
    struct node *prev;  // previous entry
    struct node *next;  // next entry
}listnode, *linklist;

/* Creates an empty list: a sentinel linked to itself. NULL on failure. */
linklist init_list();

/* Allocates a detached entry referring to `node`. NULL on failure. */
linklist new_node(HuffmanTree node);

/* Appends entry `p` at the tail of the list headed by `head`. */
void list_add_tail(linklist head, linklist p);

/* Returns the first entry whose Huffman node stores `data`, or NULL. */
linklist find_node(linklist head, data_size data);

/* Unlinks `p` from its list without freeing it. */
void list_del(linklist head, linklist p);

/* Returns the entry whose Huffman node has the smallest weight. */
linklist find_min_weight(linklist head);

/* Prints index, value and weight of every entry. */
void list_show(linklist head);

#ifdef __cplusplus
}
#endif

#endif //SEEKLIST_H

main.c

#include "huffmanTree.h"
#include "seekList.h"
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#define PATH "/home/qyb/test/main.c"

/* Number of distinct values an input byte can take. */
enum { BYTE_VALUES = 256 };

/* Prints `msg` on stderr and terminates with a failure status. */
static void fail(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(EXIT_FAILURE);
}

/* Reads the whole file at `path` into a malloc'd buffer and stores its
 * length in *len_out. Terminates the process on any I/O error. */
static unsigned char *read_input(const char *path, size_t *len_out)
{
    struct stat fileinfo;
    memset(&fileinfo, 0, sizeof fileinfo);
    if (stat(path, &fileinfo) != 0)
    {
        perror("stat失败");
        exit(EXIT_FAILURE);
    }

    /* Binary mode: the bytes must not be translated on any platform. */
    FILE *fp = fopen(path, "rb");
    if (fp == NULL)
    {
        perror("fopen失败");
        exit(EXIT_FAILURE);
    }

    size_t len = (size_t)fileinfo.st_size;
    /* malloc(0) may legally return NULL, so always ask for at least 1 byte. */
    unsigned char *buf = malloc(len > 0 ? len : 1);
    if (buf == NULL)
    {
        fclose(fp);
        fail("申请失败");
    }

    size_t got = fread(buf, 1, len, fp);
    fclose(fp);
    if (got != len)
    {
        free(buf);
        fail("fread失败");
    }

    *len_out = len;
    return buf;
}

/* Builds the lookup list: one entry (and one leaf node) per distinct byte
 * value, with the leaf's weight counting its occurrences. */
static linklist count_symbols(const unsigned char *bytes, size_t len)
{
    linklist head = init_list();
    if (head == NULL)
    {
        perror("创建失败");
        exit(EXIT_FAILURE);
    }

    for (size_t i = 0; i < len; i++)
    {
        linklist seen = find_node(head, bytes[i]);
        if (seen != NULL)
        {
            seen->node->weight++;
            continue;
        }

        /* First occurrence: create the leaf and register it in the list. */
        HuffmanTree leaf = new_huffman_tree_node(bytes[i]);
        leaf->weight++;
        leaf->is_data = true;

        linklist entry = new_node(leaf);
        if (entry == NULL)
            fail("申请失败");
        list_add_tail(head, entry);
    }
    return head;
}

/* Creates an internal node above `left` and (optionally) `right`. */
static HuffmanTree join_nodes(HuffmanTree left, HuffmanTree right)
{
    HuffmanTree parent = new_huffman_tree_node(0);
    parent->is_data = false;
    /* NOTE: weights are 16-bit and may wrap for large inputs. The tree is
     * then still a valid prefix tree, only no longer optimal. */
    parent->weight = left->weight + (right != NULL ? right->weight : 0);

    add_leftSubtree(parent, left);
    add_parentNode(left, parent);
    if (right != NULL)
    {
        add_rightSubtree(parent, right);
        add_parentNode(right, parent);
    }
    return parent;
}

/* Repeatedly merges the two lightest entries until one tree remains and
 * returns its root (NULL for an empty list). On return the list holds a
 * single entry that refers to the root. */
static HuffmanTree build_tree(linklist head)
{
    if (head->next == head)
        return NULL;

    if (head->next->next == head)
    {
        /* Only one distinct symbol: hang the leaf under an internal root so
         * that it receives the one-bit code "0" instead of an empty code. */
        linklist only = head->next;
        only->node = join_nodes(only->node, NULL);
        return only->node;
    }

    HuffmanTree root = NULL;
    while (head->next->next != head)
    {
        linklist left = find_min_weight(head);
        if (left == NULL)
            fail("leftNode NULL");
        list_del(head, left);

        linklist right = find_min_weight(head);
        if (right == NULL)
            fail("rightNode NULL");
        list_del(head, right);

        root = join_nodes(left->node, right->node);

        linklist merged = new_node(root);
        if (merged == NULL)
            fail("申请失败");
        list_add_tail(head, merged);

        /* The list entries are no longer needed; the tree nodes live on. */
        free(left);
        free(right);
    }
    return root;
}

/* Frees every node of the tree rooted at `node`. */
static void free_tree(HuffmanTree node)
{
    if (node == NULL)
        return;
    free_tree(node->leftSubtree);
    free_tree(node->rightSubtree);
    free(node);
}

/**
 * Reads PATH, builds a Huffman tree over its bytes, packs the bytes into a
 * bit stream, decodes that stream again with the same tree, checks that the
 * round trip reproduces the input and writes the decoded bytes to a file.
 *
 * The original returned right after dumping the input bytes, which left the
 * whole compressor unreachable; that debugging leftover has been removed.
 *
 * @return 0 on success; the process exits with a failure status on error.
 */
int main(void)
{
    size_t input_len = 0;
    unsigned char *pixel = read_input(PATH, &input_len);
    if (input_len == 0)
    {
        printf("文件为空,无需压缩\n");
        free(pixel);
        return 0;
    }

    /* Frequency table and tree. */
    linklist seek_head = count_symbols(pixel, input_len);
    list_show(seek_head);

    HuffmanTree huffman_root = build_tree(seek_head);
    if (huffman_root == NULL)
        fail("哈夫曼树创建失败");
    if (seek_head->next != seek_head)
        free(seek_head->next);
    free(seek_head);

    code_huffman_tree(huffman_root, 0);
    show_huffman_tree(huffman_root);

    /* Resolve each byte value to its leaf once instead of once per byte. */
    HuffmanTree leaf_of[BYTE_VALUES] = { NULL };
    for (unsigned value = 0; value < BYTE_VALUES; value++)
        find_huffman_tree(huffman_root, (unsigned short)value, &leaf_of[value]);

    /* Size the output from the codes actually used, and reject codes that
     * do not fit the code field (they would have been silently truncated). */
    unsigned long long total_bits = 0;
    for (size_t i = 0; i < input_len; i++)
    {
        HuffmanTree leaf = leaf_of[pixel[i]];
        if (leaf == NULL)
            fail("未找到对应的编码");
        if (leaf->HuffCode.codelength == 0 ||
            leaf->HuffCode.codelength > 8 * sizeof leaf->HuffCode.code)
            fail("编码长度超出范围");
        total_bits += leaf->HuffCode.codelength;
    }

    size_t packed_len = (size_t)((total_bits + 7) / 8);
    unsigned char *str = calloc(packed_len > 0 ? packed_len : 1, 1);
    if (str == NULL)
        fail("申请失败");

    /* Encode: append each code most-significant bit first; within a byte
     * the stream fills bit 7 down to bit 0. */
    unsigned long long startAddress = 0;
    for (size_t i = 0; i < input_len; i++)
    {
        HuffmanTree leaf = leaf_of[pixel[i]];
        for (unsigned k = leaf->HuffCode.codelength; k > 0; k--)
        {
            unsigned shift = 7u - (unsigned)(startAddress % 8);
            if (is_bool(leaf->HuffCode.code, k - 1))
                Set(str[(size_t)(startAddress / 8)], shift);
            startAddress++;
        }
    }
    printf("压缩后字节:%llu\n", (unsigned long long)packed_len);

    /* Decode: walk the tree bit by bit and emit a byte at every leaf. */
    unsigned char *p_str = calloc(input_len, 1);
    if (p_str == NULL)
        fail("申请失败");

    size_t m_index = 0;
    HuffmanTree retNode = huffman_root;
    for (unsigned long long bit = 0; bit < total_bits; bit++)
    {
        unsigned shift = 7u - (unsigned)(bit % 8);
        retNode = ret_node(retNode, is_bool(str[(size_t)(bit / 8)], shift));
        if (retNode == NULL)
            fail("解压失败");
        if (retNode->is_data)
        {
            if (m_index >= input_len)
                fail("解压失败");
            /* Convert by value: copying the first byte of the 16-bit field
             * would depend on the machine's byte order. */
            p_str[m_index++] = (unsigned char)retNode->data;
            retNode = huffman_root;
        }
    }
    printf("解压后字节:%zu\n", m_index);

    if (m_index != input_len || memcmp(p_str, pixel, input_len) != 0)
        fail("解压结果与原始数据不一致");

    /* NOTE(review): the original opens the literal file name "PATH", not the
     * PATH macro; kept as is so that the input file is not overwritten. */
    FILE *fp1 = fopen("PATH", "wb");
    if (fp1 == NULL)
    {
        perror("fopen失败");
        exit(EXIT_FAILURE);
    }
    size_t written = fwrite(p_str, 1, m_index, fp1);
    if (fclose(fp1) != 0 || written != m_index)
        fail("fwrite失败");

    free(str);
    free(p_str);
    free(pixel);
    free_tree(huffman_root);

    return 0;
}

总共五个文件
huffmanTree.c
huffmanTree.h
seekList.c
seekList.h
main.c

功能:将一个小文件通过哈夫曼树重新编码,从而达到压缩数据的目的。

解压部分尚未完成的内容:
1. 解压时没有重新构建哈夫曼树(直接使用了压缩时建好的树)。
2. 没有把编码对应关系表(码表)写入压缩数据中。
3. 目前仅仅对数据进行了哈夫曼重新编码。

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

到达想去的地方

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值