基于哈夫曼树的数据压缩算法(C语言)

头文件(head.h):

// Initial capacity of a stack, in elements (used by InitStack).
#define STACK_INIT_SIZE 100
// Number of elements added to the capacity each time Push finds the stack full.
#define STACKINCREMENT 10
typedef int ElemType;   // element type stored in the stack
typedef int Status;     // status-code type; not referenced elsewhere in the visible source
// Array-backed stack of ElemType values.
typedef struct {
	ElemType* base;     // start of the element buffer
	ElemType* top;      // one past the most recently pushed element; equals base when empty
	int stacksize;      // current capacity of the buffer, in elements
}SqStack, * SqStacklist;

// Allocates a new empty stack with room for STACK_INIT_SIZE elements.
SqStacklist InitStack();
// Pushes e on top of S; a full buffer is first grown by STACKINCREMENT elements.
void  Push(SqStacklist S, ElemType e);
// Removes and returns the top element of S; returns 0 when S is empty.
ElemType Pop(SqStacklist S);
// Prints every element with "%d", top element first, leaving S empty.
void PrintStack(SqStacklist S);


// Heap-sort helpers for a char array that is used 1-based (a[0] is not part of the data).
// Swaps a[i] and a[j].
void exch_char(char a[], int i, int j);
// Moves a[k] up while it is greater than its parent a[k / 2].
void swim_char(char a[], int k);
// Moves a[k] down inside a[1..N] while it is smaller than its larger child.
void sink_char(char a[], int k, int N);
// Sorts a[1..strlen(a)-1] into ascending order.
void BiTree_sort_char(char a[]);


// Heap-sort helpers for an int array that is used 1-based (a[0] is not part of the data).
// Swaps a[i] and a[j].
void exch_int(int a[], int i, int j);
// Moves a[k] up while it is greater than its parent a[k / 2].
void swim_int(int a[], int k);
// Moves a[k] down inside a[1..N] while it is smaller than its larger child.
void sink_int(int a[], int k, int N);
// Sorts a[1..a_length] into ascending order.
void BiTree_sort_int(int a[], int a_length);


// Shifts the NUL-terminated string a one position to the right so its data starts at a[1].
void move_char(char a[]);
// Shifts a[0..a_length-1] one position to the right so the data starts at a[1].
// The prototype carries a_length so that it matches the definition and its call in Select.
void move_int(int a[], int a_length);


// One node of a Huffman tree stored in an array. parent, lchild and rchild are
// array indices; 0 means "none" because the tree occupies indices 1 and up.
typedef struct {
	int weight, parent, lchild, rchild;
}HTNode, * HuffmanTree;

// Shifts HT[0..HT_length-1] one slot to the right so the nodes start at HT[1].
void move_HuffmanTree(HuffmanTree HT, int HT_length);
// From the 1-based text str, collects the distinct characters into a_w[1..]
// (sorted ascending) and counts their occurrences into w[1..].
void determine_w(char str[], char a_w[], int w[]);
// Prints every "character:weight " pair of a_w[1..] / w[1..].
void print_determine_w(char a_w[], int w[]);
// Stores in *s1 and *s2 the indices of two lowest-weight nodes among HT[1..N];
// the candidate weights are taken from nodes whose parent field is 0.
void Select(HuffmanTree HT, int N, int* s1, int* s2);
// Builds the Huffman tree for the n weights a[1..n] into HT[1..2n-1].
void HuffmanCoding(HuffmanTree HT, int a[], int n);
// Returns the index of the internal node whose lchild or rchild equals location;
// n is the number of leaves.
int find_order(HuffmanTree HT, int location, int n);
// Pushes onto S the code bits of node `location` (0 = left, 1 = right), walking
// from node N up to the root, so that popping S yields the code root-first.
void find_HT_node(HuffmanTree HT, int location, int N, SqStacklist S);
// Prints "character:code " for each of the n leaves, followed by a newline.
void print_HT_node(HuffmanTree HT, int n, char a_w[], SqStacklist S);
// Decodes the '0'/'1' string a_int into a_char using the tree HT and the symbols a_w[1..n].
void solve_number(HuffmanTree HT, int n, char a_int[], char a_char[], char a_w[]);
// Prints one "index weight parent lchild rchild" line for every node HT[1..2n-1].
void printHuffmanCoding(HuffmanTree HT, int n);

头文件中函数实现(head.cpp):

#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include <string.h>
#include <malloc.h>
#include"head.h"



SqStacklist InitStack() {          //创建空栈
	SqStacklist S;
	S = (SqStacklist)malloc(sizeof(SqStack));
	S->base = (ElemType*)malloc(STACK_INIT_SIZE * sizeof(ElemType));
	S->top = S->base;
	S->stacksize = STACK_INIT_SIZE;   //当前内存大小
	return S;
}

// Pushes e on top of S.
//
// When the buffer is full it is grown by STACKINCREMENT elements first. If that
// reallocation fails the stack is left exactly as it was, the element is not
// stored, and a message is written to stderr.
void  Push(SqStacklist S, ElemType e) {
	if (S->top - S->base >= S->stacksize) {
		// Keep the old pointer until realloc succeeds: on failure realloc returns
		// NULL and the original buffer is still valid and still owned by S.
		ElemType* grown = (ElemType*)realloc(S->base, (S->stacksize + STACKINCREMENT) * sizeof(ElemType));
		if (grown == NULL) {
			fprintf(stderr, "Push: out of memory, element dropped\n");
			return;
		}
		S->base = grown;
		S->top = S->base + S->stacksize;    // the buffer may have moved; re-derive top
		S->stacksize += STACKINCREMENT;
	}
	*S->top++ = e;                          // store, then advance top
}

// Removes the top element of S and returns it.
// An empty stack is left untouched and 0 is returned.
ElemType Pop(SqStacklist S) {
	if (S->top != S->base) {
		S->top -= 1;        // step back onto the last stored element
		return *S->top;
	}
	return 0;
}

// Pops every element of S and prints it with "%d", top element first.
// S is empty when the function returns.
void PrintStack(SqStacklist S) {
	for (;;) {
		if (S->top == S->base) break;
		ElemType value = Pop(S);
		printf("%d", value);
	}
}



// Swaps the characters stored at a[i] and a[j].
void exch_char(char a[], int i, int j) {
	const char held = a[j];
	a[j] = a[i];
	a[i] = held;
}

// Moves a[k] upward in the 1-based max-heap a until its parent is not smaller.
void swim_char(char a[], int k) {
	for (int parent = k / 2; k > 1 && a[parent] < a[k]; parent = k / 2) {
		exch_char(a, parent, k);
		k = parent;
	}
}

// Moves a[k] downward in the 1-based max-heap a[1..N], always swapping with the
// larger of its two children, until neither child is larger than it.
void sink_char(char a[], int k, int N) {
	int child = 2 * k;
	while (child <= N) {
		if (child < N && a[child + 1] > a[child]) child += 1;   // pick the larger child
		if (a[k] >= a[child]) return;                           // heap order already holds
		exch_char(a, k, child);
		k = child;
		child = 2 * k;
	}
}

// Heap-sorts a[1..strlen(a)-1] into ascending order; a[0] is not part of the data
// but is counted by strlen, so it must not be NUL.
void BiTree_sort_char(char a[]) {
	int last = strlen(a) - 1;

	// Phase 1: turn a[1..last] into a max-heap, starting from the last parent.
	int k = last / 2;
	while (k >= 1) {
		sink_char(a, k, last);
		k--;
	}

	// Phase 2: repeatedly move the maximum to the end and restore the heap.
	for (; last > 1; ) {
		exch_char(a, 1, last);
		last -= 1;
		sink_char(a, 1, last);
	}
}



// Swaps the integers stored at a[i] and a[j].
void exch_int(int a[], int i, int j) {
	const int held = a[j];
	a[j] = a[i];
	a[i] = held;
}

// Moves a[k] upward in the 1-based max-heap a until its parent is not smaller.
void swim_int(int a[], int k) {
	for (int parent = k / 2; k > 1 && a[parent] < a[k]; parent = k / 2) {
		exch_int(a, parent, k);
		k = parent;
	}
}

// Moves a[k] downward in the 1-based max-heap a[1..N], always swapping with the
// larger of its two children, until neither child is larger than it.
void sink_int(int a[], int k, int N) {
	int child = 2 * k;
	while (child <= N) {
		if (child < N && a[child + 1] > a[child]) child += 1;   // pick the larger child
		if (a[k] >= a[child]) return;                           // heap order already holds
		exch_int(a, k, child);
		k = child;
		child = 2 * k;
	}
}

// Heap-sorts a[1..a_length] into ascending order; a[0] is not part of the data.
void BiTree_sort_int(int a[], int a_length) {
	// Phase 1: turn a[1..a_length] into a max-heap, starting from the last parent.
	int k = a_length / 2;
	while (k >= 1) {
		sink_int(a, k, a_length);
		k--;
	}

	// Phase 2: repeatedly move the maximum to the end and restore the heap.
	for (; a_length > 1; ) {
		exch_int(a, 1, a_length);
		a_length -= 1;
		sink_int(a, 1, a_length);
	}
}



// Shifts the NUL-terminated string a one position to the right so that its
// characters occupy a[1..len]; a[0] keeps its old value (a copy of a[1]).
//
// The shifted string is re-terminated explicitly, so the result no longer
// depends on the caller's buffer being zero-filled. The buffer must therefore
// hold at least strlen(a) + 2 bytes. An empty string is left untouched.
void move_char(char a[]) {
	int len = (int)strlen(a);       // measured once; the loop below overwrites the old terminator
	if (len == 0) return;

	for (int i = len; i > 0; i--) {
		a[i] = a[i - 1];
	}
	a[len + 1] = '\0';
}

// Shifts a[0..a_length-1] one position to the right so the data occupies
// a[1..a_length]; a[0] keeps its old value. Nothing happens for a_length <= 0.
void move_int(int a[], int a_length) {
	int* dst = a + a_length;
	int remaining = a_length;
	while (remaining > 0) {         // copy from the back so no value is overwritten early
		*dst = *(dst - 1);
		dst--;
		remaining--;
	}
}



// Shifts HT[0..HT_length-1] one slot to the right so the nodes occupy
// HT[1..HT_length]; HT[0] keeps its old contents.
void move_HuffmanTree(HuffmanTree HT, int HT_length) {
	int dst = HT_length;
	while (dst > 0) {
		HT[dst] = HT[dst - 1];      // whole-node copy: weight, parent, lchild, rchild
		dst--;
	}
}

// Computes the alphabet of a text and the weight (occurrence count) of each symbol.
//
// str  : input text stored 1-based, i.e. the characters are str[1..strlen(str)-1].
// a_w  : output; the distinct characters, sorted ascending, stored in a_w[1..count]
//        and NUL-terminated at a_w[count + 1]. a_w[0] holds a non-NUL filler so that
//        strlen(a_w) - 1 equals count. Needs room for count + 2 bytes.
// w    : output; w[j] is the number of occurrences of a_w[j] for j in 1..count.
//
// Neither output array has to be zero-filled by the caller. For an empty text
// a_w becomes the empty string and w is not touched.
void determine_w(char str[], char a_w[], int w[]) {
	int length_str = (int)strlen(str) - 1;  // number of text characters
	int count = 0;                          // distinct characters collected so far
	int i, j;

	// Collect the distinct characters into a_w[0..count-1].
	for (i = 1; i <= length_str; i++) {
		for (j = 0; j < count; j++) {
			if (a_w[j] == str[i]) break;
		}
		if (j == count) a_w[count++] = str[i];  // not seen before: append
	}
	a_w[count] = '\0';
	if (count == 0) return;

	move_char(a_w);             // shift right so the symbols start at index 1
	a_w[count + 1] = '\0';      // terminate explicitly instead of relying on a zero-filled buffer
	BiTree_sort_char(a_w);      // sort a_w[1..count] ascending

	// Count how often each symbol occurs in the text.
	for (j = 1; j <= count; j++) {
		w[j] = 0;
		for (i = 1; i <= length_str; i++) {
			if (str[i] == a_w[j]) w[j]++;
		}
	}
}

// Prints every symbol and its weight as "c:n " for the 1-based tables a_w / w.
//
// The number of symbols is strlen(a_w) - 1 (a_w[0] is a filler slot). The count
// is computed as a signed int so that an empty a_w yields zero iterations
// instead of wrapping around to a huge unsigned bound.
void print_determine_w(char a_w[], int w[]) {
	int count = (int)strlen(a_w) - 1;
	for (int i = 1; i <= count; i++) {
		printf("%c:%d ", a_w[i], w[i]);
	}
}

// Finds the two lowest-weight nodes among HT[1..N] that have not been merged yet
// (parent == 0).
//
// *s1 receives the index of the lightest such node and *s2 the index of the
// second lightest; ties are resolved in favour of the lower index. If fewer
// than two candidates exist the missing result is set to 0.
//
// Candidates are compared by index rather than looked up again by weight, so a
// node that already has a parent is never returned, even when it has the same
// weight as an unmerged one.
void Select(HuffmanTree HT, int N, int* s1, int* s2) {
	int first = 0, second = 0;      // 0 = "none found yet" (index 0 is never a tree node)

	for (int k = 1; k <= N; k++) {
		if (HT[k].parent != 0) continue;    // already merged into a subtree

		if (first == 0 || HT[k].weight < HT[first].weight) {
			second = first;                 // previous minimum becomes the runner-up
			first = k;
		}
		else if (second == 0 || HT[k].weight < HT[second].weight) {
			second = k;
		}
	}

	*s1 = first;
	*s2 = second;
}

// Builds the array representation of a Huffman tree.
//
// HT : output array with room for at least 2 * n entries (index 0 is unused).
//      Leaves are stored in HT[1..n], internal nodes in HT[n+1..2n-1]; the root
//      is HT[2n-1].
// w  : leaf weights, 1-based (w[1..n]).
// n  : number of distinct symbols. Nothing is done for n < 1; for n == 1 the
//      single leaf is initialised and no internal node is created.
void HuffmanCoding(HuffmanTree HT, int w[], int n) {
	if (n < 1) return;

	int m = 2 * n - 1;              // total number of nodes
	int i, s1 = 0, s2 = 0;

	// Slot 0 is never a tree node; clear it so that index 0 reads as "no node".
	HT[0].weight = HT[0].parent = HT[0].lchild = HT[0].rchild = 0;

	// Leaves carry their weight; internal nodes start empty.
	for (i = 1; i <= m; i++) {
		HT[i].weight = (i <= n) ? w[i] : 0;
		HT[i].parent = 0;
		HT[i].lchild = 0;
		HT[i].rchild = 0;
	}

	// Repeatedly merge the two lightest parentless nodes into a new node i.
	for (i = n + 1; i <= m; i++) {
		Select(HT, i - 1, &s1, &s2);
		HT[s1].parent = i;
		HT[s2].parent = i;
		HT[i].lchild = s1;
		HT[i].rchild = s2;
		HT[i].weight = HT[s1].weight + HT[s2].weight;
	}
}

// Returns the index of the internal node that has `location` as its left or
// right child, searching HT[n+1..2n-1] (n is the number of leaves).
// Returns 0 when no such node exists, e.g. for the root or for a one-leaf tree.
int find_order(HuffmanTree HT, int location, int n) {
	int m = 2 * n - 1;
	for (int i = n + 1; i <= m; i++) {
		if (HT[i].lchild == location || HT[i].rchild == location) return i;
	}
	return 0;   // not found; previously control fell off the end of the function
}

// Pushes the code bits of node `location` onto S, walking from its parent N up
// to the root: 0 when the node is the left child of the current ancestor, 1
// otherwise. The leaf-most bit is pushed first, so popping S yields the code in
// root-to-leaf order.
void find_HT_node(HuffmanTree HT, int location, int N, SqStacklist S) {
	for (;;) {
		int bit = (HT[N].lchild == location) ? 0 : 1;
		Push(S, bit);
		if (HT[N].parent == 0) return;          // N is the root: the code is complete

		// The node just handled becomes the child for the next level. It is read
		// back through N's left child's parent field (presumably N itself in a
		// consistent tree).
		location = HT[HT[N].lchild].parent;
		N = HT[N].parent;
	}
}

// Prints the Huffman code of each of the n symbols as "c:bits ", followed by a
// newline. S is used as scratch space and is empty again after every symbol.
void print_HT_node(HuffmanTree HT, int n, char a_w[], SqStacklist S) {
	int leaf = 1;
	while (leaf <= n) {
		int parent_index = find_order(HT, leaf, n);
		find_HT_node(HT, leaf, parent_index, S);    // collect the bits, leaf-most first
		printf("%c:", a_w[leaf]);
		PrintStack(S);                              // pops them in root-to-leaf order
		printf(" ");
		leaf++;
	}
	printf("\n");
}

// Decodes a bit string back into text.
//
// HT     : Huffman tree with n leaves; the root is HT[2n-1].
// a_int  : NUL-terminated code string. '0' selects the left child; any other
//          character selects the right child.
// a_char : output text, always NUL-terminated. The caller must provide room
//          for strlen(a_int) + 1 bytes.
// a_w    : symbol table, 1-based (a_w[i] is the character of leaf i).
//
// Decoding stops at the end of a_int: a trailing, incomplete code is discarded
// instead of reading past the terminator. When the tree consists of a single
// leaf, every input bit decodes to that one symbol.
void solve_number(HuffmanTree HT, int n, char a_int[], char a_char[], char a_w[]) {
	int out = 0;

	if (n >= 1) {
		int root = 2 * n - 1;
		int length = (int)strlen(a_int);
		int i = 0;

		while (i < length) {
			int node = root;

			if (HT[node].lchild == 0 && HT[node].rchild == 0) {
				i++;    // one-leaf tree: consume one bit per symbol so the loop terminates
			}
			else {
				while (i < length && (HT[node].lchild || HT[node].rchild)) {
					node = (a_int[i++] == '0') ? HT[node].lchild : HT[node].rchild;
				}
				if (HT[node].lchild || HT[node].rchild) break;  // input ended inside a code
			}
			a_char[out++] = a_w[node];
		}
	}
	a_char[out] = '\0';
}


// Prints the tree table: one "index weight parent lchild rchild" line for each
// node HT[1..2n-1], where n is the number of leaves.
void printHuffmanCoding(HuffmanTree HT, int n) {
	const int last = 2 * n - 1;
	int row = 1;
	while (row <= last) {
		HuffmanTree node = &HT[row];
		printf("%d %d %d %d %d\n", row, node->weight, node->parent, node->lchild, node->rchild);
		row++;
	}
}

主函数(ceshi1):

#define _CRT_SECURE_NO_WARNINGS   //基于哈夫曼树的数据压缩算法
#include<stdio.h>                 //数组下标从1开始
#include <string.h>
#include <malloc.h>
#include"head.h"


int main() {
	while (1) {
		SqStacklist S;
		S = InitStack();

		char str[100] = {}, a_w[100] = {}, a_int[100] = {}, a_char[100] = {};
		int w[100] = {};            //放权值

		HuffmanTree HT;
		HT = (HuffmanTree)malloc(sizeof(HTNode) * 100);
		//注意!!此处一定要分配大于哈夫曼树所需求的内存,否则第一次while可以运行,之后就会出现各种bug


		scanf("%s", str);
		if (!strcmp(str, "0")) break;      //循环结束条件
		int str_length = strlen(str);
		move_char(str);

		determine_w(str, a_w, w);
		print_determine_w(a_w, w);  //输出字符及其对应的权值
		printf("\n");

		int a_length = strlen(a_w) - 1;

		HuffmanCoding(HT, w, a_length);

		printHuffmanCoding(HT, a_length);
		print_HT_node(HT, a_length, a_w, S); //输出哈夫曼树的存储结构

		printf("\n请输入编码后的字符串:\n");
		scanf("%s", a_int);
		solve_number(HT, a_length, a_int, a_char, a_w);
		printf("\n解码后:\n%s\n\n", a_char);
	}

	return 0;
}

测试结果:

  • 11
    点赞
  • 67
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
哈夫曼编码是一种数据压缩算法,可以将数据压缩成更小的数据量,以节省存储空间和传输带宽。下面是基于哈夫曼树的数据压缩算法的C语言实现:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_TREE_HT 100
#define MAX_CHAR 256

//哈夫曼树的结构体
struct MinHeapNode {
    char data;
    int freq;
    struct MinHeapNode *left, *right;
};

struct MinHeap {
    int size;
    int capacity;
    struct MinHeapNode **array;
};

//创建一个新的哈夫曼树节点
struct MinHeapNode* newNode(char data, int freq) {
    struct MinHeapNode* node = (struct MinHeapNode*) malloc(sizeof(struct MinHeapNode));
    node->left = node->right = NULL;
    node->data = data;
    node->freq = freq;
    return node;
}

//创建一个新的最小堆
struct MinHeap* createMinHeap(int capacity) {
    struct MinHeap* minHeap = (struct MinHeap*) malloc(sizeof(struct MinHeap));
    minHeap->size = 0;
    minHeap->capacity = capacity;
    minHeap->array = (struct MinHeapNode**) malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
    return minHeap;
}

//交换两个哈夫曼树节点
void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) {
    struct MinHeapNode* t = *a;
    *a = *b;
    *b = t;
}

//维护最小堆的性质
void minHeapify(struct MinHeap* minHeap, int idx) {
    int smallest = idx;
    int left = 2 * idx + 1;
    int right = 2 * idx + 2;
    if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
        smallest = left;
    if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
        smallest = right;
    if (smallest != idx) {
        swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
        minHeapify(minHeap, smallest);
    }
}

//判断最小堆是否只有一个节点
int isSizeOne(struct MinHeap* minHeap) {
    return (minHeap->size == 1);
}

//获取最小堆的最小节点
struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
    struct MinHeapNode* temp = minHeap->array[0];
    minHeap->array[0] = minHeap->array[minHeap->size - 1];
    --minHeap->size;
    minHeapify(minHeap, 0);
    return temp;
}

//插入一个节点到最小堆
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
    ++minHeap->size;
    int i = minHeap->size - 1;
    while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
        minHeap->array[i] = minHeap->array[(i - 1) / 2];
        i = (i - 1) / 2;
    }
    minHeap->array[i] = minHeapNode;
}

//判断一个节点是否是叶子节点
int isLeaf(struct MinHeapNode* root) {
    return !(root->left) && !(root->right);
}

//创建一个最小堆,并将所有字符的频率作为节点的值
struct MinHeap* createAndBuildMinHeap(char data[], int freq[], int size) {
    struct MinHeap* minHeap = createMinHeap(size);
    for (int i = 0; i < size; ++i)
        minHeap->array[i] = newNode(data[i], freq[i]);
    minHeap->size = size;
    for (int i = (minHeap->size - 1) / 2; i >= 0; --i)
        minHeapify(minHeap, i);
    return minHeap;
}

//构建哈夫曼树
struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) {
    struct MinHeapNode *left, *right, *top;
    struct MinHeap* minHeap = createAndBuildMinHeap(data, freq, size);
    while (!isSizeOne(minHeap)) {
        left = extractMin(minHeap);
        right = extractMin(minHeap);
        top = newNode('$', left->freq + right->freq);
        top->left = left;
        top->right = right;
        insertMinHeap(minHeap, top);
    }
    return extractMin(minHeap);
}

//打印哈夫曼编码表
void printCodes(struct MinHeapNode* root, int arr[], int top) {
    if (root->left) {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }
    if (root->right) {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }
    if (isLeaf(root)) {
        printf("%c: ", root->data);
        for (int i = 0; i < top; ++i)
            printf("%d", arr[i]);
        printf("\n");
    }
}

//压缩数据
void compress(char* input_string, char* output_string) {
    int freq[MAX_CHAR] = {0};
    int n = strlen(input_string);
    for (int i = 0; i < n; ++i)
        ++freq[input_string[i]];
    struct MinHeapNode* root = buildHuffmanTree(input_string, freq, MAX_CHAR);
    int arr[MAX_TREE_HT], top = 0;
    printCodes(root, arr, top);
    for (int i = 0; i < n; ++i) {
        int j;
        for (j = 0; j < MAX_CHAR; ++j)
            if (input_string[i] == root->data)
                break;
        int k = 0;
        while (k < top) {
            output_string[k++] = arr[j++];
        }
    }
}

int main() {
    char input_string[] = "Hello World";
    char output_string[MAX_TREE_HT];
    compress(input_string, output_string);
    printf("Compressed string: %s\n", output_string);
    return 0;
}
```

该算法主要包括以下步骤:

1. 创建一个哈夫曼树节点结构体,包括字符数据、字符出现频率、左子节点和右子节点。
2. 创建一个最小堆结构体,包括当前堆的大小、最大容量和哈夫曼树节点数组。
3. 实现最小堆的常用操作,包括维护最小堆性质、插入节点、获取最小节点等。
4. 构建哈夫曼树,将所有字符的频率作为节点的值,以最小堆的形式存储,并通过不断提取最小值构建哈夫曼树。
5. 打印哈夫曼编码表,遍历哈夫曼树,当遇到叶子节点时,输出该字符的编码。
6. 压缩数据,遍历输入字符串,找到对应字符的编码,并将编码存储到输出字符串中。

以上是哈夫曼编码的基本实现,实际应用中还需要考虑压缩文件头、解压缩等问题。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值