数据结构我好爱:09 Huffman树(无理论版)

现在默认你已经明白了Huffman树的原理!(问就是理论我也不会!)

通过一番(并不全面的)动手实践,我得出了下面这个同样并不全面的结论:

   把Huffman树应用于字符压缩时,虽然原理是树,但实际操作的主要是表。这里用了好几种表来记录信息:记录孩子、双亲和权重的表,记录单个字符对应编码串的表等

各种结构体:

        1.单个node存储字符与出现次数

        2.根据ASCII码,字符共128种,其中有一部分是无法打印显示的控制字符

                创建List记录表

        3.创建孩子双亲记录表:

        4.字符编码记录表:通过char就可以确定它的编码串

/* One distinct character together with the number of times it was counted. */
typedef struct {
	char ch;	// the character itself
	int count;	// number of occurrences of ch
}node;

/* Table of counted characters, filled front to back. */
typedef struct {
	node data[catage];// 128 kinds of characters at most
	int length;	// number of entries of data currently in use
}List,*ListPtr;

/*
 * One row of the parent/child/weight table that stores the Huffman tree in an
 * array. Index 0 is used as "none" for parent, lchild and rchild.
 */
typedef struct {
	int parent;	// index of the parent row, 0 while the node has no parent
	int lchild;	// index of the left child, 0 for a leaf
	int rchild;	// index of the right child, 0 for a leaf
	int weight;	// occurrence count of a leaf, or the sum of both children
	char data;	// character stored in a leaf row
}record_map,*record_mapPtr;// record table

/* One entry of the code table: a character and its Huffman code string. */
typedef struct {// plays the role of a map from char to code string
	char data;	// the character
	char* str;	// NUL-terminated string of '0'/'1' symbols encoding data
}code_graph, * code_graphPtr;// code table

初始化:

        1.List记录表的初始化,length用来记录共有多少种不同的字符(与后文的cat,即category,含义相同);

        2.创建字符编码记录表

ListPtr Init() {
	ListPtr list = (ListPtr)malloc(sizeof(List));
	list->length = 0;
	return list;
}

//----------------------------------------------

code_graphPtr Init_graph(int len) {
	code_graphPtr graph = (code_graphPtr)malloc(sizeof(code_graph)*len);

	for (int i = 0; i < len; i++) {
		graph[i].str = (char*)malloc(sizeof(char) * len);
	}
	return graph;
}

读取文件:

        fgets 从文件中读取一行到source数组(.txt的一行本身可以“无限长”,但fgets最多只读取 Max-1 个字符,超出的部分不会被读入);

/*
 * Read the first line of the input file into source.
 *
 * source must point to a buffer of at least Max bytes. At most Max - 1
 * characters are stored and the result is always NUL-terminated; a trailing
 * newline, if one was read, is kept. When the file cannot be opened or
 * nothing can be read, source is set to the empty string so callers never
 * see uninitialized data.
 */
void Read(char* source) {
	if (source == NULL) {
		return;
	}
	source[0] = '\0';

	// Opened read-only: the file is never written, and "r+" would fail on a
	// read-only file.
	FILE* fp = fopen("D:\\D_desktop\\input.txt", "r");
	if (fp == NULL) {
		return;
	}
	if (fgets(source, Max, fp) == NULL) {
		source[0] = '\0';	// buffer contents are indeterminate after a read error
	}
	fclose(fp);
}

字符计数:(靠字符串与整数的变换)

        ASII数组以字符的ASCII码作为下标,ASCII码都小于128;

        memset ASII数组全置为0;

        ASII [i] 的值表示字符 i 的出现次数

        ASII数组非0的数据都交给List记录表吧!

/*
 * Count how often each ASCII character occurs in source and append one entry
 * per distinct character to list, in ascending character-code order.
 *
 * list:   table that receives the (character, count) pairs; entries are
 *         appended after the list->length entries already present.
 * source: NUL-terminated text to analyse.
 *
 * Bytes outside the range 0..catage-1 (for example the bytes of multi-byte
 * UTF-8 text) are ignored, because the table only covers ASCII.
 */
void CharCount(ListPtr list, char* source) {
	if (list == NULL || source == NULL) {
		return;
	}

	int ASII[catage];	// ASII[c] is the number of occurrences of character code c
	memset(ASII, 0, sizeof ASII);

	for (const char* p = source; *p != '\0'; p++) {
		// Go through unsigned char: a plain char may be negative and would
		// index in front of the array.
		unsigned char code = (unsigned char)*p;
		if (code < catage) {
			ASII[code]++;
		}
	}

	// The second condition keeps repeated calls from overflowing list->data.
	for (int i = 0; i < catage && list->length < catage; i++) {
		if (ASII[i] != 0) {
			list->data[list->length].ch = (char)i;
			list->data[list->length].count = ASII[i];
			list->length++;
		}
	}
}

选择两个最小权值:每次在前 len 个结点中,从还没有双亲(parent==0)的结点里选出权重最小的两个

        _CRT_INT_MAX 是VS(MSVC)自带定义的宏,值为int的最大值;在其他编译器上没有这个宏,可移植的写法是<limits.h>中的INT_MAX

/*
 * Pick the two lightest nodes that do not have a parent yet.
 *
 * len:      rows 1..len of root are examined.
 * tow_ints: receives two row indices: tow_ints[0] is the lightest node and
 *           tow_ints[1] the lightest of the remaining ones. Among nodes of
 *           equal weight the lowest index wins. A slot is set to 0 (the
 *           "none" index) when no candidate exists for it.
 * root:     the parent/child/weight table.
 *
 * The best candidate is tracked by index, so no "largest int" sentinel such
 * as the MSVC-only _CRT_INT_MAX is needed.
 */
void select(int len, int* tow_ints, record_mapPtr root) {
	if (tow_ints == NULL) {
		return;
	}
	tow_ints[0] = 0;
	tow_ints[1] = 0;
	if (root == NULL) {
		return;
	}

	int first = 0;
	for (int i = 1; i <= len; i++) {
		if (root[i].parent != 0) {
			continue;	// already merged into a subtree
		}
		if (first == 0 || root[i].weight < root[first].weight) {
			first = i;
		}
	}

	int second = 0;
	for (int i = 1; i <= len; i++) {
		if (i == first || root[i].parent != 0) {
			continue;
		}
		if (second == 0 || root[i].weight < root[second].weight) {
			second = i;
		}
	}

	tow_ints[0] = first;
	tow_ints[1] = second;
}

创建孩子双亲权重记录表:

        二叉树有一种数组表示方法:创建孩子双亲权重记录表的过程其实就是创建了一棵树;

/*
 * Build the Huffman tree for the characters in list as an array of rows.
 *
 * Layout of the returned table, with n = list->length:
 *   row 0          unused; index 0 means "no parent" / "no child"
 *   rows 1..n      leaves, in the order of list->data
 *   rows n+1..2n-1 internal nodes; row 2n-1 is the root of the tree
 *
 * Returns a heap-allocated table of 2n+1 rows that the caller releases with
 * free(), or NULL when list is NULL, its length is out of range, or the
 * allocation fails. For n == 0 the table consists of the unused row only.
 */
record_mapPtr creatHuff(ListPtr list) {
	if (list == NULL || list->length < 0 || list->length > catage) {
		return NULL;
	}

	const int leaves = list->length;
	const int total = 2 * leaves - 1;

	// calloc zeroes every parent/lchild/rchild field, i.e. sets them to "none".
	record_mapPtr root = (record_mapPtr)calloc((size_t)leaves * 2 + 1, sizeof(record_map));
	if (root == NULL) {
		return NULL;
	}

	// Copy exactly the used list entries; data[leaves] is not a valid entry.
	for (int i = 0; i < leaves; i++) {
		root[i + 1].weight = list->data[i].count;
		root[i + 1].data = list->data[i].ch;
	}

	// Repeatedly merge the two lightest parentless nodes into a new row.
	int min[2];
	for (int i = leaves + 1; i <= total; i++) {
		select(i - 1, min, root);
		root[i].lchild = min[0];
		root[i].rchild = min[1];
		root[i].weight = root[min[0]].weight + root[min[1]].weight;
		root[min[0]].parent = i;
		root[min[1]].parent = i;
	}
	return root;
}

 生成code文件:

        1.只查找叶子结点,并得出单个字符的编码串;

        因为是从叶子往上一直走到根节点(根节点的parent为0,下标0本身不存放结点),所以得到的编码串是倒序的,通过从缓冲区末尾往前填写来解决;

        2.获得编码串;

        当前叶子的父母为p,如果它是p的左孩子则存入 0,否则 1

        然后p的父母为q,如果它是q的左孩子则存入 0,否则 1、、(开始套娃!!!)

        直到p==0;

        3.获得codefile.txt文本

        由上得到的是出现字符的编码,有编码记录表记录,遍历源文件source,挨个字符转化为编码串并存入codefile.txt当中;

/*
 * Fill the code table and write the encoded form of source to codefile.txt.
 *
 * graph:  code table with at least list->length entries whose str buffers
 *         hold at least list->length bytes; entry i-1 receives the character
 *         and code of leaf row i.
 * list:   character table; only its length is used here.
 * root:   Huffman tree table as produced by creatHuff.
 * source: NUL-terminated text to encode; characters without a code-table
 *         entry are skipped.
 *
 * Each code is collected by walking from a leaf up to the root, which yields
 * the symbols in reverse order, so they are written into the scratch buffer
 * from its end towards its start. A tree with a single leaf has an empty
 * code, so nothing is written for such input.
 */
void codefile(code_graphPtr graph, ListPtr list,record_mapPtr root,char* source) {
	if (list == NULL || root == NULL || source == NULL) {
		return;
	}
	const int cat = list->length;
	if (cat < 0 || (cat > 0 && graph == NULL)) {
		return;
	}

	FILE* fp = fopen("D:\\D_desktop\\codefile.txt", "w+");
	if (fp == NULL) {
		return;
	}

	char* code_str = (char*)malloc(sizeof(char) * ((size_t)cat + 1));
	if (code_str == NULL) {
		fclose(fp);
		return;
	}
	code_str[cat] = '\0';

	for (int i = 1; i <= cat; i++) {
		int start = cat - 1;	// next free slot, filled from the back
		int child = i;
		graph[i - 1].data = root[i].data;
		// The start >= 0 test keeps a malformed table from writing in front
		// of the buffer; a valid tree never needs more than cat - 1 symbols.
		for (int p = root[i].parent; p != 0 && start >= 0; p = root[p].parent) {
			code_str[start--] = (root[p].lchild == child) ? '0' : '1';
			child = p;
		}
		strcpy(graph[i - 1].str, &code_str[start + 1]);
	}

	const size_t source_len = strlen(source);	// computed once, not per iteration
	for (size_t i = 0; i < source_len; i++) {
		for (int j = 0; j < cat; j++) {
			if (source[i] == graph[j].data) {
				fputs(graph[j].str, fp);
				break;	// each character has exactly one entry
			}
		}
	}

	free(code_str);
	fclose(fp);
}

解码并存入output文件:

        因为每个编码串不可能有重叠部分:每个结点都特意被放在了叶子结点上,如果要有前缀重叠部分就一定得是祖先与后辈的关系!

        通过特定的编码串找回原来的叶子结点(无左右孩子就是其判断条件!)

/*
 * Decode the first line of codefile.txt and write the text to output.txt.
 *
 * len:  number of leaves of the tree; the root is row 2 * len - 1.
 * root: Huffman tree table as produced by creatHuff.
 *
 * The symbols are read one at a time, so the length of the encoded line is
 * not limited by a buffer size. Starting at the root, '0' follows the left
 * child and '1' the right child; reaching a leaf emits its character and
 * restarts at the root. Characters other than '0' and '1' are ignored, and
 * decoding stops at the end of the line, at end of file, or when a symbol
 * leads to the "none" index 0 (malformed input).
 */
void decode(int len,record_mapPtr root) {
	FILE* fp = fopen("D:\\D_desktop\\codefile.txt", "r");
	if (fp == NULL) {
		return;
	}
	FILE* fp2 = fopen("D:\\D_desktop\\output.txt", "w+");
	if (fp2 == NULL) {
		fclose(fp);	// do not leak the input stream
		return;
	}

	if (len > 0 && root != NULL) {
		const int top = 2 * len - 1;
		int R = top;
		int c;
		while ((c = fgetc(fp)) != EOF && c != '\n') {
			if (c == '0') {
				R = root[R].lchild;
			} else if (c == '1') {
				R = root[R].rchild;
			} else {
				continue;
			}
			if (R == 0) {
				break;	// stepped off the tree: the input does not match it
			}
			// A node without children is a leaf: emit it and restart at the root.
			if (root[R].lchild == 0 && root[R].rchild == 0) {
				fputc(root[R].data, fp2);
				R = top;
			}
		}
	}

	fclose(fp);
	fclose(fp2);
}

 效果:

input.txt:

        Life is full of confusing and disordering Particular time,a particular location,Do the arranged thing of ten million time in the brain,Step by step ,the life is hard to avoid delicacy and stiffness No enthusiasm forever,No unexpected happening of surprising and pleasing So,only silently ask myself in mind Next happiness,when will come?

codefile.txt:

        011100010001001111011010000101101100111111010010001010100111001110111111100111100100111111010101000110011101110110001100100101011001000001010011111001001011011110000011001110111010111001010001110001100001111111111010010100011100101011000011110011011110101000101011111000111000110000111111111101001010001110010100100011111111100001100000011110011101001110000001110101100100111011011000111001110010001100111011110110010101011001001000110011101110100111001110101101101110010111110000000100010000001111001010110000111100110110100011001010110010011101101111110111110010000001100111010111110100110110101111101111110110111011010101011011010111110111101001100100111011010010000100111101101000010110101001100011100100101010110001110110000100000001100010010101100101101001000011111110001111110111011011000110010010101010101100001001110011110011010101010110111111000011101110111000110010011111010101000100001011111001011001100111110011010100000110111100111010111110000111011111011100110101000100111111011111110110110110010101010011000011110111111011100000110011101110100111001110101011111011110001111111000000101000110011101110110001100100101010111100101101100001010001100111011101111110100011111010001111000010011101101010100000101101110001100010011101101100001010111001110111110001110101011101001010011101000110010111110000011001001010111111001101010001001101010100110000111101111000110011010101010111101001000010100111011100101010000100000100010101111111001111110011010100011101000110 

​​​​​​​

 完整代码:

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<malloc.h>

// Size limit, in bytes, that Read() passes to fgets for one line of input.
#define Max 1024
#define catage 128// number of ASCII codes; codes 32-126 are the printable characters found on a keyboard


/* One distinct character together with the number of times it was counted. */
typedef struct {
	char ch;	// the character itself
	int count;	// number of occurrences of ch
}node;

/* Table of counted characters, filled front to back. */
typedef struct {
	node data[catage];// 128 kinds of characters at most
	int length;	// number of entries of data currently in use
}List,*ListPtr;

/*
 * One row of the parent/child/weight table that stores the Huffman tree in an
 * array. Index 0 is used as "none" for parent, lchild and rchild.
 */
typedef struct {
	int parent;	// index of the parent row, 0 while the node has no parent
	int lchild;	// index of the left child, 0 for a leaf
	int rchild;	// index of the right child, 0 for a leaf
	int weight;	// occurrence count of a leaf, or the sum of both children
	char data;	// character stored in a leaf row
}record_map,*record_mapPtr;// record table

/* One entry of the code table: a character and its Huffman code string. */
typedef struct {// plays the role of a map from char to code string
	char data;	// the character
	char* str;	// NUL-terminated string of '0'/'1' symbols encoding data
}code_graph, * code_graphPtr;// code table

ListPtr Init() {
	ListPtr list = (ListPtr)malloc(sizeof(List));
	list->length = 0;
	return list;
}

/*
 * Read the first line of the input file into source.
 *
 * source must point to a buffer of at least Max bytes. At most Max - 1
 * characters are stored and the result is always NUL-terminated; a trailing
 * newline, if one was read, is kept. When the file cannot be opened or
 * nothing can be read, source is set to the empty string so callers never
 * see uninitialized data.
 */
void Read(char* source) {
	if (source == NULL) {
		return;
	}
	source[0] = '\0';

	// Opened read-only: the file is never written, and "r+" would fail on a
	// read-only file.
	FILE* fp = fopen("D:\\D_desktop\\input.txt", "r");
	if (fp == NULL) {
		return;
	}
	if (fgets(source, Max, fp) == NULL) {
		source[0] = '\0';	// buffer contents are indeterminate after a read error
	}
	fclose(fp);
}

/*
 * Count how often each ASCII character occurs in source and append one entry
 * per distinct character to list, in ascending character-code order.
 *
 * list:   table that receives the (character, count) pairs; entries are
 *         appended after the list->length entries already present.
 * source: NUL-terminated text to analyse.
 *
 * Bytes outside the range 0..catage-1 (for example the bytes of multi-byte
 * UTF-8 text) are ignored, because the table only covers ASCII.
 */
void CharCount(ListPtr list, char* source) {
	if (list == NULL || source == NULL) {
		return;
	}

	int ASII[catage];	// ASII[c] is the number of occurrences of character code c
	memset(ASII, 0, sizeof ASII);

	for (const char* p = source; *p != '\0'; p++) {
		// Go through unsigned char: a plain char may be negative and would
		// index in front of the array.
		unsigned char code = (unsigned char)*p;
		if (code < catage) {
			ASII[code]++;
		}
	}

	// The second condition keeps repeated calls from overflowing list->data.
	for (int i = 0; i < catage && list->length < catage; i++) {
		if (ASII[i] != 0) {
			list->data[list->length].ch = (char)i;
			list->data[list->length].count = ASII[i];
			list->length++;
		}
	}
}

/*
 * Pick the two lightest nodes that do not have a parent yet.
 *
 * len:      rows 1..len of root are examined.
 * tow_ints: receives two row indices: tow_ints[0] is the lightest node and
 *           tow_ints[1] the lightest of the remaining ones. Among nodes of
 *           equal weight the lowest index wins. A slot is set to 0 (the
 *           "none" index) when no candidate exists for it.
 * root:     the parent/child/weight table.
 *
 * The best candidate is tracked by index, so no "largest int" sentinel such
 * as the MSVC-only _CRT_INT_MAX is needed.
 */
void select(int len, int* tow_ints, record_mapPtr root) {
	if (tow_ints == NULL) {
		return;
	}
	tow_ints[0] = 0;
	tow_ints[1] = 0;
	if (root == NULL) {
		return;
	}

	int first = 0;
	for (int i = 1; i <= len; i++) {
		if (root[i].parent != 0) {
			continue;	// already merged into a subtree
		}
		if (first == 0 || root[i].weight < root[first].weight) {
			first = i;
		}
	}

	int second = 0;
	for (int i = 1; i <= len; i++) {
		if (i == first || root[i].parent != 0) {
			continue;
		}
		if (second == 0 || root[i].weight < root[second].weight) {
			second = i;
		}
	}

	tow_ints[0] = first;
	tow_ints[1] = second;
}

/*
 * Build the Huffman tree for the characters in list as an array of rows.
 *
 * Layout of the returned table, with n = list->length:
 *   row 0          unused; index 0 means "no parent" / "no child"
 *   rows 1..n      leaves, in the order of list->data
 *   rows n+1..2n-1 internal nodes; row 2n-1 is the root of the tree
 *
 * Returns a heap-allocated table of 2n+1 rows that the caller releases with
 * free(), or NULL when list is NULL, its length is out of range, or the
 * allocation fails. For n == 0 the table consists of the unused row only.
 */
record_mapPtr creatHuff(ListPtr list) {
	if (list == NULL || list->length < 0 || list->length > catage) {
		return NULL;
	}

	const int leaves = list->length;
	const int total = 2 * leaves - 1;

	// calloc zeroes every parent/lchild/rchild field, i.e. sets them to "none".
	record_mapPtr root = (record_mapPtr)calloc((size_t)leaves * 2 + 1, sizeof(record_map));
	if (root == NULL) {
		return NULL;
	}

	// Copy exactly the used list entries; data[leaves] is not a valid entry.
	for (int i = 0; i < leaves; i++) {
		root[i + 1].weight = list->data[i].count;
		root[i + 1].data = list->data[i].ch;
	}

	// Repeatedly merge the two lightest parentless nodes into a new row.
	int min[2];
	for (int i = leaves + 1; i <= total; i++) {
		select(i - 1, min, root);
		root[i].lchild = min[0];
		root[i].rchild = min[1];
		root[i].weight = root[min[0]].weight + root[min[1]].weight;
		root[min[0]].parent = i;
		root[min[1]].parent = i;
	}
	return root;
}


code_graphPtr Init_graph(int len) {
	code_graphPtr graph = (code_graphPtr)malloc(sizeof(code_graph)*len);

	for (int i = 0; i < len; i++) {
		graph[i].str = (char*)malloc(sizeof(char) * len);
	}
	return graph;
}


/*
 * Fill the code table and write the encoded form of source to codefile.txt.
 *
 * graph:  code table with at least list->length entries whose str buffers
 *         hold at least list->length bytes; entry i-1 receives the character
 *         and code of leaf row i.
 * list:   character table; only its length is used here.
 * root:   Huffman tree table as produced by creatHuff.
 * source: NUL-terminated text to encode; characters without a code-table
 *         entry are skipped.
 *
 * Each code is collected by walking from a leaf up to the root, which yields
 * the symbols in reverse order, so they are written into the scratch buffer
 * from its end towards its start. A tree with a single leaf has an empty
 * code, so nothing is written for such input.
 */
void codefile(code_graphPtr graph, ListPtr list,record_mapPtr root,char* source) {
	if (list == NULL || root == NULL || source == NULL) {
		return;
	}
	const int cat = list->length;
	if (cat < 0 || (cat > 0 && graph == NULL)) {
		return;
	}

	FILE* fp = fopen("D:\\D_desktop\\codefile.txt", "w+");
	if (fp == NULL) {
		return;
	}

	char* code_str = (char*)malloc(sizeof(char) * ((size_t)cat + 1));
	if (code_str == NULL) {
		fclose(fp);
		return;
	}
	code_str[cat] = '\0';

	for (int i = 1; i <= cat; i++) {
		int start = cat - 1;	// next free slot, filled from the back
		int child = i;
		graph[i - 1].data = root[i].data;
		// The start >= 0 test keeps a malformed table from writing in front
		// of the buffer; a valid tree never needs more than cat - 1 symbols.
		for (int p = root[i].parent; p != 0 && start >= 0; p = root[p].parent) {
			code_str[start--] = (root[p].lchild == child) ? '0' : '1';
			child = p;
		}
		strcpy(graph[i - 1].str, &code_str[start + 1]);
	}

	const size_t source_len = strlen(source);	// computed once, not per iteration
	for (size_t i = 0; i < source_len; i++) {
		for (int j = 0; j < cat; j++) {
			if (source[i] == graph[j].data) {
				fputs(graph[j].str, fp);
				break;	// each character has exactly one entry
			}
		}
	}

	free(code_str);
	fclose(fp);
}

/*
 * Decode the first line of codefile.txt and write the text to output.txt.
 *
 * len:  number of leaves of the tree; the root is row 2 * len - 1.
 * root: Huffman tree table as produced by creatHuff.
 *
 * The symbols are read one at a time, so the length of the encoded line is
 * not limited by a buffer size. Starting at the root, '0' follows the left
 * child and '1' the right child; reaching a leaf emits its character and
 * restarts at the root. Characters other than '0' and '1' are ignored, and
 * decoding stops at the end of the line, at end of file, or when a symbol
 * leads to the "none" index 0 (malformed input).
 */
void decode(int len,record_mapPtr root) {
	FILE* fp = fopen("D:\\D_desktop\\codefile.txt", "r");
	if (fp == NULL) {
		return;
	}
	FILE* fp2 = fopen("D:\\D_desktop\\output.txt", "w+");
	if (fp2 == NULL) {
		fclose(fp);	// do not leak the input stream
		return;
	}

	if (len > 0 && root != NULL) {
		const int top = 2 * len - 1;
		int R = top;
		int c;
		while ((c = fgetc(fp)) != EOF && c != '\n') {
			if (c == '0') {
				R = root[R].lchild;
			} else if (c == '1') {
				R = root[R].rchild;
			} else {
				continue;
			}
			if (R == 0) {
				break;	// stepped off the tree: the input does not match it
			}
			// A node without children is a leaf: emit it and restart at the root.
			if (root[R].lchild == 0 && root[R].rchild == 0) {
				fputc(root[R].data, fp2);
				R = top;
			}
		}
	}

	fclose(fp);
	fclose(fp2);
}




int main() {
	char source[1000];
	Read(source);
	ListPtr list = Init();
	CharCount(list, source);
	record_mapPtr root = creatHuff(list);
	code_graphPtr graph = Init_graph(list->length);
	codefile(graph, list, root,source);
	decode(list->length,root);
	return 0;
}

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值