哈夫曼编/译码器

最新推荐文章于 2024-01-15 10:42:38 发布

41%加1

最新推荐文章于 2024-01-15 10:42:38 发布

阅读量944

点赞数

本文链接：https://blog.csdn.net/fyrrr/article/details/122209226

版权

哈夫曼编码编码解码字符频度哈夫曼树文本压缩

关键词由CSDN通过智能技术生成

哈夫曼编/译码器

建立哈夫曼树：读入文件(*.source)，统计文件中字符出现的频度，并以这些字符的频度作为权值，建立哈夫曼树。
编码：利用已建立好的哈夫曼树，获得各个字符的哈夫曼编码，并对正文进行编码，然后输出编码结果，并存入文件(*.code)中。
译码：利用已建立好的哈夫曼树将文件(*.code)中的代码进行译码，并输出译码结果，并存入文件(*.decode)中。
以下代码可以实现对大部分中文和英文的编码和译码

代码如下：

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Capacity constant: HuffmanCode has N slots and HuffmanTree has M + 1 nodes. */
#define N 100
/* 2 * N - 1, parenthesized so the macro expands as a single value inside
 * larger expressions (e.g. 2 * M). */
#define M (2 * N - 1)


/*
 * One node of the Huffman tree. HuffmanTree is a fixed array of M + 1 nodes.
 * The functions in this file index nodes starting at 1 and use the value 0 in
 * Parent/Lchild/Rchild to mean "no such node".
 */
typedef struct
{
	char ch[3];      // symbol bytes: a single byte, or two bytes followed by '\0'
	int weight;     // weight of the node
	int Parent, Lchild, Rchild;     // indices of the parent, left child and right child
}HTNode, HuffmanTree[M + 1];    // Huffman tree; slot 0 is not used


/*
 * One entry of the weight table produced by getweight(). Entry 0 carries no
 * symbol: its WEI field holds the number of entries that follow.
 */
typedef struct
{
	char ch[3];     // symbol bytes: a single byte, or two bytes followed by '\0'
	int WEI;     // weight of the symbol (entry 0: number of table entries)
}weighting;     // weight table entry


/*
 * String descriptor. Coding() stores a pointer to a Huffman code in s and
 * keeps the number of stored codes in the len field of element 0.
 */
typedef struct    // heap string
{
	char* s;
	int len;
}HString;      // heap string


/* Table of code strings, one per leaf; CrtHuffmanCode() fills slots 1..n. */
typedef char* HuffmanCode[N];


void read(FILE* fp, char str[]);         // read a file into str
weighting* getweight(char str[], weighting w[]);     // compute the symbol weights
void Printvalue(weighting w[]);      // print the weight table
void CrtHuffmanTree(HuffmanTree ht, weighting w[], int n);      // build the Huffman tree
void select(HuffmanTree ht, int k, int* s1, int* s2);     // pick the two parentless nodes with the smallest weights among ht[1..k] (s1, s2)
void PrintTree(HuffmanTree ht, int n);     // print the Huffman tree
void CrtHuffmanCode(HuffmanTree ht, HuffmanCode hc, int n);    // derive the Huffman codes
void PrintCode(HuffmanCode hc, int n);     // print the Huffman codes
HString* Coding(char str[], weighting w[], HuffmanCode hc);     // encode the text, print it, and save the encoding to a chosen file
void Decoding(char code[], HuffmanTree ht, int n);     // decode and save the result to Decode.txt


int main()
{
	HuffmanTree H;
	HuffmanCode T;
	weighting WEI[10000];
	HString* STR;
	FILE* fp = NULL;
	char str[10000];
	char code[10000];
	weighting* W;
	printf("请输入获取电文的文件：");
	read(fp, str);
	W = getweight(str, WEI);     //计算权值
	int len = W[0].WEI;      //字符串数组长度
	CrtHuffmanTree(H, W, len);       //建立哈夫曼树
	CrtHuffmanCode(H, T, len);       //哈夫曼编码
	PrintCode(T, len);       //打印哈夫曼编码
	STR = Coding(str, W, T);     //编码并打印
	printf("请输入获取编码的文件：");
	read(fp, code);    //读入编码，将编码存入code
	Decoding(code, H, 2 * len - 1);     //译码
	return 0;
}


/*
 * Reads a file name from stdin, loads that file into str as a NUL-terminated
 * string and echoes the content to stdout.
 *
 * fp  - ignored on entry; only used as a local stream handle.
 * str - destination buffer. Callers in this file pass 10000-byte arrays, so at
 *       most 9999 bytes are stored and any remaining file content is dropped.
 *
 * The process exits with EXIT_FAILURE when no file name can be read, the file
 * cannot be opened, or the file is empty.
 */
void read(FILE* fp, char str[])
{
	enum { NAME_CAP = 40, TEXT_CAP = 10000 };
	char filename[NAME_CAP];

	/* fgets is the portable, bounded replacement for the one-argument gets_s. */
	if (fgets(filename, sizeof filename, stdin) == NULL)
	{
		printf("\nERROR!\n");
		exit(EXIT_FAILURE);
	}
	size_t name_len = strcspn(filename, "\r\n");
	if (filename[name_len] == '\0')
	{
		/* No newline was stored: discard the rest of an over-long line so it
		 * is not mistaken for the answer to the next prompt. */
		int extra;
		while ((extra = getchar()) != EOF && extra != '\n')
		{
		}
	}
	filename[name_len] = '\0';

	fp = fopen(filename, "r");
	if (fp == NULL)
	{
		printf("\nERROR!\n");
		exit(EXIT_FAILURE);
	}

	/* fgetc returns int; keeping it in an int distinguishes EOF from a 0xFF
	 * data byte, which occurs in multi-byte encoded text. */
	size_t used = 0;
	int byte;
	while (used < TEXT_CAP - 1 && (byte = fgetc(fp)) != EOF)
	{
		str[used++] = (char)byte;
	}
	str[used] = '\0';
	fclose(fp);

	if (used == 0)
	{
		printf("文件为空！！！");
		exit(EXIT_FAILURE);
	}
	printf("文件中读出内容为：\n");
	puts(str);
}


/*
 * Builds the Huffman tree for n symbols.
 *
 * ht - node array to fill; nodes 1..n become the leaves and nodes
 *      n+1..2n-1 the internal nodes, the last one being the root.
 * w  - weight table; entries 1..n supply the symbol bytes and weights.
 * n  - number of symbols.
 *
 * The finished tree is printed with PrintTree().
 */
void CrtHuffmanTree(HuffmanTree ht, weighting w[], int n)
{
	int m = 2 * n - 1;      // total number of nodes
	int i;

	// leaves: copy symbol and weight, no links yet
	for (i = 1; i <= n; i++)
	{
		/* Copy the fixed-size field as raw bytes: a single-byte symbol is not
		 * guaranteed to be NUL-terminated, so strcpy could run past it. */
		memcpy(ht[i].ch, w[i].ch, sizeof ht[i].ch);
		ht[i].weight = w[i].WEI;
		ht[i].Rchild = 0;
		ht[i].Lchild = 0;
		ht[i].Parent = 0;
	}
	// internal nodes: no symbol, weight filled in below
	for (i = n + 1; i <= m; i++)
	{
		memset(ht[i].ch, 0, sizeof ht[i].ch);
		ht[i].weight = 0;
		ht[i].Rchild = 0;
		ht[i].Lchild = 0;
		ht[i].Parent = 0;
	}
	// repeatedly join the two lightest parentless nodes under a new node
	for (i = n + 1; i <= m; i++)
	{
		int s1, s2;
		select(ht, i - 1, &s1, &s2);
		ht[i].weight = ht[s1].weight + ht[s2].weight;
		ht[i].Lchild = s1;
		ht[i].Rchild = s2;
		ht[s1].Parent = i;
		ht[s2].Parent = i;
	}
	// print exactly the nodes that were built
	PrintTree(ht, m);
}


/*
 * Finds, among nodes ht[1..k] that have no parent, the node with the smallest
 * weight (*s1) and the smallest-weight node other than *s1 (*s2).
 *
 * On equal weights the node with the higher index wins, in both searches.
 * When fewer than two parentless nodes exist, the missing result is set to 0.
 * No upper limit is assumed on the weights.
 */
void select(HuffmanTree ht, int k, int* s1, int* s2)
{
	int lightest = 0;       // 0 means "nothing found yet"
	int runner_up = 0;
	int i;

	for (i = 1; i <= k; i++)
	{
		if (ht[i].Parent != 0) continue;
		if (lightest == 0 || ht[i].weight <= ht[lightest].weight)
		{
			lightest = i;
		}
	}
	for (i = 1; i <= k; i++)
	{
		if (i == lightest || ht[i].Parent != 0) continue;
		if (runner_up == 0 || ht[i].weight <= ht[runner_up].weight)
		{
			runner_up = i;
		}
	}
	*s1 = lightest;
	*s2 = runner_up;
}


/*
 * Derives the code string of every leaf by walking from the leaf up to the
 * root: a step up from a right child contributes '1', any other step '0'.
 *
 * ht - Huffman tree whose leaves are nodes 1..n.
 * hc - receives one heap-allocated, NUL-terminated code per leaf in hc[1..n].
 * n  - number of leaves; nothing is done when n <= 0.
 *
 * The process exits with EXIT_FAILURE when memory cannot be allocated.
 */
void CrtHuffmanCode(HuffmanTree ht, HuffmanCode hc, int n)
{
	char* cd;
	int i;

	if (n <= 0)
	{
		return;
	}
	cd = (char*)malloc((size_t)n * sizeof(char));      // scratch buffer, filled from the back
	if (cd == NULL)
	{
		printf("\nERROR!\n");
		exit(EXIT_FAILURE);
	}
	cd[n - 1] = '\0';      // codes are written backwards, so the terminator goes in first
	for (i = 1; i <= n; i++)
	{
		int start = n - 1;      // position of the first code character so far
		int c = i;      // current node
		int p = ht[i].Parent;     // its parent
		/* The scratch buffer holds at most n - 1 code characters; start > 0
		 * keeps a malformed tree from writing in front of the buffer. */
		while (p != 0 && start > 0)
		{
			start--;
			cd[start] = (ht[p].Rchild == c) ? '1' : '0';
			c = p;
			p = ht[p].Parent;
		}
		hc[i] = (char*)malloc((size_t)(n - start) * sizeof(char));
		if (hc[i] == NULL)
		{
			printf("\nERROR!\n");
			exit(EXIT_FAILURE);
		}
		strcpy(hc[i], &cd[start]);
	}
	free(cd);
}


/*
 * Prints nodes 1..n of the Huffman tree as a table with the columns
 * symbol, weight, Parent, Lchild and Rchild.
 *
 * The symbol column shows \n for a newline, NULL for a node without a symbol,
 * ' ' for a space, the whole ch string when ch[2] is '\0', and otherwise only
 * the first byte.
 */
void PrintTree(HuffmanTree ht, int n)
{
	int i;
	printf("\nPrinthuffmantree:\n");
	printf("字符\tweight\tParent\tLchild\tRchild\n");
	for (i = 1; i <= n; i++)
	{
		char single[2] = { ht[i].ch[0], '\0' };     // first byte as a printable string
		const char* label;

		if (ht[i].ch[0] == '\n')
			label = "\\n";
		else if (ht[i].ch[0] == '\0')       // compare with the NUL character, not the NULL pointer
			label = "NULL";
		else if (ht[i].ch[0] == ' ')
			label = "' '";
		else if (ht[i].ch[2] == '\0')       // terminated field: print it as a string
			label = ht[i].ch;
		else
			label = single;

		printf("%s\t%d\t%d\t%d\t%d\n", label, ht[i].weight, ht[i].Parent, ht[i].Lchild, ht[i].Rchild);
	}
	printf("\n\n");
}


/*
 * Prints the code strings hc[1..n], one per line, under a "Printcode:"
 * heading and followed by two blank lines.
 */
void PrintCode(HuffmanCode hc, int n)
{
	int slot = 1;

	printf("\nPrintcode:\n");
	while (slot <= n)
	{
		printf("%s\n", hc[slot]);
		++slot;
	}
	printf("\n\n");
}


/*
 * Returns nonzero when c starts a two-byte symbol, i.e. when it is not an
 * ASCII letter, a space, a newline or the terminator.
 */
static int gw_starts_pair(char c)
{
	int is_letter = (c >= 65 && c <= 90) || (c >= 97 && c <= 122);
	return !is_letter && c != ' ' && c != '\n' && c != '\0';
}


/*
 * Counts how often each symbol occurs in str and stores the result in w.
 *
 * The text is split left to right into symbols: an ASCII letter, space or
 * newline is a one-byte symbol; any other byte forms a two-byte symbol
 * together with the byte after it (this is how two-byte encoded Chinese
 * characters are handled). Symbols appear in w in order of first occurrence.
 *
 * str - NUL-terminated text.
 * w   - output table; it must have room for one entry per symbol plus entry 0.
 *       Every stored ch field is fully written and padded with '\0'.
 *
 * Returns w. w[0].WEI holds the number of distinct symbols and w[1..] the
 * symbols with their counts. The table is also printed with Printvalue().
 */
weighting* getweight(char str[], weighting* w)
{
	size_t len = strlen(str);
	size_t pos = 0;
	int count = 0;      // number of distinct symbols stored so far

	while (pos < len)
	{
		char sym[3] = { '\0', '\0', '\0' };
		int k;

		sym[0] = str[pos++];
		if (gw_starts_pair(sym[0]) && pos < len)
		{
			sym[1] = str[pos++];        // second byte of a two-byte symbol
		}

		// look the symbol up among those already recorded
		for (k = 1; k <= count; k++)
		{
			if (memcmp(w[k].ch, sym, sizeof sym) == 0) break;
		}
		if (k <= count)
		{
			w[k].WEI++;
		}
		else
		{
			count++;
			memcpy(w[count].ch, sym, sizeof sym);
			w[count].WEI = 1;
		}
	}

	memset(w[0].ch, 0, sizeof w[0].ch);
	w[0].WEI = count;       // entry 0 carries the table length
	Printvalue(w);
	/* The result lives in the caller's table, so the returned pointer stays
	 * valid after this function returns. */
	return w;
}


/*
 * Prints the weight table: one line per entry 1..w[0].WEI with the symbol and
 * its weight. A newline is shown as \n and a space as ' '. Other symbols are
 * printed as the whole ch string when ch[2] is '\0', otherwise as the first
 * byte only.
 */
void Printvalue(weighting w[])
{
	const int count = w[0].WEI;
	int pos;

	printf("\nPrintvalue:\n");
	printf("字符\tweight\n");
	for (pos = 1; pos <= count; ++pos)
	{
		const weighting* entry = &w[pos];

		switch (entry->ch[0])
		{
		case '\n':
			printf("\\n\t%d\n", entry->WEI);
			break;
		case ' ':
			printf("' '\t%d\n", entry->WEI);
			break;
		default:
			if (entry->ch[2] != '\0')
				printf("%c\t%d\n", entry->ch[0], entry->WEI);
			else
				printf("%s\t%d\n", entry->ch, entry->WEI);
			break;
		}
	}
	printf("\n\n");
}


/*
 * Encodes str with the given code table, prints the encoding, and writes it
 * to a file whose name is read from stdin.
 *
 * str - NUL-terminated text to encode.
 * w   - weight table; w[0].WEI is the number of symbols and w[j].ch the bytes
 *       of symbol j.
 * hc  - code table; hc[j] is the code of symbol j.
 *
 * The text is split the same way as when the weights are counted: letters,
 * spaces and newlines are one-byte symbols, any other byte is taken together
 * with the following byte. Symbols that are not found in w are skipped.
 *
 * Returns a heap-allocated array: element i points (via s) at the code of the
 * i-th encoded symbol, and element 0's len field holds the number of
 * elements. The caller owns the array and may free() it; the code strings
 * themselves belong to hc.
 *
 * The process exits with EXIT_FAILURE on allocation failure, when no file
 * name can be read, or when the output file cannot be opened.
 */
HString* Coding(char str[], weighting w[], HuffmanCode hc)
{
	enum { NAME_CAP = 40 };
	char filename[NAME_CAP];
	FILE* fp;
	size_t len = strlen(str);
	int length = w[0].WEI;
	int k = 0;      // number of codes collected
	size_t i;
	int j;
	int n;

	/* One slot per input byte is always enough, and heap storage stays valid
	 * after the function returns. */
	HString* HS = (HString*)calloc(len + 1, sizeof *HS);
	if (HS == NULL)
	{
		printf("\nERROR!\n");
		exit(EXIT_FAILURE);
	}

	for (i = 0; i < len; i++)
	{
		char s[3] = { '\0', '\0', '\0' };
		int flag = 0;       // 1 when the current symbol is two bytes long

		s[0] = str[i];
		if ((str[i] < 65 || (str[i] > 90 && str[i] < 97) || str[i] > 122) && str[i] != ' ' && str[i] != '\n' && str[i] != '\0')
		{
			s[1] = str[i + 1];
			i++;
			flag = 1;
		}
		for (j = 1; j <= length; j++)
		{
			int hit;
			if (flag == 1)
				hit = (strncmp(s, w[j].ch, sizeof s) == 0);     // bounded: never reads past the 3-byte field
			else
				hit = (w[j].ch[0] == s[0]);
			if (hit)
			{
				HS[k].s = hc[j];
				k++;
				break;
			}
		}
	}
	HS[0].len = k;

	printf("编码结果：");
	for (n = 0; n < k; n++)
	{
		printf("%s", HS[n].s);
	}
	printf("\n");
	printf("请输入要保存编码的文件：");
	/* fgets is the portable, bounded replacement for the one-argument gets_s. */
	if (fgets(filename, sizeof filename, stdin) == NULL)
	{
		printf("\nERROR!\n");
		exit(EXIT_FAILURE);
	}
	size_t name_len = strcspn(filename, "\r\n");
	if (filename[name_len] == '\0')
	{
		// over-long line: drop the rest so it does not answer the next prompt
		int extra;
		while ((extra = getchar()) != EOF && extra != '\n')
		{
		}
	}
	filename[name_len] = '\0';
	printf("\n");
	fp = fopen(filename, "w");
	if (fp == NULL)
	{
		printf("\nERROR!\n");
		exit(EXIT_FAILURE);
	}
	for (n = 0; n < k; n++)
	{
		fprintf(fp, "%s", HS[n].s);
	}
	fclose(fp);
	return HS;
}



/*
 * Decodes a string of '0'/'1' characters with the Huffman tree, printing the
 * decoded symbols and writing them to Decode.txt.
 *
 * code - NUL-terminated encoded text; characters other than '0' and '1' are
 *        ignored.
 * ht   - Huffman tree.
 * n    - index of the root node.
 *
 * Starting at the root, '1' moves to the right child and '0' to the left
 * child. When a node without a left child is reached, its symbol is emitted
 * and decoding restarts at the root. Decoding stops with an error message if
 * a step would leave the tree (the current node lacks the requested child).
 *
 * The process exits with EXIT_FAILURE when Decode.txt cannot be opened.
 */
void Decoding(char code[], HuffmanTree ht, int n)
{
	printf("译码结果将保存至Decode.txt文件\n");
	if (n < 1)
	{
		printf("\nERROR!\n");
		return;
	}
	FILE* fp = fopen("Decode.txt", "w");
	if (fp == NULL)
	{
		printf("\nERROR!\n");
		exit(EXIT_FAILURE);
	}

	int cur = n;        // current node, starts at the root
	size_t len = strlen(code);
	size_t i;
	for (i = 0; i < len; i++)
	{
		int next;
		if (code[i] == '1')
			next = ht[cur].Rchild;
		else if (code[i] == '0')
			next = ht[cur].Lchild;
		else
			continue;

		if (next == 0)
		{
			// the tree has no such branch: the input does not match this tree
			printf("\nERROR!\n");
			break;
		}
		cur = next;

		if (ht[cur].Lchild == 0)        // leaf reached: emit its symbol
		{
			if (ht[cur].ch[2] == '\0')
			{
				printf("%s", ht[cur].ch);
				fprintf(fp, "%s", ht[cur].ch);
			}
			else
			{
				printf("%c", ht[cur].ch[0]);
				fprintf(fp, "%c", ht[cur].ch[0]);
			}
			cur = n;
		}
	}
	fclose(fp);
}

41%加1

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
哈夫曼编/译码器

#include<stdio.h>#include<stdlib.h>#include<string.h>#define N 100#define M 2 * N - 1typedef struct{ char ch; //字符 int weight; //权值 int Parent, Lchild, Rchild; //双亲，左孩子，右孩子}HTNode, HuffmanTree[M + 1]; //哈夫曼树，0号
复制链接

扫一扫