哈夫曼编码 Huffman code(C语言实现)

 实验环境:Windows11 + Visual Studio

实验任务:

 以下的代码完成了前五个要求。

代码可以直接粘贴到IDE中运行

#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <math.h>
#define ASCIInum 127	// number of slots in the per-character tables; NOTE(review): ASCII codes 0..127 are 128 characters, so code 127 has no slot here - confirm this is intended
#define Maxsize 200	// capacity constant for the array that stores the Huffman tree nodes
#define MaxCodelen 16	// maximum number of '0'/'1' characters stored for one character's code
#define MaxArticlelen 5000	// size constant for fixed text buffers that hold article / encoded text
// Frequency record for one character of the input file.
typedef struct CharacterNode
{
	char ch;		// the character this entry stands for
	int freq;	// how many times that character occurred
}CharacterNode;	// used to tally the frequency of the characters in the file being read

// One node of the Huffman tree. The tree is stored in an array, so the
// child/parent links are array indices; -1 marks a missing link.
typedef struct HTNode
{
	char ch;	// character carried by the node
	int weight;	// weight of the Huffman node
	int lchild;	// index of the left child
	int rchild;	// index of the right child
	int parent;	// index of the parent
}HTNode;		// Huffman tree node
// One entry of the code table: a character and its Huffman code.
typedef struct CodeNode
{
	char ch;			// the character being encoded
	char bits[MaxCodelen + 1];	// the character's code, a NUL-terminated string of '0'/'1'
}CodeNode;		// Huffman code table entry

// Forward declarations of the functions defined after main.
int Readfile(char* file,CharacterNode *asc,int *sum);
void PrintFreq(CharacterNode* asc,int n);
void CreateHuffTree(CharacterNode*asc,int n,HTNode* T);
void SelectMin(HTNode* T, int n, int* p1, int* p2);
void DisplayHuffTree(HTNode* T,int n);
void CharsetEncoding(HTNode* T,CodeNode* Huffcode,int n);
void PrintEncoding(CodeNode* Huffcode, int n);
int FileEncoding(CodeNode* Huffcode, int n, char* infile,char* outfile);
char* FindChar(CodeNode* Huffcode, int n, char ch);
float ComputeRate_1(int chnum,char*outfile);
float ComputeRate_2(int chnum, char* outfile, int n);
int Decoding(char* Codingfile, char*textfile,HTNode* T,int valid,int chnum);
void PrintText(char* file);
void FileEncodingBinary(CodeNode* Huffcode, int n, char* infile, char* outfilebinary);
int DecodingBinary(char* CodingfileBinary, char* textfile, HTNode* T, int valid, int chnum);

// Program entry point: runs the whole Huffman pipeline on "data.txt".
// Steps: count character frequencies, build the tree, derive the codes,
// write the encoded text (as '0'/'1' characters and as packed bits),
// report the compression rates, then decode both files again.
// Returns 0 on success, EXIT_FAILURE when the input cannot be read or
// contains nothing to encode.
int main(void)
{
	CharacterNode asc[ASCIInum];
	char* filename = "data.txt";
	int chnum = 0;		// number of characters counted in the file
	int valid = 0;		// number of distinct characters that occur

	valid = Readfile(filename, asc, &chnum);
	if (valid <= 0)
	{
		// -1 means the file could not be opened, 0 means it was empty.
		// Either way there is no tree to build, and the later stages
		// would index the node array with a negative root.
		fprintf(stderr, "nothing to encode in %s\n", filename);
		return EXIT_FAILURE;
	}
	PrintFreq(asc, valid);

	// A Huffman tree with k leaves has 2k-1 nodes, so size the array for
	// the largest possible alphabet instead of a smaller fixed guess.
	HTNode huff[2 * ASCIInum - 1];
	CreateHuffTree(asc, valid, huff);
	DisplayHuffTree(huff, valid);

	CodeNode Huffcode[ASCIInum];
	CharsetEncoding(huff, Huffcode, valid);
	PrintEncoding(Huffcode, valid);

	char* outfile = "HuffEncoding.txt";
	char* outfilebinary = "HuffEncodingBinary.dat";
	FileEncoding(Huffcode, valid, filename, outfile);
	FileEncodingBinary(Huffcode, valid, filename, outfilebinary);
	ComputeRate_1(chnum, outfile);
	ComputeRate_2(chnum, outfile, valid);

	char* textfile = "Decode.txt";
	char* textfileFromB = "DecodeBinary.txt";
	PrintText(filename);
	Decoding(outfile, textfile, huff, valid, chnum);
	DecodingBinary(outfilebinary, textfileFromB, huff, valid, chnum);
	return 0;
}

// Read a text file, count how often each character occurs and sort the
// table by descending frequency.
//   file - path of the text file to read
//   asc  - table of ASCIInum entries, fully (re)initialised by this call
//   sum  - incremented once per counted character (the caller sets the
//          starting value; it is not reset here)
// Returns the number of distinct characters that occurred, or -1 if the
// file could not be opened. Bytes whose value is >= ASCIInum have no slot
// in the table; they are skipped and not added to *sum.
int Readfile(char* file,CharacterNode* asc,int*sum)
{
	// Initialise first so the table is well defined even if opening fails.
	for (int i = 0;i < ASCIInum;++i)
	{
		asc[i].ch = (char)i;
		asc[i].freq = 0;
	}

	FILE* fp = fopen(file, "r");
	if (fp == NULL)
	{
		printf("open failure\n");
		return -1;
	}

	// fgetc returns an int so that EOF can be told apart from every byte
	// value; storing it in a char would confuse byte 0xFF with EOF.
	int c;
	while ((c = fgetc(fp)) != EOF)
	{
		if (c >= ASCIInum)
		{
			continue;	// outside the table: indexing with it would overflow asc
		}
		asc[c].freq++;
		(*sum)++;
	}
	fclose(fp);

	// Selection sort, descending by frequency.
	for (int i = 0;i < ASCIInum;++i)
	{
		int k = i;
		for (int j = i + 1;j < ASCIInum;++j)
		{
			if (asc[j].freq > asc[k].freq)
			{
				k = j;
			}
		}
		if (k != i)
		{
			CharacterNode temp = asc[i];
			asc[i] = asc[k];
			asc[k] = temp;
		}
	}

	// Count the entries whose frequency is not zero.
	int count = 0;
	for (int i = 0;i < ASCIInum;++i)
	{
		if (asc[i].freq)
		{
			count++;
		}
	}
	return count;
}

// Print the frequency table: the first n entries of asc, one per line.
// Space and newline get a readable label instead of the raw character.
void PrintFreq(CharacterNode* asc, int n)
{
	printf("\n-----------------\n");
	printf("文本的字符种类有%d个\n", n);
	printf("字符\t频度\t\n");

	for (int row = 0; row < n; ++row)
	{
		const CharacterNode* entry = &asc[row];
		switch (entry->ch)
		{
		case ' ':
			printf("空格\t%d\n", entry->freq);
			break;
		case '\n':
			printf("\\n\t%d\n", entry->freq);
			break;
		default:
			printf("%c\t%d\n", entry->ch, entry->freq);
			break;
		}
	}

	printf("-----------------\n");
}

// Build the Huffman tree from the counted character frequencies.
//   asc - frequency table; its first n entries are the characters to encode
//   n   - number of leaves (distinct characters)
//   T   - output node array with room for 2*n-1 nodes; leaves occupy
//         indices 0..n-1, merged nodes n..2n-2, and the root is the last one
void CreateHuffTree(CharacterNode* asc, int n, HTNode* T)
{
	const int total = 2 * n - 1;

	for (int i = 0;i < total;++i)
	{
		if (i < n)
		{
			T[i].ch = asc[i].ch;
			T[i].weight = asc[i].freq;
		}
		else
		{
			// Merged nodes carry no character. Reading asc[i] here would
			// run past the end of the frequency table once n is large.
			T[i].ch = '\0';
			T[i].weight = 0;
		}
		T[i].lchild = -1;
		T[i].rchild = -1;
		T[i].parent = -1;
	}

	int p1 = 0, p2 = 0;				// indices of the two nodes picked for merging
	for (int i = n; i < total;++i)	// n-1 merges build the tree
	{
		SelectMin(T,i,&p1, &p2);
		T[p1].parent = i;
		T[p2].parent = i;
		T[i].lchild = p1;
		T[i].rchild = p2;
		T[i].weight = T[p1].weight + T[p2].weight;
	}
}

// Choose the two nodes to merge next while building the Huffman tree.
// Only T[0..n-1] is examined and only nodes without a parent qualify.
// *p1 and *p2 are first seeded with the first two parentless nodes; then
// *p1 is moved to the lightest candidate other than *p2, and *p2 to the
// lightest candidate other than *p1.
void SelectMin(HTNode* T, int n, int* p1, int* p2)
{
	// Seed: first parentless node.
	int first = 0;
	while (first < n && T[first].parent != -1)
	{
		++first;
	}
	if (first < n)
	{
		*p1 = first;
	}

	// Seed: next parentless node after it.
	int second = first + 1;
	while (second < n && T[second].parent != -1)
	{
		++second;
	}
	if (second < n)
	{
		*p2 = second;
	}

	// Lightest node for *p1 (never the one currently held in *p2).
	for (int idx = 0; idx < n; ++idx)
	{
		int lighter = T[idx].weight < T[*p1].weight;
		if (lighter && T[idx].parent == -1 && idx != *p2)
		{
			*p1 = idx;
		}
	}

	// Lightest remaining node for *p2 (never the one held in *p1).
	for (int idx = 0; idx < n; ++idx)
	{
		int lighter = T[idx].weight < T[*p2].weight;
		if (lighter && T[idx].parent == -1 && idx != *p1)
		{
			*p2 = idx;
		}
	}
}

// Print the Huffman tree as a table (the tree is stored statically in an
// array). The n leaves are listed first together with their character,
// followed by the merged nodes n..2n-2 without a character column.
void DisplayHuffTree(HTNode* T,int n)
{
	const int total = 2 * n - 1;
	int idx = 0;

	printf("构造的哈夫曼树的静态存储表如下\n");
	printf("序号\t字符\t权值\t左孩子\t右孩子\t双亲\n");
	while (idx < n)
	{
		const HTNode* node = &T[idx];
		if (node->ch != '\n')
		{
			printf("%d\t%c\t%d\t%d\t%d\t%d\t\n", idx, node->ch, node->weight, node->lchild, node->rchild, node->parent);
		}
		else
		{
			// A raw newline would break the table layout, so show it escaped.
			printf("%d\t\\n\t%d\t%d\t%d\t%d\t\n", idx, node->weight, node->lchild, node->rchild, node->parent);
		}
		++idx;
	}

	printf("序号\t权值\t左孩子\t右孩子\t双亲\n");
	for (; idx < total; ++idx)
	{
		const HTNode* node = &T[idx];
		printf("%d\t%d\t%d\t%d\t%d\t\n", idx, node->weight, node->lchild, node->rchild, node->parent);
	}
}

// Derive the Huffman code of every leaf.
// A left branch contributes '0' and a right branch '1'; the code is the
// branch sequence on the path from the root down to the leaf.
//   T        - Huffman tree whose first n nodes are the leaves
//   Huffcode - output table, entry i receives the character and code of leaf i
//   n        - number of leaves
// A code longer than MaxCodelen does not fit CodeNode.bits; in that case the
// program reports the problem and exits instead of writing outside the buffer.
void CharsetEncoding(HTNode* T, CodeNode* Huffcode,int n)
{
	char cd[MaxCodelen + 1];	// scratch buffer, filled from the back
	cd[MaxCodelen] = '\0';

	for (int i = 0;i < n;++i)
	{
		int start = MaxCodelen;		// position of the first code character in cd
		int child = i;
		int parent;

		Huffcode[i].ch = T[i].ch;
		// Walk from the leaf up to the root (whose parent is -1); the bits
		// are met in reverse order, hence the back-to-front fill.
		while ((parent = T[child].parent) >= 0)
		{
			if (start == 0)
			{
				fprintf(stderr, "Huffman code of character %d is longer than %d bits\n", (int)T[i].ch, MaxCodelen);
				exit(EXIT_FAILURE);
			}
			cd[--start] = (T[parent].lchild == child) ? '0' : '1';
			child = parent;
		}
		strcpy(Huffcode[i].bits, &cd[start]);
	}
}

// Print the code table: one line per character with its Huffman code.
// The newline character is shown as the two characters "\n".
void PrintEncoding(CodeNode* Huffcode, int n)
{
	printf("字符\t编码\n");

	int row = 0;
	while (row < n)
	{
		const CodeNode* entry = &Huffcode[row];
		if (entry->ch != '\n')
		{
			printf("%c\t%s\n", entry->ch, entry->bits);
		}
		else
		{
			printf("\\n\t%s\n", entry->bits);
		}
		++row;
	}
}

// Encode a text file with the Huffman code table and write the result as a
// text file made of the characters '0' and '1'.
//   Huffcode - code table with n entries
//   infile   - text file to encode
//   outfile  - destination; it is truncated first, so running the program
//              again does not append to an older result
// Returns 0 on success and -1 if a file cannot be opened or written.
int FileEncoding(CodeNode* Huffcode, int n, char* infile,char* outfile)
{
	FILE* in = fopen(infile, "r");
	if (in == NULL)
	{
		printf("open failure\n");
		return -1;
	}
	FILE* out = fopen(outfile,"w");
	if (out == NULL)
	{
		printf("open failure\n");
		fclose(in);
		return -1;
	}

	int c;		// int, so EOF stays distinguishable from every byte value
	while ((c = fgetc(in)) != EOF)
	{
		if (c >= ASCIInum)
		{
			continue;	// no table slot exists for this byte, so it has no code
		}
		char* code = FindChar(Huffcode, n, (char)c);
		if (code == NULL)
		{
			continue;
		}
		fputs(code, out);
	}

	fclose(in);
	if (fclose(out) != 0)	// fclose flushes; a failure means data was lost
	{
		return -1;
	}
	return 0;
}

// Look up the code string of a character in the code table.
//   Huffcode - code table with n entries
//   ch       - character to look up
// Returns the table's code string for ch. If ch is not in the table, a
// message is printed and an empty string is returned, so callers that copy
// or print the result simply emit no bits for that character.
char* FindChar(CodeNode* Huffcode, int n, char ch)
{
	static char nocode[1] = "";	// valid, empty result for "not found"

	for (int i = 0;i < n;++i)
	{
		if (ch == Huffcode[i].ch)
		{
			return Huffcode[i].bits;
		}
	}
	printf("没找到字符\n");
	return nocode;
}


// Compression rate relative to 8-bit ASCII.
// rate = (length of the '0'/'1' file, one character per bit) / (chnum * 8)
//   chnum   - number of characters in the original text
//   outfile - the '0'/'1' text file written by the encoder
// Returns the rate, or -1 if the file cannot be opened or measured, or if
// chnum is not positive (the rate would be a division by zero).
float ComputeRate_1(int chnum, char* outfile)
{
	// Binary mode: for a text stream ftell is not guaranteed to be a byte count.
	FILE* fp = fopen(outfile, "rb");
	if (fp == NULL)
	{
		printf("open failure\n");
		return -1;
	}

	long filelen = -1;
	if (fseek(fp, 0, SEEK_END) == 0)
	{
		filelen = ftell(fp);
	}
	fclose(fp);
	if (filelen < 0)
	{
		printf("cannot determine file length\n");
		return -1;
	}

	printf("\n哈夫曼编码的文件长度为%ld\n", filelen);
	if (chnum <= 0)
	{
		printf("invalid character count\n");
		return -1;
	}

	float rate = (float)filelen / ((float)chnum * 8.0f);
	printf("ascii压缩率 = %f\n", rate);
	return rate;
}

// Compression rate relative to a fixed-length code.
// rate = (length of the '0'/'1' file) / (chnum * bits per character), where
// bits per character is ceil(log2(n)), with a minimum of one bit.
//   chnum   - number of characters in the original text
//   outfile - the '0'/'1' text file written by the encoder
//   n       - number of distinct characters
// Returns the rate, or -1 if the file cannot be opened or measured, or if
// chnum or n is not positive.
float ComputeRate_2(int chnum, char* outfile,int n)
{
	FILE* fp = fopen(outfile, "rb");
	if (fp == NULL)
	{
		printf("open failure\n");
		return -1;
	}

	long filelen = -1;
	if (fseek(fp, 0, SEEK_END) == 0)
	{
		filelen = ftell(fp);
	}
	fclose(fp);
	if (filelen < 0)
	{
		printf("cannot determine file length\n");
		return -1;
	}
	if (chnum <= 0 || n <= 0)
	{
		printf("invalid character count\n");
		return -1;
	}

	// Smallest width with 2^width >= n, computed in integers. A single
	// symbol still needs one bit; a width of 0 would divide by zero below.
	int averagelen = 0;
	while (averagelen < 30 && (1L << averagelen) < (long)n)
	{
		++averagelen;
	}
	if (averagelen == 0)
	{
		averagelen = 1;
	}

	printf("\n%d个字符的等长编码的单个码长为%d\n", n,averagelen);
	printf("哈夫曼编码的文件长度为%ld\n", filelen);
	float rate = (float)filelen / ((float)chnum * (float)averagelen);
	printf("等长码压缩率 = %f\n", rate);
	return rate;
}

// Decode the '0'/'1' text file back into text.
// The decoded text is printed to the console and written to Textfile,
// which is truncated first.
//   Codingfile - file of '0'/'1' characters written by the encoder
//   Textfile   - destination for the decoded text
//   T          - Huffman tree; the root is node 2*valid-2
//   valid      - number of leaves of the tree
//   chnum      - number of characters to decode
// Returns 0 on success, -1 if a file cannot be opened, the tree is empty,
// or the encoded data ends before chnum characters were decoded.
int Decoding(char* Codingfile, char* Textfile, HTNode* T,int valid,int chnum)
{
	if (chnum > 0 && valid <= 0)
	{
		printf("empty Huffman tree\n");
		return -1;
	}

	FILE* in = fopen(Codingfile, "r");
	if (in == NULL)
	{
		printf("Open failure\n");
		return -1;
	}
	FILE* out = fopen(Textfile, "w");
	if (out == NULL)
	{
		printf("Open failure\n");
		fclose(in);
		return -1;
	}

	const int root = 2 * valid - 2;
	int status = 0;

	printf("\n解码文件的内容如下\n");
	// The bits are read one at a time from the stream, so the length of the
	// encoded file is not limited by any buffer size.
	for (int i = 0;i < chnum && status == 0;++i)
	{
		int node = root;
		// Testing for a leaf before reading lets a single-symbol tree
		// (root is a leaf, codes are empty) decode without consuming bits.
		while (T[node].lchild != -1 || T[node].rchild != -1)
		{
			int c = fgetc(in);
			if (c == EOF)
			{
				status = -1;
				break;
			}
			if (c == '0')
			{
				node = T[node].lchild;
			}
			else if (c == '1')
			{
				node = T[node].rchild;
			}
			// any other character (e.g. a line break) is skipped
		}
		if (status == 0)
		{
			printf("%c", T[node].ch);
			fputc(T[node].ch, out);
		}
	}
	printf("\n");
	if (status != 0)
	{
		printf("encoded data ended early\n");
	}

	fclose(in);
	fclose(out);
	return status;
}

// Print the content of a text file to the console, framed by a heading line
// and a trailing newline. Prints "Open failure" if the file cannot be opened.
void PrintText(char* file)
{
	printf("\n原文如下\n");
	FILE* fp = fopen(file, "r");
	if (fp == NULL)
	{
		printf("Open failure\n");
		return;
	}

	// fgetc returns an int; keeping it in an int stops byte 0xFF from being
	// mistaken for EOF.
	int c;
	while ((c = fgetc(fp)) != EOF)
	{
		putchar(c);
	}
	printf("\n");
	fclose(fp);
}

// Encode a text file with the Huffman code table and write the code bits
// packed eight to a byte, most significant bit first. If the last byte is
// not full it is padded with 0 bits on the right.
//   Huffcode      - code table with n entries
//   infile        - text file to encode
//   outfilebinary - destination; it is truncated first
// Prints "open failure" and returns if a file cannot be opened.
void FileEncodingBinary(CodeNode* Huffcode, int n, char* infile, char* outfilebinary)
{
	FILE* in = fopen(infile, "r");
	if (in == NULL)
	{
		printf("open failure\n");
		return;
	}
	FILE* out = fopen(outfilebinary, "wb");
	if (out == NULL)
	{
		printf("open failure\n");
		fclose(in);
		return;
	}

	unsigned char acc = 0;	// bits collected for the byte being assembled
	int filled = 0;			// how many bits acc currently holds
	int c;					// int, so EOF stays distinguishable from bytes

	while ((c = fgetc(in)) != EOF)
	{
		if (c >= ASCIInum)
		{
			continue;	// no table slot exists for this byte, so it has no code
		}
		const char* code = FindChar(Huffcode, n, (char)c);
		if (code == NULL)
		{
			continue;
		}
		for (; *code != '\0'; ++code)
		{
			if (*code != '0' && *code != '1')
			{
				continue;
			}
			// Shift left and put the new bit in the lowest position.
			acc = (unsigned char)((acc << 1) | (*code == '1'));
			if (++filled == 8)
			{
				// Every full byte is written at once, so no bits pile up
				// however long a single code is.
				fputc(acc, out);
				acc = 0;
				filled = 0;
			}
		}
	}

	// Tail: pad the unfinished byte with zeros on the right.
	if (filled > 0)
	{
		acc = (unsigned char)(acc << (8 - filled));
		fputc(acc, out);
	}

	fclose(in);
	fclose(out);
}

// Decode the packed binary file back into text.
// The decoded text is printed to the console and written to textfile,
// which is truncated first. Bits are consumed most significant first;
// padding bits after the last character are ignored.
//   CodingfileBinary - packed file written by the binary encoder
//   textfile         - destination for the decoded text
//   T                - Huffman tree; the root is node 2*valid-2
//   valid            - number of leaves of the tree
//   chnum            - number of characters to decode
// Returns 0 on success, -1 if a file cannot be opened, the tree is empty,
// or the data ends before chnum characters were decoded.
int DecodingBinary(char* CodingfileBinary, char* textfile, HTNode* T, int valid, int chnum)
{
	if (chnum > 0 && valid <= 0)
	{
		printf("empty Huffman tree\n");
		return -1;
	}

	FILE* in = fopen(CodingfileBinary, "rb");
	if (in == NULL)
	{
		printf("Open failure\n");
		return -1;
	}
	FILE* out = fopen(textfile, "w");
	if (out == NULL)
	{
		printf("Open failure\n");
		fclose(in);
		return -1;
	}

	const int root = 2 * valid - 2;
	int node = root;
	unsigned char byte = 0;	// unsigned: shifting a negative char is undefined
	int bitsleft = 0;		// unread bits remaining in byte
	int count = 0;			// characters decoded so far
	int status = 0;

	printf("\n从二进制文件中解码出的文件内容如下\n");
	while (count < chnum)
	{
		// Leaf reached: emit its character and restart at the root. Doing
		// this test first also handles a tree whose root is the only node.
		if (T[node].lchild == -1 && T[node].rchild == -1)
		{
			printf("%c", T[node].ch);
			fputc(T[node].ch, out);
			++count;
			node = root;
			continue;
		}

		if (bitsleft == 0)
		{
			int c = fgetc(in);
			if (c == EOF)
			{
				status = -1;	// ran out of data before chnum characters
				break;
			}
			byte = (unsigned char)c;
			bitsleft = 8;
		}

		// Highest bit set means right branch, otherwise left branch.
		node = (byte & 0x80) ? T[node].rchild : T[node].lchild;
		byte = (unsigned char)(byte << 1);
		--bitsleft;
	}
	printf("\n");
	if (status != 0)
	{
		printf("encoded data ended early\n");
	}

	fclose(in);
	fclose(out);
	return status;
}

  • 5
    点赞
  • 47
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
哈夫曼编码是一种用于数据压缩的算法。下面是C语言实现哈夫曼编码的示例代码:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_NODE_NUM 1000

typedef struct {
    int weight;
    int parent, lchild, rchild;
} HuffmanNode;

typedef struct {
    int bit[MAX_NODE_NUM];
    int start;
} HuffmanCode;

void HuffmanCoding(HuffmanNode *huffmanTree, HuffmanCode *huffmanCode, int n) {
    int i, j, parent, left, right;
    for (i = 0; i < n; i++) {
        huffmanTree[i].parent = -1;
        huffmanTree[i].lchild = -1;
        huffmanTree[i].rchild = -1;
    }
    for (i = 0; i < n-1; i++) {
        int min1 = MAX_NODE_NUM, min2 = MAX_NODE_NUM;
        left = right = -1;
        for (j = 0; j < n+i; j++) {
            if (huffmanTree[j].parent == -1 && huffmanTree[j].weight < min1) {
                min2 = min1;
                right = left;
                min1 = huffmanTree[j].weight;
                left = j;
            } else if (huffmanTree[j].parent == -1 && huffmanTree[j].weight < min2) {
                min2 = huffmanTree[j].weight;
                right = j;
            }
        }
        huffmanTree[left].parent = n + i;
        huffmanTree[right].parent = n + i;
        huffmanTree[n + i].lchild = left;
        huffmanTree[n + i].rchild = right;
        huffmanTree[n + i].weight = min1 + min2;
    }
    for (i = 0; i < n; i++) {
        HuffmanCode *code = &huffmanCode[i];
        code->start = n - 1;
        parent = huffmanTree[i].parent;
        while (parent != -1) {
            if (huffmanTree[parent].lchild == i) {
                code->bit[code->start] = 0;
            } else {
                code->bit[code->start] = 1;
            }
            code->start--;
            i = parent;
            parent = huffmanTree[parent].parent;
        }
    }
}

int main() {
    int i, n;
    HuffmanNode huffmanTree[MAX_NODE_NUM];
    HuffmanCode huffmanCode[MAX_NODE_NUM];
    printf("请输入叶子结点的个数:");
    scanf("%d", &n);
    printf("请输入每个叶子结点的权重:");
    for (i = 0; i < n; i++) {
        scanf("%d", &huffmanTree[i].weight);
    }
    HuffmanCoding(huffmanTree, huffmanCode, n);
    printf("哈夫曼编码表:\n");
    for (i = 0; i < n; i++) {
        printf("结点%d的哈夫曼编码为:", i);
        int j;
        for (j = huffmanCode[i].start + 1; j < n; j++) {
            printf("%d", huffmanCode[i].bit[j]);
        }
        printf("\n");
    }
    return 0;
}
```

该代码中定义了两个结构体:`HuffmanNode`表示哈夫曼树中的结点,包括权值、父结点、左孩子和右孩子;`HuffmanCode`表示哈夫曼编码,包括二进制位以及起始位置。

在`HuffmanCoding`函数中,首先初始化哈夫曼树的每个结点,然后依次求出每个非叶子结点,直到只剩下一个根结点。接着遍历每个叶子结点,求出其哈夫曼编码。最后输出哈夫曼编码表。 在主函数中,首先输入叶子结点的个数和权重,然后调用`HuffmanCoding`函数求出哈夫曼编码,最后输出哈夫曼编码表。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值