哈夫曼树的应用

哈夫曼树

定义:给定n个权值作为n个叶子结点,构造一棵二叉树,若该树的带权路径长度达到最小,则称该二叉树为哈夫曼树,也被称为最优二叉树。

哈夫曼树的构建
构建思路
下面给出一个非常简洁易操作的算法,来构造一棵哈夫曼树:
1、初始状态下共有n个结点,结点的权值分别是给定的n个数,将它们视作n棵只有根结点的树。
2、合并其中根结点权值最小的两棵树,生成这两棵树的父结点,权值为这两个根结点的权值之和,这样树的数量就减少了一个。
3、重复操作2,直到只剩下一棵树为止,这棵树就是哈夫曼树。

代码实现

#include <iostream>
#include <fstream>
#include <cstring>
using namespace std;
 
#define MaxSize 1024
#define OK 1
#define ERROR 0
typedef int Status;
 
// One distinct character of the input text together with its occurrence count.
// Both members have defaults so a freshly created entry never holds an
// indeterminate value.
typedef struct wordcnt {
	char ch = '\0';  // the character this entry counts
	int cnt = 0;     // how many times `ch` has been seen
}Count;
 
// Frequency table: the distinct characters recorded so far and their counts.
struct NumCount {
	Count count[MaxSize];  // entries [0, length) are in use
	int length = 0;        // number of entries currently in use
};
 
// One node of the Huffman tree, stored in an array and linked by index.
// Index 0 is used as "no node": a parent of 0 marks a root, and children of 0
// mark a leaf. Every member has a default so that `new HTNode[n]` yields
// fully initialized, unlinked nodes.
typedef struct HTree {
	char data = '\0';                        // character stored in a leaf
	int weight = 0;                          // weight (occurrence count / sum of children)
	int parent = 0, lchild = 0, rchild = 0;  // array indices, 0 = none
}HTNode, * HuffmanTree;
 
// Huffman code of a single character.
// `str` defaults to nullptr so that an entry that was never filled in can be
// passed to delete[] safely.
typedef struct HCode {
	char data = '\0';     // the encoded character
	char* str = nullptr;  // NUL-terminated string of '0'/'1' characters
}*HuffmanCode;
 
// Reads the first line of "in.txt" into `source` and echoes it to stdout.
//
// source: caller-provided buffer that must hold at least MaxSize chars. It is
//         left as an empty string when the file cannot be opened.
// Returns OK when the file was opened, ERROR when `source` is null or the
// file cannot be opened.
Status ReadData(char* source) {
	if (source == nullptr) return ERROR;
	source[0] = '\0';  // keep the buffer a valid C string on every path

	ifstream infile;
	infile.open("in.txt", ios::in);
	if (!infile.is_open()) {
		cout << "文件打开失败" << endl;
		return ERROR;  // do not pretend that data was read
	}
	cout << "文件打开成功" << endl;
	cout << "Reading..." << endl;
	cout << "the input file is:" << endl;
	infile.getline(source, MaxSize);
	cout << source << endl;
	infile.close();
	cout << endl;
	return OK;
}
 
// Tallies every character of the NUL-terminated string `data` into `paraCnt`.
// A character already present in the table has its count incremented; a new
// character is appended at the end of the table and then counted.
// Always returns OK.
Status WordCount(char* data, NumCount* paraCnt) {
	for (const char* cursor = data; *cursor != '\0'; ++cursor) {
		// Linear search for an existing entry of this character.
		Count* slot = nullptr;
		for (int k = 0; k < paraCnt->length && slot == nullptr; ++k) {
			if (paraCnt->count[k].ch == *cursor) {
				slot = &paraCnt->count[k];
			}
		}
		// First occurrence: claim the next free entry.
		if (slot == nullptr) {
			slot = &paraCnt->count[paraCnt->length];
			slot->ch = *cursor;
			++paraCnt->length;
		}
		++slot->cnt;
	}
	return OK;
}
 
// Prints the number of recorded characters, then one line per character with
// its occurrence count, followed by a blank line. Always returns OK.
Status Show(NumCount* paraCnt) {
	cout << "the length is " << paraCnt->length << endl;
	int idx = 0;
	while (idx < paraCnt->length) {
		const Count& entry = paraCnt->count[idx];
		cout << "The character " << entry.ch << "  appears  " << entry.cnt << endl;
		++idx;
	}
	cout << endl;
	return OK;
}
 
// Finds the two parentless nodes with the smallest weights among HT[1..top].
//
// HT:  node array; index 0 is not a real node.
// top: last index to examine (inclusive).
// s1:  receives the index of the lightest parentless node, or 0 if none.
// s2:  receives the index of the next lightest parentless node (different
//      from *s1), or 0 if none.
// Ties are resolved in favour of the lower index.
// Returns OK when two nodes were found, ERROR otherwise.
//
// The current best candidate is compared against directly instead of against
// an INT_MAX sentinel, so no <climits> dependency is needed and the outputs
// are always written.
Status select(HuffmanTree HT, int top, int* s1, int* s2)
{
	*s1 = 0;
	*s2 = 0;

	// Pass 1: lightest root.
	for (int i = 1; i <= top; ++i)
	{
		if (HT[i].parent != 0) continue;  // already merged into a subtree
		if (*s1 == 0 || HT[i].weight < HT[*s1].weight)
		{
			*s1 = i;
		}
	}

	// Pass 2: lightest root other than *s1.
	for (int i = 1; i <= top; ++i)
	{
		if (HT[i].parent != 0 || i == *s1) continue;
		if (*s2 == 0 || HT[i].weight < HT[*s2].weight)
		{
			*s2 = i;
		}
	}
	return (*s1 != 0 && *s2 != 0) ? OK : ERROR;
}
 
// Builds a Huffman tree from the character frequencies in `cntarray`.
//
// HT:       receives a newly allocated array of 2*length nodes (index 0 is
//           unused); the caller releases it with delete[]. Set to nullptr
//           when ERROR is returned.
// length:   number of leaves, i.e. how many entries of `cntarray` to use.
// cntarray: frequency table; entry i-1 becomes leaf i.
// Layout: leaves occupy 1..length, internal nodes length+1..2*length-1, and
// the root is node 2*length-1.
// Returns OK on success, ERROR when length is not in [2, MaxSize].
Status CreateHuffmanTree(HuffmanTree& HT, int length, NumCount cntarray) {
	if (length <= 1 || length > MaxSize) {
		HT = nullptr;
		return ERROR;
	}
	const int m = length * 2 - 1;
	HT = new HTNode[m + 1];
	for (int i = 0; i <= m; i++) {
		HT[i].data = '\0';
		HT[i].weight = 0;
		HT[i].parent = 0;
		HT[i].lchild = 0;
		HT[i].rchild = 0;
	}

	// Seed the leaves with the counted characters. Without this the merge
	// loop below would compare uninitialized weights.
	for (int i = 1; i <= length; i++) {
		HT[i].data = cntarray.count[i - 1].ch;
		HT[i].weight = cntarray.count[i - 1].cnt;
	}

	// Repeatedly merge the two lightest roots into a new internal node.
	for (int i = length + 1; i <= m; i++) {
		int s1 = 0;
		int s2 = 0;
		select(HT, i - 1, &s1, &s2);
		HT[s1].parent = i;
		HT[s2].parent = i;
		HT[i].lchild = s1;
		HT[i].rchild = s2;
		HT[i].weight = HT[s1].weight + HT[s2].weight;
	}
	return OK;
}
 
// Derives the Huffman code of every leaf by walking from the leaf to the root.
//
// HT:     tree whose leaves are nodes 1..length.
// HC:     receives a newly allocated array of length+1 entries (entry 0 is
//         unused and has a null `str`). Each HC[i].str is a separately
//         allocated NUL-terminated string; the caller releases the strings and
//         the array with delete[]. Set to nullptr when ERROR is returned.
// length: number of leaves.
// A step into a left child contributes '0', any other step '1'.
// Returns OK on success, ERROR when HT is null or length < 1.
Status CreateHuffmanCode(HuffmanTree HT, HuffmanCode& HC, int length) {
	if (HT == nullptr || length < 1) {
		// length < 1 would otherwise write to cd[-1] below.
		HC = nullptr;
		return ERROR;
	}
	HC = new HCode[length + 1];
	HC[0].data = '\0';
	HC[0].str = nullptr;

	// Scratch buffer filled from the back, because the walk goes leaf -> root
	// while the code reads root -> leaf.
	char* cd = new char[length];
	cd[length - 1] = '\0';
	for (int i = 1; i <= length; i++) {
		int start = length - 1;
		for (int c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent) {
			--start;
			cd[start] = (HT[f].lchild == c) ? '0' : '1';
		}
		HC[i].str = new char[length - start];
		HC[i].data = HT[i].data;
		strcpy(HC[i].str, &cd[start]);
	}
	delete[] cd;
	return OK;
}
 
// Writes the Huffman encoding of `data` to "code.txt".
//
// data:   NUL-terminated text to encode.
// HC:     code table; entries 1..length are consulted.
// length: number of entries in the code table.
// Characters with no entry in the table produce no output.
// Returns OK on success, ERROR when "code.txt" cannot be opened for writing.
Status Encode(char* data, HuffmanCode HC, int length) {
	ofstream outfile;
	outfile.open("code.txt");
	if (!outfile.is_open()) {
		cout << "cannot open code.txt for writing" << endl;
		return ERROR;
	}
	// Walk the text by pointer instead of re-evaluating strlen() each pass.
	for (const char* p = data; *p != '\0'; ++p) {
		for (int j = 1; j <= length; j++) {
			if (*p == HC[j].data) {
				outfile << HC[j].str;
				break;  // each character has exactly one code
			}
		}
	}
	outfile.close();
	cout << "the code txt has been written" << endl;
	cout << endl;
	return OK;
}
 
// Decodes the first line of "code.txt" with the Huffman tree and writes the
// recovered text to "out.txt".
//
// HT:     Huffman tree with leaves 1..length and root 2*length-1.
// length: number of leaves.
// Bits are streamed one character at a time, so no intermediate buffer is
// allocated. Characters other than '0' and '1' are skipped.
// Returns OK on success, ERROR when HT is null, length < 2, or a file cannot
// be opened.
Status Decode(HuffmanTree HT, int length) {
	if (HT == nullptr || length <= 1) return ERROR;

	ifstream infile;
	infile.open("code.txt");
	if (!infile.is_open()) {
		cout << "cannot open code.txt for reading" << endl;
		return ERROR;
	}

	ofstream outfile;
	outfile.open("out.txt");
	if (!outfile.is_open()) {
		cout << "cannot open out.txt for writing" << endl;
		return ERROR;
	}

	const int top = 2 * length - 1;  // index of the root node
	int node = top;
	char bit;
	while (infile.get(bit) && bit != '\n') {
		if (bit == '0') node = HT[node].lchild;
		else if (bit == '1') node = HT[node].rchild;  // '1' selects the right child
		else continue;

		// Reached a leaf: emit its character and restart from the root.
		if (HT[node].lchild == 0 && HT[node].rchild == 0) {
			outfile << HT[node].data;
			node = top;
		}
	}
	infile.close();
	outfile.close();
	cout << "the output txt has been written" << endl;
	cout << endl;
	return OK;
}
 
int main(char argc, char** argv) {
	char data[MaxSize];
	NumCount Cntarray;
	ReadData(data);
	WordCount(data, &Cntarray);
	Show(&Cntarray);
	HuffmanTree tree;
	CreateHuffmanTree(tree, Cntarray.length, Cntarray);
	HuffmanCode code;
	CreateHuffmanCode(tree, code, Cntarray.length);
	Encode(data, code, Cntarray.length);
	cout << "Please view the generated TXT file to check result" << endl;
	return 0;
}

运行结果

文本: The last leg of a journey marks the halfway point. 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

.彼得潘.

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值