哈夫曼树及哈夫曼树的编码解码

一、哈夫曼树

哈夫曼树又称最优树,它是带权路径长度最小的树,它仅有度为m和度为0的节点(叶子)。

哈夫曼树特点:

总节点数n = (m*x - 1) / (m - 1)     //  x为叶子数,m为非叶子节点度数

特别的:哈夫曼二叉树 : n = 2*x - 1

推导:叶子数x,非叶子节点数y

总节点n = x + y = 边数 + 1 = my + 1   ->    y = (x - 1) / (m - 1)     ->    n = x + y = (m*x - 1) / (m - 1)

哈夫曼二叉树m = 2  ->   n = 2*x - 1

哈夫曼二叉树的构建思想:

①给定一个字符串,将字符串各个字符的频度整理为一个数组。

②每次选取频度最小的两个字符,分别作为左、右孩子(权重为频度),  O(n)

③父母节点权重为左右孩子之和。再将父母节点放进数组。

④重复②③操作,即可构建哈夫曼树。 O(n)      总共 O(n²)

以给定字符串abcab为例

得出频度数组:2、2、1(a、b、c)

选取1、2,作为左、右孩子,3作为父母节点

将2、3作为左、右孩子,5作为父母节点

 二、哈夫曼二叉树的编码解码

以左孩子0,右孩子1为例,

遍历每个叶子节点,O(n)

每个叶子节点再往根节点遍历,树较平衡时为O(logn)(哈夫曼树可能退化成链,最坏为O(n))

如果当前节点为左孩子,记为0

如果当前节点为右孩子,记为1

最后得出每个叶子节点的编码。   树较平衡时总共O(nlogn),最坏O(n²)

 

 三、代码实现

// One entry of the code table: a character together with its Huffman code.
// The code is stored right-aligned: its characters ('0'/'1') occupy
// code[start] .. code[leafCount], where leafCount is the number of distinct
// characters in the tree that produced the entry.
typedef struct Code
{
	char data;			// the character this entry describes
	int start;			// index of the first code character inside `code`
	// The highest index written is the number of distinct characters, which
	// can reach 256 for 8-bit chars, so 257 slots are required. The previous
	// size of 100 overflowed once an input had 100 or more distinct bytes.
	char code[257];
}Code;
// A node of the Huffman tree. Nodes live in one array and refer to each
// other by array index; -1 is used for "no such node".
struct TreeNode
{
	char data;		// character stored in the node
	int weigh;		// weight (frequency) of the node
	int lchild;		// index of the left child, -1 if none
	int rchild;		// index of the right child, -1 if none
	int parent;		// index of the parent, -1 if none
};
typedef TreeNode TNode;
// Huffman tree built from the characters of a string, plus helpers that
// encode a string into a '0'/'1' sequence and decode it back.
class Tree
{
	TNode* T;			// node array; the leaves occupy the first `len` slots
	int len;			// number of leaves (distinct characters)

	// The class owns `T` and releases it in the destructor, so a
	// member-wise copy would free the same array twice. Copying is
	// therefore disabled: both operations are private and never defined.
	Tree(const Tree&);
	Tree& operator=(const Tree&);
public:
	Tree(string str);				// builds the Huffman tree for `str`
	~Tree();
	int* GetFrequeent(string str);	// frequency of each distinct character
	char* GetStr(string str);		// distinct characters, in first-occurrence order
	Code* GetCode();				// code table, one entry per leaf
	char* SendCode(string str);		// encodes `str` into a '0'/'1' sequence
	char* DeCode(string str);		// decodes a '0'/'1' sequence
	void Show(TNode *T);			// prints the node weights
	// Returns the number of leaves.
	int GetLen() {
		return len;
	}
};
// Builds the Huffman tree for the characters of `str`.
//
// The tree is stored in the array T of 2*len - 1 nodes, where len is the
// number of distinct characters: T[0..len-1] are the leaves (one per
// character, in first-occurrence order) and T[len..2*len-2] are the internal
// nodes in creation order, so the last node is the root. Each internal node
// joins the two lightest parentless nodes; the lighter one becomes the left
// child.
//
// An empty string yields an empty tree (len == 0, T == NULL) instead of
// requesting an array of negative size.
Tree::Tree(string str)
{
	int *nums = GetFrequeent(str);		// zero-terminated frequency array
	char* S = GetStr(str);				// distinct characters, same order as nums

	len = 0;							// count entries up to the 0 terminator
	while (nums[len]) {
		len++;
	}

	if (len == 0) {						// empty input: nothing to build
		delete[]nums;
		delete[]S;
		T = NULL;
		return;
	}

	const int total = 2 * len - 1;		// leaves plus internal nodes
	T = new TNode[total];
	for (int i = 0; i < total; i++) {
		const bool leaf = i < len;
		T[i].data = leaf ? S[i] : 0;	// internal nodes carry no character
		T[i].weigh = leaf ? nums[i] : 0;
		T[i].parent = -1;
		T[i].lchild = -1;
		T[i].rchild = -1;
	}
	delete[]nums;
	delete[]S;

	for (int i = len; i < total; i++) {
		// Indices of the lightest and second-lightest parentless nodes.
		// Tracking indices (with -1 meaning "none yet") avoids a sentinel
		// weight, so arbitrarily large frequencies are handled correctly.
		int lnode = -1, rnode = -1;
		for (int j = 0; j < i; j++) {
			if (T[j].parent != -1) {	// already merged into a subtree
				continue;
			}
			if (lnode == -1 || T[j].weigh < T[lnode].weigh) {
				rnode = lnode;			// previous minimum becomes the runner-up
				lnode = j;
			}
			else if (rnode == -1 || T[j].weigh < T[rnode].weigh) {
				rnode = j;
			}
		}
		// At least two parentless nodes exist at every step, so both
		// indices are valid here.
		T[i].lchild = lnode;
		T[i].rchild = rnode;
		T[i].weigh = T[lnode].weigh + T[rnode].weigh;
		T[lnode].parent = i;
		T[rnode].parent = i;
	}
}
// Releases the node array. T is allocated with new[] in the constructor,
// so it must be released with delete[]; plain delete is undefined behaviour.
Tree::~Tree()
{
	delete[] T;
}
// Prints the weight of each of the 2*len - 1 nodes of the given node array,
// every weight followed by a single space. Note that the parameter T hides
// the member of the same name.
void Tree::Show(TNode *T)
{
	const int total = 2 * len - 1;
	int idx = 0;
	while (idx < total) {
		cout << T[idx].weigh << " ";
		++idx;
	}
}

// Counts how often each distinct character occurs in `str`.
//
// Returns a new[]-allocated array with one count per distinct character, in
// order of first occurrence, followed by zeros; the first zero marks the end
// of the data. The array has str.length() + 1 entries, so a terminating zero
// is always present.
int* Tree::GetFrequeent(string str)
{
	const int n = str.length();
	int* freq = new int[n + 1];			// counts plus zero terminator
	int slot = 0;						// slot in freq for the current character
	int* seen = new int[n];				// 1 once a position has been counted
	memset(seen, 0, n * sizeof(int));
	memset(freq, 0, (n + 1) * sizeof(int));

	for (int first = 0; first < n; ++first) {
		if (seen[first] == 1) {			// this character was counted already
			continue;
		}
		const char target = str[first];
		int pos = first;
		while (pos < n) {				// count target from here to the end
			if (str[pos] == target) {
				seen[pos] = 1;
				++freq[slot];
			}
			++pos;
		}
		++slot;
	}

	delete[] seen;
	return freq;
}
// Collects the distinct characters of `str` in order of first occurrence.
//
// Returns a new[]-allocated array of str.length() chars: the distinct
// characters come first and the remaining slots are zero. The array is not
// guaranteed to be zero-terminated (it is full when every character of
// `str` is distinct).
char* Tree::GetStr(string str)
{
	const int n = str.length();
	char* chars = new char[n];			// distinct characters
	int slot = 0;						// next free slot in chars
	int* seen = new int[n];				// 1 once a position has been handled
	memset(seen, 0, n * sizeof(int));
	memset(chars, 0, n * sizeof(char));

	for (int first = 0; first < n; ++first) {
		if (seen[first] == 1) {			// character already recorded
			continue;
		}
		chars[slot] = str[first];
		int pos = first;
		while (pos < n) {				// mark every later occurrence
			if (str[pos] == chars[slot]) {
				seen[pos] = 1;
			}
			++pos;
		}
		++slot;
	}

	delete[] seen;
	return chars;
}
// Builds the code table: one entry per leaf, in leaf order.
//
// Each code is produced by walking from the leaf up to the root and writing
// '0' for a left child and '1' for a right child, filling the entry's buffer
// backwards from index len. Afterwards the code occupies
// code[start] .. code[len]. A tree with a single leaf gets the code "0".
//
// Returns a new[]-allocated array of len entries.
// NOTE: index len of Code::code is written, so len must be smaller than the
// capacity of that buffer.
Code *Tree::GetCode()
{
	Code* table = new Code[len];

	if (len == 1) {						// lone leaf: there is no parent to walk to
		Code& only = table[0];
		only.start = len;
		only.data = T[0].data;
		only.code[len] = '0';
		return table;
	}

	for (int leaf = 0; leaf < len; ++leaf) {
		Code& entry = table[leaf];
		entry.start = len;				// fill backwards, starting at index len
		entry.data = T[leaf].data;

		int child = leaf;				// node whose side of its parent is being tested
		for (int up = T[leaf].parent; up != -1; up = T[up].parent) {
			entry.code[entry.start] = (T[up].lchild == child) ? '0' : '1';
			entry.start--;
			child = up;
		}
		entry.start++;					// undo the decrement after the last write
	}
	return table;
}
char* Tree::DeCode(string str)
{
	Code* Co = GetCode();		//获取编码
	char* S = new char[1000];	//字符数组	一开始设置为len+1,但是在main函数中delete时报错
								//原因:在对new的数组赋值是可以越界赋值的,但是无法delete越界的值,所以new的范围要合理
	int t = 0;						//指向字符数组
	int strlen = str.length();		//输入码长度
	int i = 0;			//指向输入码字符
	int iflag = 0;		//记录上次的i值,用于匹配失败返回
	while(i < strlen) {		//遍历输入码
		iflag = i; int j = 0;
		for (; j < len; j++) {			//遍历字符编码
			int k = Co[j].start;
			for (; k <= len; k++) {		//遍历字符编码的字符
				if (Co[j].code[k] == str[i]) {		//如果当前字符与输入码字符匹配
					i++;			//继续下一次匹配	//string类末尾会自动加上\0,末尾必然不匹配,不会出现越界问题
				}
				else {
					break;
				}
			}
			if (k > len) {//如果全部匹配
				S[t++] = Co[j].data; i--; break;	//记录并将最后一次循环的i++抵消
			}
			else {		//不匹配
				i = iflag;	//重置i
			}
		}
		if (j > len) {
			S[0] = 0; cout << "无效码" << endl; delete[]Co; return S;
		}
		i++;	//每次都会往后遍历,防止编码不匹配造成死循环
	}

	S[t] = 0;		//终止标识
	delete[]Co;
	
	return S;
}
// Encodes `str` as the concatenation of the Huffman codes of its characters.
//
// Characters that have no entry in the code table are skipped.
//
// Returns a new[]-allocated, zero-terminated string of '0'/'1' characters;
// the caller releases it with delete[]. The buffer is sized to the exact
// length of the encoded text; the previous fixed buffer of 10000 bytes
// overflowed on longer outputs.
char* Tree::SendCode(string str)
{
	Code* Co = GetCode();				// code table, one entry per leaf
	string bits;						// encoded text, grown as needed

	const string::size_type textLen = str.length();
	for (string::size_type i = 0; i < textLen; i++) {
		for (int j = 0; j < len; j++) {
			if (Co[j].data == str[i]) {
				// The code occupies code[start] .. code[len] inclusive.
				bits.append(Co[j].code + Co[j].start, len - Co[j].start + 1);
				break;					// table characters are distinct
			}
		}
	}
	delete[]Co;

	char* S = new char[bits.length() + 1];
	bits.copy(S, bits.length());
	S[bits.length()] = 0;				// terminator
	return S;
}

 main函数(测试)

int main()
{
	string str;
	cout << "输入字符串:" << endl;
	getline(cin, str);
	Tree T(str);
	Code *Co = T.GetCode();
	cout << "编码:" << endl;
	for (int i = 0; i < T.GetLen(); i++) {
		cout << Co[i].data << ": ";
		while (Co[i].start <= T.GetLen()) {
			cout << Co[i].code[Co[i].start++];
		}
		cout << endl;
	}

	char* S = T.SendCode(str);

	cout << "要发送的编码:" << endl;
	for (int i = 0; S[i]; i++) {
		cout << S[i] << " ";
	}
	cout << endl;
	cout << "解码后得到字符串:" << endl;

	string SS(S);
	char * s = T.DeCode(SS);
	for (int i = 0; s[i]; i++) {
		cout << s[i] << " ";
	}
	delete[]Co;delete[]S; delete[]s;	
	return 0;
}

 

  • 2
    点赞
  • 18
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
C语言可以通过构建哈夫曼树来实现编码解码。下面是一个简单的实现步骤: 1. 定义哈夫曼树的结构体,包括权值、字符、左右子树等信息。 2. 构建哈夫曼树,首先需要统计每个字符出现的频率,然后将每个字符作为一个叶子节点构建一棵树。接着,将这些树按照权值从小到大排序,每次取出权值最小的两棵树合并成一棵新树,直到只剩下一棵树为止。 3. 根据构建好的哈夫曼树生成编码表,即将每个字符的编码存储在一个数组中。 4. 对需要编码的字符串进行编码,即将每个字符替换成对应的编码。 5. 对编码后的字符串进行解码,即根据编码表将编码还原成原始字符串。 下面是一个简单的C语言实现哈夫曼树编码解码的代码示例: ``` #include <stdio.h> #include <stdlib.h> #include <string.h> #define MAX_TREE_HT 100 // 定义哈夫曼树节点结构体 struct MinHeapNode { char data; unsigned freq; struct MinHeapNode *left, *right; }; // 定义哈夫曼树结构体 struct MinHeap { unsigned size; unsigned capacity; struct MinHeapNode **array; }; // 创建一个新的哈夫曼树节点 struct MinHeapNode* newNode(char data, unsigned freq) { struct MinHeapNode* node = (struct MinHeapNode*) malloc(sizeof(struct MinHeapNode)); node->left = node->right = NULL; node->data = data; node->freq = freq; return node; } // 创建一个新的哈夫曼树 struct MinHeap* createMinHeap(unsigned capacity) { struct MinHeap* minHeap = (struct MinHeap*) malloc(sizeof(struct MinHeap)); minHeap->size = 0; minHeap->capacity = capacity; minHeap->array = (struct MinHeapNode**) malloc(minHeap->capacity * sizeof(struct MinHeapNode*)); return minHeap; } // 交换两个哈夫曼树节点 void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) { struct MinHeapNode* t = *a; *a = *b; *b = t; } // 维护最小堆的性质 void minHeapify(struct MinHeap* minHeap, int idx) { int smallest = idx; int left = 2 * idx + 1; int right = 2 * idx + 2; if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq) smallest = left; if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq) smallest = right; if (smallest != idx) { swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]); minHeapify(minHeap, smallest); } } // 判断最小堆是否只有一个节点 int isSizeOne(struct MinHeap* minHeap) { return (minHeap->size == 1); } // 从最小堆中取出最小的节点 struct MinHeapNode* extractMin(struct MinHeap* minHeap) { struct MinHeapNode* temp = minHeap->array[0]; minHeap->array[0] = minHeap->array[minHeap->size - 1]; 
--minHeap->size; minHeapify(minHeap, 0); return temp; } // 插入一个新的节点到最小堆中 void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) { ++minHeap->size; int i = minHeap->size - 1; while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) { minHeap->array[i] = minHeap->array[(i - 1) / 2]; i = (i - 1) / 2; } minHeap->array[i] = minHeapNode; } // 判断一个节点是否是叶子节点 int isLeaf(struct MinHeapNode* root) { return !(root->left) && !(root->right); } // 创建一个最小堆并插入所有的字符 struct MinHeap* createAndBuildMinHeap(char data[], int freq[], int size) { struct MinHeap* minHeap = createMinHeap(size); for (int i = 0; i < size; ++i) minHeap->array[i] = newNode(data[i], freq[i]); minHeap->size = size; for (int i = (minHeap->size - 1) / 2; i >= 0; --i) minHeapify(minHeap, i); return minHeap; } // 构建哈夫曼树 struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) { struct MinHeapNode *left, *right, *top; struct MinHeap* minHeap = createAndBuildMinHeap(data, freq, size); while (!isSizeOne(minHeap)) { left = extractMin(minHeap); right = extractMin(minHeap); top = newNode('$', left->freq + right->freq); top->left = left; top->right = right; insertMinHeap(minHeap, top); } return extractMin(minHeap); } // 生成哈夫曼编码表 void printCodes(struct MinHeapNode* root, int arr[], int top) { if (root->left) { arr[top] = 0; printCodes(root->left, arr, top + 1); } if (root->right) { arr[top] = 1; printCodes(root->right, arr, top + 1); } if (isLeaf(root)) { printf("%c: ", root->data); for (int i = 0; i < top; ++i) printf("%d", arr[i]); printf("\n"); } } // 对字符串进行哈夫曼编码 void encode(struct MinHeapNode* root, char* str) { int n = strlen(str); int arr[MAX_TREE_HT], top = 0; for (int i = 0; i < n; ++i) { struct MinHeapNode* p = root; while (!isLeaf(p)) { if (str[i] == '0') p = p->left; else p = p->right; } printf("%c", p->data); } } // 对哈夫曼编码进行解码 void decode(struct MinHeapNode* root, char* str) { int n = strlen(str); struct MinHeapNode* p = root; for (int i = 0; i < n; ++i) { if (str[i] 
== '0') p = p->left; else p = p->right; if (isLeaf(p)) { printf("%c", p->data); p = root; } } } // 主函数 int main() { char arr[] = {'A', 'B', 'C', 'D', 'E'}; int freq[] = {5, 4, 3, 2, 1}; int size = sizeof(arr) / sizeof(arr[0]); struct MinHeapNode* root = buildHuffmanTree(arr, freq, size); int arrCode[MAX_TREE_HT], top = 0; printCodes(root, arrCode, top); char str[] = "ABCD"; printf("Encoded string is: "); encode(root, str); printf("\n"); char code[] = "110100011"; printf("Decoded string is: "); decode(root, code); printf("\n"); return 0; } ```

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值