数据结构1:哈夫曼树和哈夫曼编码
本文使用C语言实现;如果使用C++/Java实现,利用面向对象的优势应该会更好。
这里不再阐述哈夫曼编码为何是最短前缀编码的理论知识
哈夫曼编码实现原理非常简单:
1.利用贪心的思想,构造当前最优树
2.进而构造出整体的最优树
3.再利用哈夫曼树生成哈夫曼编码
实现的难点在于数据结构的设计,操作数据结构也要仔细。
尽量通过模块化的思想逐步解决。
首先是结构体和函数声明
主要是三个函数:
- Huffman_List *init_huffman_list(char *c, int n);
  根据输入的字符串信息,生成森林list,存放huffman结点。
- Huffman_Tree *construct_huffman_tree(char *c, int n);
  从list中不断移除两个权值最小的结点,加入到哈夫曼树,并根据这两个结点的权值之和创建新的结点加入到list。
- Huffman_Table *generate_huffman_table(char *c, int n);
  遍历哈夫曼树,并生成哈夫曼编码。
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
/*
 * A single Huffman node. The same node type is used both as an element of
 * the singly linked forest list (through `next`) and as a tree node
 * (through `parent`, `left` and `right`).
 */
typedef struct huffman_node {
/* Occurrence count of `value`; a merged node holds the sum of its two children's weights. */
int weight;
/* Character represented by the node; merged (internal) nodes are created with '#'. */
char value;
char code;//'0' when the node is a left child, '1' when it is a right child (the root is given '0')
/* Parent in the tree; NULL until the node is merged under a new node. */
struct huffman_node *parent;
/* Next node in the forest list. */
struct huffman_node *next;
/* Children in the tree; both are NULL for a leaf. */
struct huffman_node *left;
struct huffman_node *right;
}Huffman_Node;
/* Handle for a Huffman tree: it only stores the root node. */
typedef struct huffman_tree {
/* Top-most node of the tree. */
Huffman_Node *root;
}Huffman_Tree;
//list: the forest, i.e. the nodes (leaves or sub-tree roots) that have not yet been merged
//into the Huffman tree; implemented as a linked list so insertion and removal are easy
typedef struct huffman_list {
/* Number of nodes currently in the list. */
int n;
/* First node of the list, linked through Huffman_Node.next. */
Huffman_Node *head;
}Huffman_List;
//One code entry: a character together with its code string
typedef struct huffman_code {
/* The encoded character. */
char value;
/* NUL-terminated string of '0'/'1' characters; stored as passed to add_code_str. */
char *code_str;
/* Next entry in the code table. */
struct huffman_code *next;
}Huffman_Code;
//Code table: a linked list holding multiple code entries
typedef struct huffman_table {
/* First entry of the table; NULL when the table is empty. */
Huffman_Code *head;
}Huffman_Table;
/* Build the forest list from the first n characters of c: one node per distinct character. */
Huffman_List *init_huffman_list(char *c, int n);
/* Build the Huffman tree for the first n characters of c by repeatedly merging the two lightest nodes. */
Huffman_Tree *construct_huffman_tree(char *c, int n);
/* Build the tree for c, derive the code table from it, then release the tree. */
Huffman_Table *generate_huffman_table(char *c, int n);
/* Recursively label the nodes below root with `code` and add one table entry per leaf; deep is root's depth. */
void generate_huffman_code(Huffman_Node *root, Huffman_Table *huffman_table, char code, int deep);
/* Insert a new (value, code_str) entry at the head of the table. */
void add_code_str(Huffman_Table *huffman_table, char value, char *code_str);
/* Recursively free root and every node below it. */
void free_huffman_node(Huffman_Node *root);
/* If a node with value == key is in the list, increment its weight and return 1; otherwise return 0. */
int find_and_upweight(Huffman_List *huffman_list, char key);
/* Insert a new node at the head of the list and return it. */
Huffman_Node *add_node(Huffman_List *huffman_list, char value, int weight);
/* Unlink the node with the smallest weight from the list and return it. */
Huffman_Node *remove_min(Huffman_List *huffman_list);
下面是详细代码
/*
 * Unlink the node with the SMALLEST weight from the forest list and return it.
 *
 * When several nodes share the smallest weight, the one closest to the head
 * is chosen. On success the list's node count is decremented and the returned
 * node's `next` link is cleared so it no longer points into the list; the
 * caller becomes responsible for the node.
 *
 * Returns NULL and leaves the list untouched when huffman_list is NULL or
 * the list is empty.
 */
Huffman_Node *remove_min(Huffman_List *huffman_list)
{
    if (huffman_list == NULL || huffman_list->head == NULL)
        return NULL;

    /*
     * min_link points at the pointer that references the current minimum
     * (either the list head or some node's `next`). Unlinking through it
     * needs neither a heap-allocated dummy node nor a head special case.
     */
    Huffman_Node **min_link = &huffman_list->head;
    Huffman_Node **link;
    for (link = &huffman_list->head->next; *link != NULL; link = &(*link)->next)
    {
        /* strict '<' keeps the first of several equally light nodes */
        if ((*link)->weight < (*min_link)->weight)
            min_link = link;
    }

    Huffman_Node *min_node = *min_link;
    *min_link = min_node->next;
    min_node->next = NULL;
    huffman_list->n--;
    return min_node;
}
/*
 * Create a node holding (value, weight) and insert it at the head of the
 * forest list; head insertion avoids walking to the tail.
 *
 * The new node starts with no parent and no children, and its `code` is
 * set to '\0' until a code is assigned to it.
 *
 * Returns the new node, or NULL (list unchanged) when huffman_list is NULL
 * or memory cannot be allocated.
 */
Huffman_Node *add_node(Huffman_List *huffman_list,char value,int weight)
{
    if (huffman_list == NULL)
        return NULL;

    Huffman_Node *new_node = (Huffman_Node *)malloc(sizeof *new_node);
    if (new_node == NULL)
        return NULL;

    new_node->weight = weight;
    new_node->value = value;
    new_node->code = '\0';
    new_node->parent = NULL;
    new_node->left = NULL;
    new_node->right = NULL;

    /* link in front of the current head */
    new_node->next = huffman_list->head;
    huffman_list->head = new_node;
    huffman_list->n++;
    return new_node;
}
/*
 * Look for a node whose value equals key.
 * If one exists, bump the weight of the first match by one and return 1;
 * return 0 when no node carries that character.
 */
int find_and_upweight(Huffman_List *huffman_list,char key)
{
    Huffman_Node *node;

    for (node = huffman_list->head; node != NULL; node = node->next)
    {
        if (node->value != key)
            continue;
        node->weight += 1;
        return 1;
    }
    return 0;
}
/*
 * Release a whole (sub)tree: both children are freed recursively before the
 * node itself. Passing NULL is a no-op.
 */
void free_huffman_node(Huffman_Node *root)
{
    if (root != NULL)
    {
        free_huffman_node(root->left);
        free_huffman_node(root->right);
        free(root);
    }
}
/*
 * Insert a new (value, code_str) entry at the head of the code table.
 *
 * The code_str pointer is stored as-is, not copied.
 *
 * If huffman_table is NULL nothing happens. If the entry cannot be
 * allocated, a message is written to stderr, the table is left unchanged
 * and code_str is not stored.
 */
void add_code_str(Huffman_Table *huffman_table, char value, char *code_str)
{
    if (huffman_table == NULL)
        return;

    Huffman_Code *new_code = (Huffman_Code *)malloc(sizeof *new_code);
    if (new_code == NULL)
    {
        fprintf(stderr, "add_code_str: out of memory, entry for '%c' dropped\n", value);
        return;
    }

    new_code->value = value;
    new_code->code_str = code_str;
    new_code->next = huffman_table->head;
    huffman_table->head = new_code;
}
/*
 * Build the initial forest list from the first n characters of c.
 *
 * Every distinct character gets one node whose weight is the number of
 * times it occurs. New characters are inserted at the head, so the node of
 * the first character ends up at the tail.
 *
 * When c is NULL or n <= 0 an empty list (head == NULL, n == 0) is returned;
 * no character is read in that case.
 *
 * Returns NULL when memory cannot be allocated; anything built so far is
 * released first. On success the caller owns the list and its nodes.
 */
Huffman_List *init_huffman_list(char *c, int n)
{
    Huffman_List *huffman_list = (Huffman_List *)malloc(sizeof *huffman_list);
    if (huffman_list == NULL)
        return NULL;
    huffman_list->head = NULL;
    huffman_list->n = 0;

    if (c == NULL)
        return huffman_list;

    int i;
    for (i = 0; i < n; i++)
    {
        /* known character: its weight has just been incremented */
        if (find_and_upweight(huffman_list, c[i]))
            continue;

        /* first occurrence: new node with weight 1 */
        if (add_node(huffman_list, c[i], 1) == NULL)
        {
            /* allocation failed: release the partial list */
            Huffman_Node *p = huffman_list->head;
            while (p != NULL)
            {
                Huffman_Node *following = p->next;
                free(p);
                p = following;
            }
            free(huffman_list);
            return NULL;
        }
    }
    return huffman_list;
}
/*
构造哈夫曼树
*/
Huffman_Tree *construct_huffman_tree(char *c,int n)
{
Huffman_List *huffman_list =init_huffman_list(c,n);
Huffman_Tree *huffman_tree = (Huffman_Tree *)malloc(sizeof(Huffman_Tree));
//huffman_tree->leaf_nodes_n = huffman_list->n;//叶子结点数等于初始的list->n
Huffman_Node *left;
Huffman_Node *right;
Huffman_Node *root;
int weight_sum;
//哈夫曼算法核心:贪心思想
while (huffman_list->n > 1)//只剩下1个结点,说明只有root,构造完成
{
left = remove_min(huffman_list);
right = remove_min(huffman_list);
weight_sum = left->weight + right->weight;
root = add_node(huffman_list, '#', weight_sum);//每次循环,删除两个最小的,再把权值加起来的新结点插入list
left->parent = right->parent = root;
root->left = left;
root->right = right;
}
free(huffman_list);//构造哈夫曼树之后,释放list
huffman_tree->root = root;
printf("成功构造哈夫曼树\n");
return huffman_tree;
}
/*
 * Walk the tree below root, label every visited node with the branch taken
 * to reach it, and add one code-table entry per leaf.
 *
 * root          node to visit; NULL is ignored
 * huffman_table table receiving the entries; NULL is ignored
 * code          label for root: '0' for a left child, '1' for a right child
 * deep          depth of root counted from 1 at the top of the tree;
 *               values below 1 are ignored
 *
 * A leaf's code string is the sequence of labels from the top of the tree
 * down to the leaf, so it starts with the label given to the top-most node.
 * The string is heap-allocated and handed to add_code_str.
 */
void generate_huffman_code(Huffman_Node *root,Huffman_Table *huffman_table,char code,int deep)
{
    if (root == NULL || huffman_table == NULL || deep < 1)
        return;

    root->code = code;

    if (root->left == NULL && root->right == NULL)
    {
        /* leaf: one character per level plus the terminating '\0' */
        char *code_str = (char *)malloc((size_t)deep + 1);
        if (code_str == NULL)
            return;
        code_str[deep] = '\0';

        /* fill from the back while climbing to the top; never index below 0 */
        int i = deep - 1;
        Huffman_Node *p;
        for (p = root; p != NULL && i >= 0; p = p->parent)
            code_str[i--] = p->code;

        /* if deep overstated the real depth, close the unfilled gap at the front */
        if (i >= 0)
            memmove(code_str, code_str + i + 1, (size_t)(deep - i));

        add_code_str(huffman_table, root->value, code_str);
        return;
    }

    generate_huffman_code(root->left, huffman_table, '0', deep + 1);
    generate_huffman_code(root->right, huffman_table, '1', deep + 1);
}
/*
 * Build the Huffman code table for the first n characters of c.
 *
 * A Huffman tree is constructed, traversed to produce one table entry per
 * leaf, and then released. The top-most node is labelled '0', so every
 * generated code string starts with that extra '0'.
 *
 * Returns the table (empty when the tree has no root), or NULL when the
 * tree could not be built or the table could not be allocated. The caller
 * owns the returned table and its entries.
 */
Huffman_Table *generate_huffman_table(char *c, int n)
{
    Huffman_Tree *huffman_tree = construct_huffman_tree(c, n);
    if (huffman_tree == NULL)
        return NULL;

    Huffman_Table *huffman_table = (Huffman_Table *)malloc(sizeof *huffman_table);
    if (huffman_table == NULL)
    {
        free_huffman_node(huffman_tree->root);
        free(huffman_tree);
        return NULL;
    }
    huffman_table->head = NULL;

    /* traverse only when there is something to traverse */
    if (huffman_tree->root != NULL)
        generate_huffman_code(huffman_tree->root, huffman_table, '0', 1);

    /* the table stores its own strings, so the tree is no longer needed */
    free_huffman_node(huffman_tree->root);
    free(huffman_tree);
    printf("成功生成哈夫曼编码\n");
    return huffman_table;
}
测试:
/*
 * Demo driver: read one whitespace-delimited word from stdin, build its
 * Huffman code table, print every "character:code" pair and release the
 * table.
 *
 * Returns 0 on success, EXIT_FAILURE when no input could be read or the
 * table could not be built.
 */
int main(void)
{
    char c[100] = {0};

    /* the field width keeps scanf inside the buffer (99 characters + '\0') */
    if (scanf("%99s", c) != 1)
    {
        fprintf(stderr, "no input\n");
        return EXIT_FAILURE;
    }

    Huffman_Table *huffman_table = generate_huffman_table(c, (int)strlen(c));
    if (huffman_table == NULL)
    {
        fprintf(stderr, "failed to build the Huffman table\n");
        return EXIT_FAILURE;
    }

    Huffman_Code *p;
    for (p = huffman_table->head; p != NULL; p = p->next)
        printf("%c:%s\n", p->value, p->code_str);

    /* release the table: each entry and the code string it holds */
    p = huffman_table->head;
    while (p != NULL)
    {
        Huffman_Code *p_next = p->next;
        free(p->code_str);
        free(p);
        p = p_next;
    }
    free(huffman_table);

#ifdef _WIN32
    /* "pause" is a Windows shell command; it keeps the console window open */
    system("pause");
#endif
    return 0;
}
测试结果:
输入abcdefabca,即:{a:3},{b:2},{c:2},{d:1},{e:1},{f:1}
输出的每个编码的第一个字符是根结点的占位符0,不属于真正的编码,所以不要看第一个字符:
如a的编码应该是:11
根据编码表,从根结点开始,0是往左走,1是往右走,得到的树应该长这样:
只是简单测试了一下,有哪里不对的地方请指出。