哈夫曼树(Huffman Tree)是一种经典的数据结构,也称为最优二叉树。它常被用于数据压缩,尤其适合文本文件:文本中各个字符出现的频率差异很大,给出现频率高的字符分配较短的编码,就可以减小编码后的总长度。
哈夫曼树由一组带权值的叶子节点构建而成,其中每个叶子节点代表一个字符,权值通常是该字符出现的频率。哈夫曼树的构建过程基于贪心策略:频率高的字符对应的叶子节点位于较浅的层次,频率低的字符对应的叶子节点位于较深的层次,从而使整棵树的带权路径长度(WPL,即各叶子节点的权值与其到根节点的路径长度的乘积之和)最小。
哈夫曼树的构建过程是反复合并两个权值最小的节点:每次选出权值最小的两个节点作为左右孩子,生成一个权值为二者之和的新父节点,直到只剩下一个没有父节点的根节点为止。构建完成后,从根节点走到某个叶子节点的路径(走左分支记 0,走右分支记 1)就是该叶子节点对应字符的编码。哈夫曼编码是一种前缀编码,它保证任何一个字符的编码都不是另一个字符编码的前缀,因此解码时可以无歧义地恢复出原始文本。
代码
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAXSIZE 32767
/* Element type of a code string: PrintHT stores each code as a
 * NUL-terminated sequence of '0' and '1' characters. */
typedef char HuffmanCode;
/*
 * One node of the Huffman tree. The tree lives in a flat array
 * (HuffmanTree), so the links are array indices, not pointers;
 * InitialHT sets every field to -1 before filling in the leaves.
 */
typedef struct {
    char data;    /* character stored in a leaf */
    int weight;   /* occurrence count, or the sum of both children */
    int parent;   /* index of the parent node, -1 while unattached */
    int lchild;   /* index of the left child, -1 if none */
    int rchild;   /* index of the right child, -1 if none */
} HTNode, *HuffmanTree;
/* One row of the character-frequency table built by InitialEssay. */
typedef struct {
    char data;    /* the character being counted */
    int weight;   /* how many times it has been seen */
} Auxiliary;
/* Frequency table filled by InitialEssay: one entry per distinct character. */
Auxiliary character[MAXSIZE] = {};
/* Number of entries of `character` currently in use; it is also the number
 * of leaves of the Huffman tree built from the table. */
int length = 0;
/*
 * Count how often each distinct character occurs in `essay`.
 *
 * Results go into the global `character` table, and the global `length`
 * is advanced by one for every character not seen before. Nothing is reset
 * here, so counts accumulate across calls.
 *
 * essay: NUL-terminated text to analyse; a NULL pointer is ignored.
 */
void InitialEssay(char* essay) {
    if (essay == NULL) {
        return;
    }
    // Measure the text once: strlen() in the loop condition would rescan
    // the whole string on every iteration.
    size_t total = strlen(essay);
    for (size_t i = 0; i < total; i++) {
        // Look for an existing entry; entries are unique, so stop at the
        // first match.
        int slot = 0;
        while (slot < length && character[slot].data != essay[i]) {
            slot++;
        }
        if (slot == length) {
            // First occurrence: claim the next free entry.
            character[slot].data = essay[i];
            length++;
        }
        character[slot].weight++;
    }
}
/*
 * Allocate the node array for the Huffman tree and load the leaves.
 *
 * A tree with `length` leaves has 2 * length - 1 nodes. Every field of
 * every node starts at -1; the first `length` nodes then receive the
 * characters and weights from the global `character` table.
 *
 * HT: receives the newly allocated array (release it with free()).
 *     Left untouched when `length` <= 0.
 *
 * Terminates the program with an error message if the allocation fails,
 * because no caller can continue without the tree.
 */
void InitialHT(HuffmanTree &HT) {
    if (length <= 0) {
        return;
    }
    int number = length * 2 - 1;
    HT = (HTNode*)malloc(sizeof *HT * (size_t)number);
    if (HT == NULL) {
        fprintf(stderr, "InitialHT: out of memory\n");
        exit(EXIT_FAILURE);
    }
    for (int i = 0; i < number; i++) {
        HT[i].data = -1;
        HT[i].weight = -1;
        HT[i].parent = -1;
        HT[i].lchild = -1;
        HT[i].rchild = -1;
    }
    for (int i = 0; i < length; i++) {
        HT[i].data = character[i].data;
        HT[i].weight = character[i].weight;
    }
}
/*
 * Find the two lightest nodes that have no parent yet.
 *
 * HT:        node array to search.
 * size:      only the nodes HT[0 .. size-1] are considered.
 * position1: receives the index of the lightest parentless node,
 *            or -1 if there is none.
 * position2: receives the index of the second lightest parentless node,
 *            or -1 if fewer than two exist.
 *
 * When weights tie, the node with the lower index is preferred.
 */
void Select(HuffmanTree &HT, int size, int& position1, int& position2) {
    // Always give the outputs a defined value, so the caller can detect
    // "not found" and never reads an uninitialised index.
    position1 = -1;
    position2 = -1;
    if (HT == NULL) {
        return;
    }
    for (int i = 0; i < size; i++) {
        if (HT[i].parent != -1) {
            continue;   // already merged into a subtree
        }
        // Compare against the current best nodes instead of a fixed
        // sentinel value, so arbitrarily large weights are handled.
        if (position1 == -1 || HT[i].weight < HT[position1].weight) {
            position2 = position1;
            position1 = i;
        }
        else if (position2 == -1 || HT[i].weight < HT[position2].weight) {
            position2 = i;
        }
    }
}
/*
 * Build the internal nodes of the Huffman tree.
 *
 * Expects HT to hold 2 * length - 1 nodes with the first `length` filled
 * in as leaves (as InitialHT does). Each round merges the two lightest
 * parentless nodes under a new node i, so the last node of the array ends
 * up as the root.
 *
 * Does nothing when there are fewer than two leaves or HT is NULL.
 */
void CreateHT(HuffmanTree &HT) {
    if (length <= 1 || HT == NULL) {
        return;
    }
    int number = length * 2 - 1;
    for (int i = length; i < number; i++) {
        // Start from "not found" so a failed selection is detectable.
        int position1 = -1;
        int position2 = -1;
        Select(HT, i, position1, position2);
        if (position1 < 0 || position2 < 0) {
            // No two mergeable nodes: stop rather than index the array
            // with an invalid position.
            return;
        }
        HT[position1].parent = i;
        HT[position2].parent = i;
        HT[i].lchild = position1;
        HT[i].rchild = position2;
        HT[i].weight = HT[position1].weight + HT[position2].weight;
    }
}
/*
 * Print the Huffman code of every leaf as "<char> -> <code>", one per line.
 *
 * Each code is found by walking from the leaf up to the root and writing
 * the branch digits ('0' for a left child, '1' for a right child) from the
 * end of a scratch buffer towards its front, so the digits end up in
 * root-to-leaf order.
 *
 * HT: tree built by InitialHT / CreateHT. Nothing is printed when it is
 *     NULL or when `length` <= 0.
 * HC: unused. It is received by value, so a table allocated here could
 *     never be handed back to the caller; the codes are printed straight
 *     from the scratch buffer instead of being stored and leaked.
 */
void PrintHT(HuffmanTree &HT, HuffmanCode** HC) {
    (void)HC;
    if (HT == NULL || length <= 0) {
        return;
    }
    // A code has at most length - 1 digits; one extra byte holds the
    // terminator at the fixed position temp[length].
    char* temp = (char*)malloc(sizeof(char) * ((size_t)length + 1));
    if (temp == NULL) {
        fprintf(stderr, "PrintHT: out of memory\n");
        return;
    }
    temp[length] = '\0';
    for (int i = 0; i < length; i++) {
        int start = length;
        int ins = i;                 // node currently being left
        int flag = HT[i].parent;     // node currently being entered
        while (flag != -1 && start > 0) {
            start--;
            if (ins == HT[flag].lchild) {
                temp[start] = '0';
            }
            else {
                temp[start] = '1';
            }
            ins = flag;
            flag = HT[flag].parent;
        }
        printf("%c -> %s\n", HT[i].data, temp + start);
    }
    free(temp);
}
/*
 * Read one whitespace-delimited word from standard input, build its
 * Huffman tree and print the code of every distinct character.
 *
 * Returns 0 on success and 1 when no input could be read.
 */
int main() {
    HuffmanTree HT = NULL;
    HuffmanCode** HC = NULL;
    char essay[MAXSIZE] = "";
    printf("请输入想要进行编码的原文:\n");
    // The field width must stay equal to MAXSIZE - 1 so that scanf can
    // never write past the end of `essay`.
    if (scanf("%32766s", essay) != 1) {
        fprintf(stderr, "未读取到任何输入。\n");
        return 1;
    }
    InitialEssay(essay);
    InitialHT(HT);
    CreateHT(HT);
    printf("该文本各个字符的哈夫曼编码为:\n");
    PrintHT(HT, HC);
    free(HT);
    return 0;
}
运行结果