HuffmanTree
I learned this a few months ago; I recently found the code again and decided to upload it.
The code part:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ALPHABET_LENGTH 26
// Printable label for each leaf, in plain 'a'..'z' order: alphabet[k] labels
// tree node k + 1 and carries the weight alphabet_weights[k]. The order must
// match the c - 'a' + 1 mapping main uses to look up a character's code.
char alphabet[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
// Weight of each letter, listed in the same 'a'..'z' order as alphabet[].
// NOTE(review): the values look like English letter frequencies scaled by
// 100 (e.g. e = 12.70%) - confirm the source of the table.
int alphabet_weights[] = {817, 149, 278, 425, 1270, 223, 202, 609, 697, 15, 77, 403, 241, 675, 751, 193, 10, 599, 633, 906, 276, 98, 236, 15, 197, 7};
// One slot of the array-based Huffman tree. Nodes refer to each other by
// array index; index 0 is never used for a real node, so 0 doubles as the
// "none" marker in the three link fields.
typedef struct {
    int weight;  // weight of the leaf, or the sum of both children's weights
    int parent;  // index of the parent node, 0 while the node has no parent
    int lchild;  // index of the left child, 0 if there is none
    int rchild;  // index of the right child, 0 if there is none
} HTNode, *HuffmanTree;
// Finds the two lowest-weight nodes in HT[1..end] that have no parent yet.
//
// HT  - node array of the Huffman tree; slot 0 is unused, indices start at 1
// end - last index of HT that is inspected
// s1  - receives the index of the parentless node with the smallest weight
// s2  - receives the index of the parentless node with the second-smallest
//       weight
//
// On equal weights the node met first keeps its rank. If fewer than two
// parentless nodes exist, the missing result is reported as index 0 (never a
// valid node) and nothing beyond HT[end] is read.
void Select(HuffmanTree HT, int end, int *s1, int *s2){
    int min1 = 0;  // weight of the node stored in *s1 (valid once *s1 != 0)
    int min2 = 0;  // weight of the node stored in *s2 (valid once *s2 != 0)
    *s1 = 0;
    *s2 = 0;
    for (int j = 1; j <= end; j++){
        // A node that already has a parent is part of a subtree; skip it.
        if (HT[j].parent != 0){
            continue;
        }
        int w = HT[j].weight;
        if (*s1 == 0){
            // First candidate seen so far.
            min1 = w;
            *s1 = j;
        }
        else if (w < min1){
            // New minimum: the previous minimum becomes the runner-up.
            min2 = min1;
            *s2 = *s1;
            min1 = w;
            *s1 = j;
        }
        else if (*s2 == 0 || w < min2){
            // Not smaller than the minimum, but the best runner-up so far.
            min2 = w;
            *s2 = j;
        }
    }
}
// Builds an array-based Huffman tree for n weighted symbols.
//
// HT      - output; receives a new[]-allocated array of 2 * n nodes. Slot 0
//           is unused, slots 1..n are the leaves (in the order of weights[])
//           and slots n+1..2n-1 are the internal nodes, the last one being
//           the root. The caller releases it with delete[]. Set to NULL when
//           n < 1.
// weights - weights[k] is the weight of leaf k + 1; must hold n entries
// n       - number of symbols
void CreateHuffmanTree(HuffmanTree &HT, int weights[], int n){
    if (n < 1){
        HT = NULL;  // give the caller a defined value instead of garbage
        return;
    }
    int m = 2 * n - 1;  // a full binary tree with n leaves has 2n - 1 nodes
    // The trailing () value-initialises every node, so weight, parent,
    // lchild and rchild all start at 0 (slot 0 is allocated but unused).
    HT = new HTNode[m + 1]();
    for (int i = 1; i <= n; i++){
        HT[i].weight = weights[i - 1];
    }
    for (int i = n + 1; i <= m; i++){
        int min1, min2;
        // Search only the nodes built so far (1..i-1). Node i itself is still
        // parentless and has no weight yet, so it must not be a candidate.
        Select(HT, i - 1, &min1, &min2);
        HT[i].weight = HT[min1].weight + HT[min2].weight;
        HT[i].lchild = min1;
        HT[i].rchild = min2;
        HT[min1].parent = HT[min2].parent = i;
    }
}
// Huffman code table: an array of C strings in which entry i (1-based) holds
// the '0'/'1' code of leaf i, as filled in by CreateHuffmanCode.
typedef char **HuffmanCodeTable;
// Derives the Huffman code of every leaf by walking from the leaf to the root.
//
// HT - Huffman tree whose leaves are nodes 1..n and whose root has parent 0
// HC - output; receives a new[]-allocated table of n + 1 entries. HC[i] for
//      i in 1..n is a new[]-allocated, NUL-terminated string of '0'/'1'
//      characters ('0' = left branch, '1' = right branch, root first); HC[0]
//      is NULL. The caller releases each HC[i] and then HC with delete[].
//      Set to NULL when HT is NULL or n < 1.
// n  - number of leaves
void CreateHuffmanCode(HuffmanTree HT, HuffmanCodeTable &HC, int n){
    if (HT == NULL || n < 1){
        // Nothing to encode; also avoids writing cd[-1] below when n == 0.
        HC = NULL;
        return;
    }
    HC = new char *[n + 1];  // code table, slot 0 is unused
    HC[0] = NULL;
    // Scratch buffer: a leaf of a tree with n leaves is at most n - 1 edges
    // below the root, so n bytes hold the longest code plus its terminator.
    char *cd = new char[n];
    cd[n - 1] = '\0';
    for (int i = 1; i <= n; i++){
        // The code is produced back to front, so fill cd from its end.
        int start = n - 1;
        int c = i;
        int f = HT[i].parent;
        while (f != 0){  // climb from the leaf until the root is passed
            start--;
            if (HT[f].lchild == c){
                cd[start] = '0';  // c is the left child of f
            }
            else{
                cd[start] = '1';  // c is the right child of f
            }
            c = f;
            f = HT[f].parent;
        }
        HC[i] = new char[n - start];  // code length plus terminator
        strcpy(HC[i], &cd[start]);
    }
    delete[] cd;
}
int main(){
HuffmanTree ht;
CreateHuffmanTree(ht, alphabet_weights, ALPHABET_LENGTH);
printf("Alphabet\tNode\t|Weight\t\t|Parent\t|Lchild\t|Rchild\t|\n");
for (int j = 1; j <= ALPHABET_LENGTH * 2 - 1; j++){
HTNode node = ht[j];
if (j <= ALPHABET_LENGTH){
printf("%c", alphabet[j - 1]);
}
printf("\t\t%d\t|%d\t|\t|%d\t|%d\t|%d\t|\n", j, node.weight, node.parent, node.lchild, node.rchild);
}
HuffmanCodeTable hc;
CreateHuffmanCode(ht, hc, ALPHABET_LENGTH);
printf("Alphabet\tNode\tAlphabet_weights->\tEncode\n");
for (int j = 1; j <= ALPHABET_LENGTH; j++){
if (j <= ALPHABET_LENGTH){
printf("%c", alphabet[j - 1]);
}
printf("\t\t%d\t%d\t\t->\t%d\n", j, alphabet_weights[j - 1], hc[j]);
}
//输入单词
char text[100];
printf("========== Input your Text ==========\n");
scanf("%s", &text);
//编码
int text_len = strlen(text);
char *encoded_text = new char[text_len * ALPHABET_LENGTH];
int start = 0; //每个字母的起始位置
for (int i = 0; i < text_len; i++){ //开启循环逐个判断字符串进行编码
char c = text[i];
int index = c - 'a' + 1; //利用ascii码匹配权重值
char *huffman_code = hc[index];
strcpy(encoded_text + start, huffman_code); //把当前字符串进行编码
start += strlen(huffman_code);
}
printf("******* Encode Completed *******\n%s\n", encoded_text);
//解码
int root_index = 1;
while (ht[root_index].parent != 0){ //直到找到根节点才能进行解码
root_index++;
}
char decoded_text[100]; //解码存储位置
int cur = 0; //做当前编码存储位置
int encode_len = strlen(encoded_text); //获取编码字符串长度
int l = 0; //带入编码循环
while (l < encode_len){
int p = root_index; //利用p判断路径(伪指针)
while (ht[p].lchild != 0 || ht[p].rchild != 0){ //直到叶子结点才停止
char code = encoded_text[l++]; //寻找叶子结点
if (code == '0'){ //0为左孩子1为右孩子
p = ht[p].lchild;
}
else{
p = ht[p].rchild;
}
}
decoded_text[cur] = 'a' + p - 1; //导入到解码存储空间
cur++;
}
decoded_text[cur] = '\0';
printf("******* Decode Completed *******\n%s\n", decoded_text);
return 0;
}
Output (the encode/decode part is not included):
Alphabet Node |Weight |Parent |Lchild |Rchild |
a 1 |817 | |44 |0 |0 |
b 2 |149 | |32 |0 |0 |
c 3 |278 | |36 |0 |0 |
d 4 |425 | |38 |0 |0 |
e 5 |1270 | |47 |0 |0 |
f 6 |223 | |34 |0 |0 |
g 7 |202 | |33 |0 |0 |
h 8 |609 | |41 |0 |0 |
i 9 |697 | |42 |0 |0 |
j 10 |15 | |28 |0 |0 |
k 11 |77 | |30 |0 |0 |
l 12 |403 | |38 |0 |0 |
n 13 |241 | |35 |0 |0 |
m 14 |675 | |42 |0 |0 |
o 15 |751 | |43 |0 |0 |
p 16 |193 | |32 |0 |0 |
q 17 |10 | |27 |0 |0 |
r 18 |599 | |40 |0 |0 |
s 19 |633 | |41 |0 |0 |
t 20 |906 | |45 |0 |0 |
u 21 |276 | |36 |0 |0 |
v 22 |98 | |31 |0 |0 |
w 23 |236 | |35 |0 |0 |
x 24 |15 | |28 |0 |0 |
y 25 |197 | |33 |0 |0 |
z 26 |7 | |27 |0 |0 |
27 |17 | |29 |26 |17 |
28 |30 | |29 |10 |24 |
29 |47 | |30 |27 |28 |
30 |124 | |31 |29 |11 |
31 |222 | |34 |22 |30 |
32 |342 | |37 |2 |16 |
33 |399 | |37 |25 |7 |
34 |445 | |39 |31 |6 |
35 |477 | |39 |23 |13 |
36 |554 | |40 |21 |3 |
37 |741 | |43 |32 |33 |
38 |828 | |44 |12 |4 |
39 |922 | |45 |34 |35 |
40 |1153 | |46 |36 |18 |
41 |1242 | |46 |8 |19 |
42 |1372 | |47 |14 |9 |
43 |1492 | |48 |37 |15 |
44 |1645 | |48 |1 |38 |
45 |1828 | |49 |20 |39 |
46 |2395 | |49 |40 |41 |
47 |2642 | |50 |5 |42 |
48 |3137 | |50 |43 |44 |
49 |4223 | |51 |45 |46 |
50 |5779 | |51 |47 |48 |
51 |10002 | |0 |49 |50 |
Alphabet Node Alphabet_weights-> Encode
a 1 817 -> 10039184
b 2 149 -> 10039200
c 3 278 -> 10038904
d 4 425 -> 10038920
e 5 1270 -> 10030968
f 6 223 -> 10030984
g 7 202 -> 10031000
h 8 609 -> 10031016
i 9 697 -> 10031032
j 10 15 -> 10031048
k 11 77 -> 10031072
l 12 403 -> 10031088
n 13 241 -> 10031104
m 14 675 -> 10031120
o 15 751 -> 10031136
p 16 193 -> 10031152
q 17 10 -> 10031168
r 18 599 -> 10031192
s 19 633 -> 10031208
t 20 906 -> 10031248
u 21 276 -> 10031264
v 22 98 -> 10031280
w 23 236 -> 10031296
x 24 15 -> 10032248
y 25 197 -> 10031312
z 26 7 -> 10032272