Huffman树是带权路径长度最小的二叉树(最优二叉树),能够降低查找代价,压缩存储空间。
Huffman编码是一种前缀码,即任何一个字符的编码都不是另一个字符编码的前缀。
我的工作是:给定一个长度未知的 string,生成对应的Huffman编码表和Huffman树,并完成Huffman编码与解码。
预定义部分
#include <iostream>
#include<cstdio>
#include<cstdlib>
#include<string>
#include<stack>
using namespace std;
// Node of the Huffman tree. All nodes live in one array and refer to each
// other by array index rather than by pointer.
typedef struct {
// Weight of the node: a symbol frequency for a leaf, the sum of both
// children for an internal node.
int weight;
// Array indices of the parent, left child and right child; 0 means "none".
int parent, lchild, rchild;
}HTNode, * HuffmanTree;
// Code table type: entry i is the NUL-terminated '0'/'1' code string of symbol i.
typedef char** HuffmanCode;
// Sentinel intended to be larger than any node weight.
int Max = 100000;
// Code table filled by HuffmanCoding (entries 1..sumchar).
HuffmanCode HC;
// Huffman tree filled by HuffmanCoding (nodes 1..2*sumchar-1, root last).
HuffmanTree HT;
// Raw copy of the input characters made by getChars.
char* Chars;
// Distinct characters of the input, in the order produced by getChars.
char* newchar;
// weight[i] is the number of occurrences of newchar[i].
int* weight;
// Number of distinct characters stored in newchar / weight.
int sumchar;
// Weights read back from the tree by HuffmanDecoding, one per symbol.
int* newweight;
// NUL-terminated '0'/'1' string produced by HuffmanCodingRoot.
char* HuffCode;
// Index 0 of HC and HT is not used for data; real entries start at index 1.
实现函数
查找当前Huffman树中最小的两个节点:在HT[1..i-1]中选择parent为0且weight最小的两个结点。
//S1<S2
void Select( int L, int* S1, int* S2) {
HuffmanTree p = HT;
int small1 = Max, small2 = Max;
for (int i = 1; i <= L; i++) {
if ((p + i)->parent == 0) {
if ((p + i)->weight < small1) {
small2 = small1;
*S2 = *S1;
small1 = (p + i)->weight;
*S1 = i;
}
else if ((p + i)->weight < small2){
small2= (p + i)->weight;
*S2 = i;
}
}
}
}
生成Huffman树,HuffmanCode,Huffman编码对照表
// Builds the Huffman tree HT and the code table HC from n symbol weights and
// prints the resulting table.
//
// W : n weights; W[i-1] becomes the weight of leaf HT[i]. The caller in this
//     file passes them sorted from high to low, but the construction itself
//     does not rely on any order.
// n : number of distinct symbols; newchar[0..n-1] must hold the symbols that
//     belong to the weights. Nothing is built when n <= 0.
//
// On return HT[1..2n-1] holds the tree (the root is the last node) and
// HC[1..n] holds one NUL-terminated '0'/'1' string per symbol. Index 0 of
// both arrays is a placeholder. Everything is allocated with malloc and owned
// by the caller. A single-symbol alphabet yields a one-node tree and the code
// "0". The process exits with an error message if memory cannot be allocated.
void HuffmanCoding( int* W, int n) {
    if (W == NULL || n <= 0)
        return;
    const int m = 2 * n - 1;  // n leaves plus n - 1 internal nodes

    HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
    if (HT == NULL) {
        cerr << "HuffmanCoding: out of memory" << endl;
        exit(EXIT_FAILURE);
    }
    // Slot 0 is the "no node" placeholder; it is zeroed too so that it is
    // never read uninitialised.
    for (int i = 0; i <= m; i++) {
        HT[i].weight = (i >= 1 && i <= n) ? W[i - 1] : 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }
    cout << "huffman初始化" << endl;

    // Repeatedly merge the two lightest roots into a new node i.
    for (int i = n + 1; i <= m; i++) {
        int S1 = 0, S2 = 0;
        Select( i - 1, &S1, &S2);
        HT[S1].parent = i;
        HT[S2].parent = i;
        HT[i].lchild = S1;
        HT[i].rchild = S2;
        HT[i].weight = HT[S1].weight + HT[S2].weight;
    }

    HC = (HuffmanCode)malloc((n + 1) * sizeof(char*));
    // Scratch buffer filled from the back: a code has at most n - 1 bits
    // (one bit when n == 1), plus the terminator.
    char* cd = (char*)malloc((n + 1) * sizeof(char));
    if (HC == NULL || cd == NULL) {
        cerr << "HuffmanCoding: out of memory" << endl;
        exit(EXIT_FAILURE);
    }
    HC[0] = NULL;
    cd[n] = '\0';
    for (int i = 1; i <= n; ++i) {
        int start = n;
        // Walk from the leaf up to the root, recording the branch taken.
        for (int c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent) {
            --start;
            cd[start] = (HT[f].lchild == c) ? '0' : '1';
        }
        if (start == n) {
            // The leaf is the root (single-symbol alphabet): give it a
            // one-bit code so that encoded text is not empty.
            --start;
            cd[start] = '0';
        }
        HC[i] = (char*)malloc((n - start + 1) * sizeof(char));
        if (HC[i] == NULL) {
            cerr << "HuffmanCoding: out of memory" << endl;
            exit(EXIT_FAILURE);
        }
        for (int k = 0; start + k <= n; k++)  // copies the terminator as well
            HC[i][k] = cd[start + k];
    }
    cout << "huffman初始化完成" << endl;
    free(cd);

    cout << "赫夫曼编码表" << endl;
    for (int i = 0; i < n; i++) {
        cout << '\n' << newchar[i] << ":" << W[i] << ":";
        cout << HC[i + 1];
        cout << endl;
    }
}//
生成Huffman编码序列
// Encodes Str with the code table HC and prints the resulting bit string.
//
// Str : text to encode. Each character is looked up in newchar[0..sumchar-1]
//       and replaced by its code HC[index + 1]; characters that are not in
//       the table are skipped.
//
// The result is stored in the global HuffCode as a malloc'ed, NUL-terminated
// string of '0'/'1' characters (a buffer from an earlier call is released
// first). The output is an empty string when no code table exists. The bits
// are collected in a std::string and copied once, instead of growing the
// buffer with one unchecked realloc per bit.
void HuffmanCodingRoot(string Str) {
    string bits;
    if (HC != NULL && newchar != NULL) {
        for (string::size_type i = 0; i < Str.length(); i++) {
            for (int j = 0; j < sumchar; j++) {
                if (Str[i] == newchar[j]) {
                    bits += HC[j + 1];
                    break;
                }
            }
        }
    }

    free(HuffCode);  // free(NULL) is a no-op on the first call
    HuffCode = (char*)malloc((bits.length() + 1) * sizeof(char));
    if (HuffCode == NULL) {
        cerr << "HuffmanCodingRoot: out of memory" << endl;
        exit(EXIT_FAILURE);
    }
    for (string::size_type i = 0; i < bits.length(); i++)
        HuffCode[i] = bits[i];
    HuffCode[bits.length()] = '\0';

    cout << "赫夫曼编码字符串" << endl;
    cout << HuffCode;
    cout << endl;
}
中序遍历打印Huffman树
// Prints the subtree rooted at node n of the global tree HT in in-order
// (left subtree, node itself, right subtree). A child index of 0 means the
// child does not exist.
void InOrder(int n) {
    const int left = HT[n].lchild;
    const int right = HT[n].rchild;

    if (left != 0)
        InOrder(left);

    cout << " 节点:" << n << " 权重: " << HT[n].weight << " 左节点:" << left;
    cout << " 右节点:" << right << endl;

    if (right != 0)
        InOrder(right);
}
// Prints the whole Huffman tree in in-order, starting at the root node
// 2 * sumchar - 1.
//
// HT : tree handle passed by the caller. InOrder reads the global tree, so
//      the parameter (which shadows the global) is only checked for NULL.
//
// Does nothing when there is no tree or no symbol; previously that case
// indexed the node array with -1 or dereferenced a null pointer.
void HuffmanTraverse(HuffmanTree HT) {
    if (HT == NULL || ::HT == NULL || sumchar <= 0)
        return;
    InOrder(2 * sumchar - 1);
}
根据Huffman树和编码表,生成反映各叶子节点权值的数组(沿每个字符的编码从根走到叶子)
// Follows every code HC[1..sumchar] from the root of HT down to its leaf and
// stores the weight found there in newweight[0..sumchar-1].
//
// newweight is allocated here (zero-initialised; a buffer from an earlier
// call is released first) and owned by the caller. Nothing happens when
// there are no symbols or no tree / code table. If a code contains a
// character other than '0' or '1' the function stops; entries not reached
// stay 0.
void HuffmanDecoding() {
    if (sumchar <= 0 || HT == NULL || HC == NULL)
        return;
    const int n = sumchar;

    free(newweight);  // free(NULL) is a no-op on the first call
    newweight = (int*)calloc(n, sizeof(int));
    if (newweight == NULL) {
        cerr << "HuffmanDecoding: out of memory" << endl;
        exit(EXIT_FAILURE);
    }

    const int root = 2 * n - 1;
    for (int i = 1; i <= n; i++) {
        const char* code = HC[i];
        int k = root;
        // Iterate up to the terminator directly; no strlen call is needed.
        for (int j = 0; code[j] != '\0'; j++) {
            if (HT[k].lchild == 0 && HT[k].rchild == 0)
                break;  // already at a leaf (tree consisting of one node)
            if (code[j] == '0')
                k = HT[k].lchild;   // go left
            else if (code[j] == '1')
                k = HT[k].rchild;   // go right
            else
                return;             // malformed code
        }
        newweight[i - 1] = HT[k].weight;
    }
}
生成频度数组与字符数组,并按频度从高到低排列
// Analyses Str and fills the global frequency tables:
//   Chars   - raw copy of the characters of Str (not NUL-terminated),
//   newchar - the distinct characters,
//   weight  - weight[i] is the number of occurrences of newchar[i],
//   sumchar - the number of distinct characters.
// Afterwards both tables are ordered by weight from high to low. All buffers
// are allocated with malloc/realloc and owned by the caller.
void getChars(string Str) {
    const string::size_type len = Str.length();

    Chars = (char*)malloc(len * sizeof(char));
    for (string::size_type pos = 0; pos < len; ++pos)
        Chars[pos] = Str[pos];

    // Both tables always keep one spare slot for the next new character.
    newchar = (char*)malloc(sizeof(char));
    weight = (int*)malloc(sizeof(int));
    sumchar = 0;

    for (string::size_type pos = 0; pos < len; ++pos) {
        const char ch = Chars[pos];

        int slot = 0;
        while (slot < sumchar && newchar[slot] != ch)
            ++slot;

        if (slot < sumchar) {
            // Known character: just count it.
            weight[slot] += 1;
            continue;
        }

        // First occurrence: use the spare slot, then grow both tables by one.
        newchar[sumchar] = ch;
        weight[sumchar] = 1;
        ++sumchar;
        newchar = (char*)realloc(newchar, (sumchar + 1) * sizeof(char));
        weight = (int*)realloc(weight, (sumchar + 1) * sizeof(int));
    }

    // Exchange sort, descending by weight; characters move with their weight.
    for (int a = 0; a < sumchar; ++a) {
        for (int b = a + 1; b < sumchar; ++b) {
            if (weight[a] >= weight[b])
                continue;
            const int heavier = weight[b];
            weight[b] = weight[a];
            weight[a] = heavier;
            const char symbol = newchar[b];
            newchar[b] = newchar[a];
            newchar[a] = symbol;
        }
    }
}
解码
// Tests whether the code HC is a prefix of the bit string str.
//
// str : NUL-terminated bit string to examine.
// HC  : NUL-terminated code to look for at the start of str.
//
// Returns 1 when HC is non-empty and str starts with HC, otherwise 0. A null
// pointer or an empty code never matches; the empty-code case used to read
// one byte past the end of HC.
int small(const char* str, const char* HC) {
    if (str == NULL || HC == NULL || HC[0] == '\0')
        return 0;
    for (int i = 0; HC[i] != '\0'; i++) {
        // Also stops at the end of str, whose terminator differs from HC[i].
        if (str[i] != HC[i])
            return 0;
    }
    return 1;
}
// Decodes the bit string HCode with the code table HC and prints the text.
//
// HCode : NUL-terminated string of '0'/'1' characters.
//
// At every position the code that prefixes the remaining bits is looked up
// (Huffman codes are prefix-free, so at most one code matches) and the
// matching symbol newchar[k - 1] is emitted. Decoding stops at the end of the
// input or at the first position where no code matches. Matching directly on
// the input removes the fixed 100-byte scratch buffer, which overflowed for
// long codes, and the read past the terminator after an unmatched tail.
// Output: a line break followed by the decoded text.
void HuffSolve(char* HCode) {
    string decoded;
    if (HCode != NULL && HC != NULL && newchar != NULL) {
        int pos = 0;
        while (HCode[pos] != '\0') {
            int matched = 0;  // 1-based symbol index, 0 = no code matches
            for (int k = 1; k <= sumchar; k++) {
                if (small(HCode + pos, HC[k]) == 1) {
                    matched = k;
                    break;
                }
            }
            if (matched == 0)
                break;  // remaining bits do not form a complete code

            int len = 0;
            while (HC[matched][len] != '\0')
                len++;
            if (len == 0)
                break;  // an empty code would never advance
            decoded += newchar[matched - 1];
            pos += len;
        }
    }
    cout << endl;
    cout << decoded;
}
使用
// Demo driver: reads one whitespace-delimited word from standard input,
// builds its Huffman tree and code table, prints the tree, encodes and
// decodes the word, and prints every code with the weight read back from the
// tree. All global buffers, including the individual code strings, the
// encoded text and the weight array, are released before returning.
int main()
{
    string Str;
    if (!(cin >> Str)) {
        cerr << "No input text." << endl;
        return EXIT_FAILURE;
    }
    // Fill Chars, newchar, weight and sumchar.
    getChars(Str);
    HuffmanCoding( weight, sumchar);
    if (HT == NULL || HC == NULL) {
        // No tree was built, so none of the following steps can run.
        cerr << "Huffman tree could not be built for this input." << endl;
        free(Chars);
        free(newchar);
        free(weight);
        return EXIT_FAILURE;
    }
    cout << "中序遍历赫夫曼编树" << endl;
    HuffmanTraverse(HT);
    cout << "\n赫夫曼编码字符串" << endl;
    HuffmanCodingRoot(Str);
    cout << "赫夫曼解码" << endl;
    HuffSolve( HuffCode);
    HuffmanDecoding();
    for (int i = 0; i < sumchar && newweight != NULL; i++) {
        cout << endl;
        cout << HC[i + 1];
        cout << " weight" << newweight[i] << '\n';
    }
    // HC[0] is a placeholder; only entries 1..sumchar own a code string.
    for (int i = 1; i <= sumchar; i++)
        free(HC[i]);
    free(HC);
    free(HT);
    free(HuffCode);
    free(newweight);
    free(Chars);
    free(newchar);
    free(weight);
    cout << "Hello World!\n";
    return 0;
}
测试
afaffwaffuwfaffhefuewfhkjfhayfgwahefbwhdfgayudfgabkwhfbehfgawyefgawehdfbvahdfgwyewfbcadfhbaeiufgadfhcvadjbaiufgewdfhv
huffman初始化
huffman初始化完成
赫夫曼编码表
f:26:01
a:16:101
h:12:000
w:11:1111
e:9:1110
g:8:1001
d:8:1100
b:7:0011
u:5:0010
y:4:10001
v:3:10000
j:2:110100
k:2:110101
c:2:110110
i:2:110111
中序遍历赫夫曼编树
节点:3 权重: 12 左节点:0 右节点:0
节点:24 权重: 24 左节点:3 右节点:20
节点:9 权重: 5 左节点:0 右节点:0
节点:20 权重: 12 左节点:9 右节点:8
节点:8 权重: 7 左节点:0 右节点:0
节点:27 权重: 50 左节点:24 右节点:1
节点:1 权重: 26 左节点:0 右节点:0
节点:29 权重: 117 左节点:27 右节点:28
节点:11 权重: 3 左节点:0 右节点:0
节点:18 权重: 7 左节点:11 右节点:10
节点:10 权重: 4 左节点:0 右节点:0
节点:21 权重: 15 左节点:18 右节点:6
节点:6 权重: 8 左节点:0 右节点:0
节点:25 权重: 31 左节点:21 右节点:2
节点:2 权重: 16 左节点:0 右节点:0
节点:28 权重: 67 左节点:25 右节点:26
节点:7 权重: 8 左节点:0 右节点:0
节点:22 权重: 16 左节点:7 右节点:19
节点:12 权重: 2 左节点:0 右节点:0
节点:16 权重: 4 左节点:12 右节点:13
节点:13 权重: 2 左节点:0 右节点:0
节点:19 权重: 8 左节点:16 右节点:17
节点:14 权重: 2 左节点:0 右节点:0
节点:17 权重: 4 左节点:14 右节点:15
节点:15 权重: 2 左节点:0 右节点:0
节点:26 权重: 36 左节点:22 右节点:23
节点:5 权重: 9 左节点:0 右节点:0
节点:23 权重: 20 左节点:5 右节点:4
节点:4 权重: 11 左节点:0 右节点:0
赫夫曼编码字符串
赫夫曼编码字符串
101011010101111110101010010111101101010100011100100101110111101000110101110100010001011000101100111111010001110010011111100011000110011011000100101100011001101001111010111110000100111110000011001101111110001111001100110111111110000110001001110000101000110001100111111000111101111010011110110101110001000001110111101101110010011001101110001000110110100001011100110100001110111011100100110011110111111000100010000
赫夫曼解码
afaffwaffuwfaffhefuewfhkjfhayfgwahefbwhdfgayudfgabkwhfbehfgawyefgawehdfbvahdfgwyewfbcadfhbaeiufgadfhcvadjbaiufgewdfhv
01 weight26
101 weight16
000 weight12
1111 weight11
1110 weight9
1001 weight8
1100 weight8
0011 weight7
0010 weight5
10001 weight4
10000 weight3
110100 weight2
110101 weight2
110110 weight2
110111 weight2
Hello World!
C:\Users\86158\source\repos\HuffmanTree\Debug\HuffmanTree.exe (进程 18580)已退出,代码为 0。
按任意键关闭此窗口. . .
全部代码
#include <iostream>
#include<cstdlib>
#include<string>
using namespace std;
// Node of the Huffman tree. All nodes live in one array and refer to each
// other by array index rather than by pointer.
typedef struct {
// Weight of the node: a symbol frequency for a leaf, the sum of both
// children for an internal node.
int weight;
// Array indices of the parent, left child and right child; 0 means "none".
int parent, lchild, rchild;
}HTNode, * HuffmanTree;
// Code table type: entry i is the NUL-terminated '0'/'1' code string of symbol i.
typedef char** HuffmanCode;
// Sentinel intended to be larger than any node weight.
int Max = 100000;
// Code table filled by HuffmanCoding (entries 1..sumchar).
HuffmanCode HC;
// Huffman tree filled by HuffmanCoding (nodes 1..2*sumchar-1, root last).
HuffmanTree HT;
// Raw copy of the input characters made by getChars.
char* Chars;
// Distinct characters of the input, in the order produced by getChars.
char* newchar;
// weight[i] is the number of occurrences of newchar[i].
int* weight;
// Number of distinct characters stored in newchar / weight.
int sumchar;
// Weights read back from the tree by HuffmanDecoding, one per symbol.
int* newweight;
// NUL-terminated '0'/'1' string produced by HuffmanCodingRoot.
char* HuffCode;
// Index 0 of HC and HT is not used for data; real entries start at index 1.
//查找当前huffman树最小的两个节点,在HT[1..i-1]中选择parent为0,且weight最小的两个结点,
//S1<S2
void Select( int L, int* S1, int* S2) {
HuffmanTree p = HT;
int small1 = Max, small2 = Max;
for (int i = 1; i <= L; i++) {
if ((p + i)->parent == 0) {
if ((p + i)->weight < small1) {
small2 = small1;
*S2 = *S1;
small1 = (p + i)->weight;
*S1 = i;
}
else if ((p + i)->weight < small2){
small2= (p + i)->weight;
*S2 = i;
}
}
}
}
// Builds the Huffman tree HT and the code table HC from n symbol weights and
// prints the resulting table.
//
// W : n weights; W[i-1] becomes the weight of leaf HT[i]. The caller in this
//     file passes them sorted from high to low, but the construction itself
//     does not rely on any order.
// n : number of distinct symbols; newchar[0..n-1] must hold the symbols that
//     belong to the weights. Nothing is built when n <= 0.
//
// On return HT[1..2n-1] holds the tree (the root is the last node) and
// HC[1..n] holds one NUL-terminated '0'/'1' string per symbol. Index 0 of
// both arrays is a placeholder. Everything is allocated with malloc and owned
// by the caller. A single-symbol alphabet yields a one-node tree and the code
// "0". The process exits with an error message if memory cannot be allocated.
void HuffmanCoding( int* W, int n) {
    if (W == NULL || n <= 0)
        return;
    const int m = 2 * n - 1;  // n leaves plus n - 1 internal nodes

    HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
    if (HT == NULL) {
        cerr << "HuffmanCoding: out of memory" << endl;
        exit(EXIT_FAILURE);
    }
    // Slot 0 is the "no node" placeholder; it is zeroed too so that it is
    // never read uninitialised.
    for (int i = 0; i <= m; i++) {
        HT[i].weight = (i >= 1 && i <= n) ? W[i - 1] : 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }
    cout << "huffman初始化" << endl;

    // Repeatedly merge the two lightest roots into a new node i.
    for (int i = n + 1; i <= m; i++) {
        int S1 = 0, S2 = 0;
        Select( i - 1, &S1, &S2);
        HT[S1].parent = i;
        HT[S2].parent = i;
        HT[i].lchild = S1;
        HT[i].rchild = S2;
        HT[i].weight = HT[S1].weight + HT[S2].weight;
    }

    HC = (HuffmanCode)malloc((n + 1) * sizeof(char*));
    // Scratch buffer filled from the back: a code has at most n - 1 bits
    // (one bit when n == 1), plus the terminator.
    char* cd = (char*)malloc((n + 1) * sizeof(char));
    if (HC == NULL || cd == NULL) {
        cerr << "HuffmanCoding: out of memory" << endl;
        exit(EXIT_FAILURE);
    }
    HC[0] = NULL;
    cd[n] = '\0';
    for (int i = 1; i <= n; ++i) {
        int start = n;
        // Walk from the leaf up to the root, recording the branch taken.
        for (int c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent) {
            --start;
            cd[start] = (HT[f].lchild == c) ? '0' : '1';
        }
        if (start == n) {
            // The leaf is the root (single-symbol alphabet): give it a
            // one-bit code so that encoded text is not empty.
            --start;
            cd[start] = '0';
        }
        HC[i] = (char*)malloc((n - start + 1) * sizeof(char));
        if (HC[i] == NULL) {
            cerr << "HuffmanCoding: out of memory" << endl;
            exit(EXIT_FAILURE);
        }
        for (int k = 0; start + k <= n; k++)  // copies the terminator as well
            HC[i][k] = cd[start + k];
    }
    cout << "huffman初始化完成" << endl;
    free(cd);

    cout << "赫夫曼编码表" << endl;
    for (int i = 0; i < n; i++) {
        cout << '\n' << newchar[i] << ":" << W[i] << ":";
        cout << HC[i + 1];
        cout << endl;
    }
}//
// Encodes Str with the code table HC and prints the resulting bit string.
//
// Str : text to encode. Each character is looked up in newchar[0..sumchar-1]
//       and replaced by its code HC[index + 1]; characters that are not in
//       the table are skipped.
//
// The result is stored in the global HuffCode as a malloc'ed, NUL-terminated
// string of '0'/'1' characters (a buffer from an earlier call is released
// first). The output is an empty string when no code table exists. The bits
// are collected in a std::string and copied once, instead of growing the
// buffer with one unchecked realloc per bit.
void HuffmanCodingRoot(string Str) {
    string bits;
    if (HC != NULL && newchar != NULL) {
        for (string::size_type i = 0; i < Str.length(); i++) {
            for (int j = 0; j < sumchar; j++) {
                if (Str[i] == newchar[j]) {
                    bits += HC[j + 1];
                    break;
                }
            }
        }
    }

    free(HuffCode);  // free(NULL) is a no-op on the first call
    HuffCode = (char*)malloc((bits.length() + 1) * sizeof(char));
    if (HuffCode == NULL) {
        cerr << "HuffmanCodingRoot: out of memory" << endl;
        exit(EXIT_FAILURE);
    }
    for (string::size_type i = 0; i < bits.length(); i++)
        HuffCode[i] = bits[i];
    HuffCode[bits.length()] = '\0';

    cout << "赫夫曼编码字符串" << endl;
    cout << HuffCode;
    cout << endl;
}
// Prints the subtree rooted at node n of the global tree HT in in-order
// (left subtree, node itself, right subtree). A child index of 0 means the
// child does not exist.
void InOrder(int n) {
    const int left = HT[n].lchild;
    const int right = HT[n].rchild;

    if (left != 0)
        InOrder(left);

    cout << " 节点:" << n << " 权重: " << HT[n].weight << " 左节点:" << left;
    cout << " 右节点:" << right << endl;

    if (right != 0)
        InOrder(right);
}
// Prints the whole Huffman tree in in-order, starting at the root node
// 2 * sumchar - 1.
//
// HT : tree handle passed by the caller. InOrder reads the global tree, so
//      the parameter (which shadows the global) is only checked for NULL.
//
// Does nothing when there is no tree or no symbol; previously that case
// indexed the node array with -1 or dereferenced a null pointer.
void HuffmanTraverse(HuffmanTree HT) {
    if (HT == NULL || ::HT == NULL || sumchar <= 0)
        return;
    InOrder(2 * sumchar - 1);
}
// Follows every code HC[1..sumchar] from the root of HT down to its leaf and
// stores the weight found there in newweight[0..sumchar-1].
//
// newweight is allocated here (zero-initialised; a buffer from an earlier
// call is released first) and owned by the caller. Nothing happens when
// there are no symbols or no tree / code table. If a code contains a
// character other than '0' or '1' the function stops; entries not reached
// stay 0.
void HuffmanDecoding() {
    if (sumchar <= 0 || HT == NULL || HC == NULL)
        return;
    const int n = sumchar;

    free(newweight);  // free(NULL) is a no-op on the first call
    newweight = (int*)calloc(n, sizeof(int));
    if (newweight == NULL) {
        cerr << "HuffmanDecoding: out of memory" << endl;
        exit(EXIT_FAILURE);
    }

    const int root = 2 * n - 1;
    for (int i = 1; i <= n; i++) {
        const char* code = HC[i];
        int k = root;
        // Iterate up to the terminator directly; no strlen call is needed.
        for (int j = 0; code[j] != '\0'; j++) {
            if (HT[k].lchild == 0 && HT[k].rchild == 0)
                break;  // already at a leaf (tree consisting of one node)
            if (code[j] == '0')
                k = HT[k].lchild;   // go left
            else if (code[j] == '1')
                k = HT[k].rchild;   // go right
            else
                return;             // malformed code
        }
        newweight[i - 1] = HT[k].weight;
    }
}
// Analyses Str and fills the global frequency tables:
//   Chars   - raw copy of the characters of Str (not NUL-terminated),
//   newchar - the distinct characters,
//   weight  - weight[i] is the number of occurrences of newchar[i],
//   sumchar - the number of distinct characters.
// Afterwards both tables are ordered by weight from high to low. All buffers
// are allocated with malloc/realloc and owned by the caller.
void getChars(string Str) {
    const string::size_type len = Str.length();

    Chars = (char*)malloc(len * sizeof(char));
    for (string::size_type pos = 0; pos < len; ++pos)
        Chars[pos] = Str[pos];

    // Both tables always keep one spare slot for the next new character.
    newchar = (char*)malloc(sizeof(char));
    weight = (int*)malloc(sizeof(int));
    sumchar = 0;

    for (string::size_type pos = 0; pos < len; ++pos) {
        const char ch = Chars[pos];

        int slot = 0;
        while (slot < sumchar && newchar[slot] != ch)
            ++slot;

        if (slot < sumchar) {
            // Known character: just count it.
            weight[slot] += 1;
            continue;
        }

        // First occurrence: use the spare slot, then grow both tables by one.
        newchar[sumchar] = ch;
        weight[sumchar] = 1;
        ++sumchar;
        newchar = (char*)realloc(newchar, (sumchar + 1) * sizeof(char));
        weight = (int*)realloc(weight, (sumchar + 1) * sizeof(int));
    }

    // Exchange sort, descending by weight; characters move with their weight.
    for (int a = 0; a < sumchar; ++a) {
        for (int b = a + 1; b < sumchar; ++b) {
            if (weight[a] >= weight[b])
                continue;
            const int heavier = weight[b];
            weight[b] = weight[a];
            weight[a] = heavier;
            const char symbol = newchar[b];
            newchar[b] = newchar[a];
            newchar[a] = symbol;
        }
    }
}
// Helper for recovering the original text from a Huffman bit string:
// tests whether the code HC is a prefix of the bit string str.
//
// str : NUL-terminated bit string to examine.
// HC  : NUL-terminated code to look for at the start of str.
//
// Returns 1 when HC is non-empty and str starts with HC, otherwise 0. A null
// pointer or an empty code never matches; the empty-code case used to read
// one byte past the end of HC.
int small(const char* str, const char* HC) {
    if (str == NULL || HC == NULL || HC[0] == '\0')
        return 0;
    for (int i = 0; HC[i] != '\0'; i++) {
        // Also stops at the end of str, whose terminator differs from HC[i].
        if (str[i] != HC[i])
            return 0;
    }
    return 1;
}
// Decodes the bit string HCode with the code table HC and prints the text.
//
// HCode : NUL-terminated string of '0'/'1' characters.
//
// At every position the code that prefixes the remaining bits is looked up
// (Huffman codes are prefix-free, so at most one code matches) and the
// matching symbol newchar[k - 1] is emitted. Decoding stops at the end of the
// input or at the first position where no code matches. Matching directly on
// the input removes the fixed 100-byte scratch buffer, which overflowed for
// long codes, and the read past the terminator after an unmatched tail.
// Output: a line break followed by the decoded text.
void HuffSolve(char* HCode) {
    string decoded;
    if (HCode != NULL && HC != NULL && newchar != NULL) {
        int pos = 0;
        while (HCode[pos] != '\0') {
            int matched = 0;  // 1-based symbol index, 0 = no code matches
            for (int k = 1; k <= sumchar; k++) {
                if (small(HCode + pos, HC[k]) == 1) {
                    matched = k;
                    break;
                }
            }
            if (matched == 0)
                break;  // remaining bits do not form a complete code

            int len = 0;
            while (HC[matched][len] != '\0')
                len++;
            if (len == 0)
                break;  // an empty code would never advance
            decoded += newchar[matched - 1];
            pos += len;
        }
    }
    cout << endl;
    cout << decoded;
}
// Demo driver: reads one whitespace-delimited word from standard input,
// builds its Huffman tree and code table, prints the tree, encodes and
// decodes the word, and prints every code with the weight read back from the
// tree. All global buffers, including the individual code strings, the
// encoded text and the weight array, are released before returning.
int main()
{
    string Str;
    if (!(cin >> Str)) {
        cerr << "No input text." << endl;
        return EXIT_FAILURE;
    }
    // Fill Chars, newchar, weight and sumchar.
    getChars(Str);
    HuffmanCoding( weight, sumchar);
    if (HT == NULL || HC == NULL) {
        // No tree was built, so none of the following steps can run.
        cerr << "Huffman tree could not be built for this input." << endl;
        free(Chars);
        free(newchar);
        free(weight);
        return EXIT_FAILURE;
    }
    cout << "中序遍历赫夫曼编树" << endl;
    HuffmanTraverse(HT);
    cout << "\n赫夫曼编码字符串" << endl;
    HuffmanCodingRoot(Str);
    cout << "赫夫曼解码" << endl;
    HuffSolve( HuffCode);
    HuffmanDecoding();
    for (int i = 0; i < sumchar && newweight != NULL; i++) {
        cout << endl;
        cout << HC[i + 1];
        cout << " weight" << newweight[i] << '\n';
    }
    // HC[0] is a placeholder; only entries 1..sumchar own a code string.
    for (int i = 1; i <= sumchar; i++)
        free(HC[i]);
    free(HC);
    free(HT);
    free(HuffCode);
    free(newweight);
    free(Chars);
    free(newchar);
    free(weight);
    cout << "Hello World!\n";
    return 0;
}
这是我第一次写数据结构方面的文章,欢迎提问、讨论。
本人现在是大二在校生
2022/5/6