写在前边:这是一个菜鸡的19年秋季学期数据结构实验课,趁寒假整理一下,欢迎各位大佬指正)
【实验目的】
- 掌握哈夫曼树的构造算法。
- 掌握哈夫曼编码的构造算法。
【实验内容】
问题描述
输入一串字符,根据给定的字符串中字符出现的频率建立相应的哈夫曼树,构造哈夫曼编码表,在此基础上可以对压缩文件进行压缩(即编码),同时可以对压缩后的二进制编码文件进行解压(即译码)。
输入要求
多组数据,每组数据1行,为一个字符串(只考虑26个小写字母即可)。当输入字符串为"0”时,输入结束。
输出要求
每组数据输出2n+3行(n为输入串中字符类别的个数)。第1行为统计出来的字符出现频率(只输出存在的字符,格式为字符:频度),每两组字符之间用一个空格分隔,字符按照ASCII码从小到大的顺序排列。第2行至第2n行为哈夫曼树的存储结构的终态(参照实验提示表5.2,一行当中的数据用空格分隔)。第2n+1行为每个字符的哈夫曼编码(只输出存在的字符,格式为字符:编码),每两组字符之间用一个空格分隔,字符按照ASCII码从小到大的顺序排列。第2n+2行为编码后的字符串,第2n+3行为解码后的字符串(与输入的字符串相同)。
输入样例
aaaaaaabbbbbccdddd
aaccb
0
输出样例
a:7 b:5 c2 d:4
1 7 7 0 0
2 5 6 0 0
3 2 5 0 0
4 4 5 0 0
5 6 6 3 4
6 1 1 7 2 5
7 1 8 0 1 6
a:0 b:10 c:110 d:111
00000001010101011011011111
aaaaaaabbbbbccdddd
a:2 b:1 c:3
1 2 4 0 0
2 1 4 0 0
3 3 5 0 0
4 3 5 2 1
5 6 0 3 4
a:11 b:10 c:0
111110000
aabccc
【实验提示】
首先,读入一行字符串,统计每个字符出现的频率;然后,根据字符出现的频率利用提示算法1建立相应的哈夫曼树;最后,根据得到的哈夫曼树利用算法2求出每个字符的哈夫曼编码。
#include<iostream>
#include<cstring>
#include<stdio.h>
#include<string>
#define MAX 100
using namespace std;
int coun[26]; //频率
char saveletter[26];//存字母
char temp[MAX];//暂存被译码串
typedef struct htnode
{
int weight;
int lchild, rchild, parent;
char data;
int frequency;//出现频率
}*huftree;
typedef char **hufcode;
void select(huftree &hf, int x, int &s1, int &s2)//在叶子结点里找最小的两个
{
int min = 999, cmin = 999;//最小值和次小值
int i = 1;
while (i <= x)
{
if (hf[i].parent == 0)
{
if (hf[i].weight < min)//寻找权值最小
{
min = hf[i].weight;
s1 = i;
}
i++;
}
else
i++;
}
int flag = s1;
i = 1;
while (i <= x)
{
if (hf[i].parent == 0)
{
if ((hf[i].weight > min && hf[i].weight < cmin) || (hf[i].weight == min && flag != i))//找次小值
{
cmin = hf[i].weight;
s2 = i;
}
i++;
}
else
i++;
}
}
void Create(huftree &hf, int n)//叶子为n的哈树有2n-1个结点
{
int m = 2 * n - 1, s1 = 0, s2 = 0;
if (n <= 1) return;
hf = new htnode[m + 1];//0号单元不用
for (int i = 1; i <= m; i++)//都初始化为0
{
hf[i].parent = 0;
hf[i].rchild = 0;
hf[i].lchild = 0;
hf[i].data = saveletter[i - 1];//字母
}
for (int i = 1; i <= n; i++)
hf[i].weight = coun[i - 1];//输入权值
for (int i = n + 1; i <= m; i++)//前n个为叶子,后面需要构建
{
select(hf, i - 1, s1, s2);//选择最小的两个节点,返回序号
hf[s1].parent = i;
hf[s2].parent = i;//结点双亲变为i
hf[i].lchild = s1;
hf[i].rchild = s2;//i的左右孩子
hf[i].weight = hf[s1].weight + hf[s2].weight;
//i权值更改
}
}
void Show(huftree &hf, int x)
{
for (int i = 1; i <= 2 * x - 1; i++)
{
cout << i << " ";
cout << hf[i].weight << " " << hf[i].parent << " " << hf[i].lchild << " " << hf[i].rchild << endl;
}
}
void count(char str[], huftree &hf, int &n)//出现频率 ,字母个数
{
int num[26];
char ch;
int i = 0, j = 0;
memset(num, 0, sizeof(num));
while (str[i] != '\0')
{
j = str[i] - 97;
num[j]++;
i++;
}
j = 0;
for (i = 0; i < 26; i++)
{
if (num[i] != 0)
{
saveletter[j] = char(i + 97);
coun[j] = num[i];
j++;
}
}
n = j;
for (int i = 0; i < n; i++)
{
if (i == n - 1)
cout << saveletter[i] << ":" << coun[i];
else
cout << saveletter[i] << ":" << coun[i] << " ";
}
cout << endl;
}
void hfcode(huftree &hf, hufcode &hc, int n)
{
char *cd;
int start = 0, c, f;
hc = new char*[n + 1];//编码表
cd = new char[n];//每个字符的编码一定小于n
cd[n - 1] = '\0';
for (int i = 1; i <= n; i++)
{
start = n - 1;
c = i;
f = hf[i].parent;
while (f != 0)//不是根节点
{
start--;
if (hf[f].lchild == c)
cd[start] = '0';
else
cd[start] = '1';
c = f;//向上回溯
f = hf[f].parent;
}
hc[i] = new char[n - start];
strcpy(hc[i], &cd[start]);//把临时空间的编码复制到编码表中
}
delete cd;
int i, j, z = 0;
for (j = 1; j <= n; j++)//输出字母编码
{
if (j == n)
cout << saveletter[j - 1] << ":" << hc[j];
else
cout << saveletter[j - 1] << ":" << hc[j] << " ";
}
cout << endl;
}
void transtonum(huftree &hf, hufcode &hc, int n, char str[])
{
for (int i = 0; str[i] != '\0'; i++)
for (int j = 1; j <= n; j++)
if (str[i] == saveletter[j - 1])
{
cout << hc[j];
strcat(temp, hc[j]);
}
cout << endl;
}
void transtoletter(huftree &hf, hufcode &hc, int n)
{
int i = 2 * n - 1;
int j = 0;
while (temp[j] != '\0')
{
if (temp[j] == '0')
i = hf[i].lchild; //左孩子
else if (temp[j] == '1')
i = hf[i].rchild; //右孩子
if (hf[i].lchild == 0)
{
cout << hf[i].data;
i = 2 * n - 1;
}
j++; //无论是否找到叶子节点都读取下一个编码串字符
}
cout << endl;
}
int main()
{
while (1)
{
huftree hf;
hufcode hc;
int n;
char str[MAX];
scanf("%s", &str);
if (str[0] == '0')
break;
count(str, hf, n);
Create(hf, n);
Show(hf, n);
hfcode(hf, hc, n);
transtonum(hf, hc, n, str);
transtoletter(hf, hc, n);
memset(coun, 0, sizeof(coun));
memset(saveletter, '\0', sizeof(saveletter));
memset(temp, '\0', sizeof(temp));
delete hf;
delete hc;
}
return 0;
}