哈夫曼编码
压缩软件:
给定一篇文章,只含有英文大小写字母和空格,以.txt格式存储,统计该文件中各种字符的频率,对各字符进行Huffman编码,将该文件翻译成Huffman编码文件,再将Huffman编码文件翻译成源文件。
创建结构体数组,数组的每个元素存有字符,频率,父节点下边,左右孩子的下标。假设有n个结点,先统计每个字符出现的频率当做权值,找出两个权值最小的下标,权值的和作为新的下标存在结构体数组中,遍历n-1次,得到哈夫曼树。从叶结点出发,一直找父节点,直至父节点为根(即par = 0),左边为0,右边为1,得到哈夫曼编码;解码 时按位读取,有映射关系直接输出,否则继续向后读取。
#include <bits/stdc++.h>
using namespace std;
map<char, int>M;
map<char, string>M1;
const int N = 500;
char s[N];
struct node
{
int weight, par, l, r;
char c;
};
//找两个最小权的下标min1, min2
void select(node *Htree, int m, int &min1, int &min2)
{
min1 = min2 = 0;
Htree[0].weight = 1e9;
for(int i = 1; i <= m; i++)
{
if(!Htree[i].par && Htree[i].weight < Htree[min1].weight)
min1 = i;
}
Htree[min1].par = -1;
for(int i = 1; i <= m; i++)
{
if(!Htree[i].par && Htree[i].weight < Htree[min2].weight)
min2 = i;
}
}
//创建哈夫曼树
void H_tree(node *Htree, int n)
{
int min1 = 0, min2 = 0;
int now = n+1, m = n;//now是新节点的下标,m是寻找的上限
for(int i = 1; i < n; i++)
{
select(Htree, m, min1, min2);//在[1,m]中找两个最小权的下标组成now
Htree[now].weight = Htree[min1].weight + Htree[min2].weight;
Htree[now].l = min1, Htree[now].r = min2;
Htree[min1].par = now, Htree[min2].par = now;
now++;
m++;
}
}
//打印树
//void print(node *t,int next)
//{
// printf("%d\n",t[next].weight);
// if(t[next].l)
// print(t, t[next].l);
// if(t[next].r)
// print(t, t[next].r);
//}
//进行哈夫曼编码
void encode(node *Htree, int n, int len)
{
char temp[N];//临时存放某个叶子节点的哈夫曼编码
int now = N-1;
//逆序推出哈夫曼编码,左边为0,右边为1
temp[now] = '\0';
puts("编码规则为:");
for(int i = 1; i <= n; i++)
{
now = N-1;
for(int j = i; Htree[j].par != 0; j = Htree[j].par)
{
int p = Htree[j].par;
if(Htree[p].l == j) temp[--now] = '0';
else temp[--now] = '1';
}
printf("%c : %s\n", Htree[i].c, temp+now);
string temp1(temp+now);
M1[Htree[i].c] = temp1;
}
puts("源码 -> 哈夫曼 见2.txt !");
//将源代码转化成哈夫曼代码保存文件
ofstream savefile("2.txt");
for(int i = 0; i < len; i++)
savefile << M1[s[i]];
savefile.close();
}
//进行哈夫曼解码
void decode(node *Htree)
{
puts("哈夫曼 -> 源码 见3.txt !");
FILE *fp1, *fp2;
fp1 = fopen("2.txt", "r");
fscanf(fp1, "%[^\n]", s);
fp2 = fopen("3.txt", "w");
int len = strlen(s), c = 0;
char str[N];
for(int i = 0; i < len; i++)
{
str[c++] = s[i];
str[c] = '\0';
for(map<char, string>::iterator it = M1.begin(); it != M1.end(); it++)
{
if(it->second == str)
{
fprintf(fp2, "%c", it->first);
c = 0;
}
}
}
}
int main()
{
///请先在程序路径中创建1.txt!!!
FILE *fp = NULL;
int len = 0;
char c;
fp = fopen("1.txt", "r");
if(!fp)
{
puts("请先在源目录创建1.txt!");
return 0;
}
while((c = fgetc(fp)) != EOF)
s[len++] = c;
fclose(fp);
node *Htree;
Htree = (node*)malloc((2*len)*sizeof(node));//Htree从0-2*len-1
for(int i = 0; i < len; i++)
M[s[i]]++; //统计每个字符的频率
int n = 0;
//n个叶子的哈夫曼树共有2*n-1个节点
for(int i = 0; i < len; i++)
{
if(M[s[i]] != 0)
{
Htree[++n].weight = M[s[i]];
M[s[i]] = 0; //频率置0防止重复计数
Htree[n].par = Htree[n].l = Htree[n].r = 0;
Htree[n].c = s[i];
}
}
Htree[0].weight = Htree[0].par = Htree[0].l = Htree[0].r = 0;//0号节点全部赋值为0,并不使用
for(int i = n+1; i < 2*n; i++)
Htree[i].weight = Htree[i].par = Htree[i].l = Htree[i].r = 0;
H_tree(Htree, n);
//print(Htree,2*n-1);
encode(Htree, n, len);
decode(Htree);
return 0;
}
源目录1.txt内容如下
as6d54as56d as165f 1asf 0as as0ff a0 fasf as sf a sa f
测试结果
运行产生了2.txt和3.txt
出现的问题是对文件的操作不熟悉,又复习一波c和c++的文件操作。总的来说本次实验不难,就是一个模拟的过程,非常考验耐心和毅力。