Huffman编码与译码
实验要求
- 输入一段100—200字的英文短文,存入一文件a中。
- 写函数统计短文出现的字母个数n及每个字母的出现次数
- 写函数以字母出现次数作权值,建Huffman树(n个叶子),给出每个字母的Huffman编码。
- 用每个字母编码对原短文进行编码,码文存入文件b中。
- 用Huffman树对b中码文进行译码,结果存入文件c中,比较a,c是否一致,以检验编码、译码的正确性。
实验思路
- Huffman树每次需要找最小值,所以手写一个小根堆
- 自下而上建树
- 注意码序
- 译码
完整代码
注意一下直接复制代码可能会有一点小小的问题,不懂的请私聊我
//
// Created by CrazyBin on 2022/2/27.
//
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
// Upper bound of the scan loop in fileCode(); same value as the size of text[].
#define N 10010
// Size handed to fgets() in deCode() and upper bound of its scan loop;
// same value as the size of b_text[].
#define M 1000010
// Plain text loaded from a.txt by read(); charCnt() lower-cases it in place.
char text[10010];
// Encoded text loaded back from b.txt by deCode().
char b_text[1000010];
// File handles passed BY VALUE from main() to read()/fileCode()/deCode().
// Each callee assigns the result of fopen() to its own parameter, so in the
// code shown these globals themselves are never assigned.
FILE *a;
FILE *b;
FILE *c;
// Occurrence counter indexed by character code (filled by charCnt()).
int cnt[200];
// Min-heap storage, 1-based: h[1..size] holds node weights.
int h[1010];
// Number of elements currently stored in the heap.
int size=0;
// ph[k]: current heap position of the k-th inserted element.
int ph[1010];
// hp[pos]: insertion number of the element sitting at heap position pos.
int hp[1010];
// Number of distinct letters found by read(), i.e. the number of leaves.
int total=0;
// Number of tree nodes in use; equals total after read() and grows by one
// for every internal node created in fNode().
int sum=0;
// word[letter - 96]: index into node[]/code[] of that lower-case letter
// (stays 0 for letters that never occur, since globals start zeroed).
int word[30];
// Forward declarations. The definitions appear further down in this file.
// Load a.txt, count letters and create one leaf node per distinct letter.
void read(FILE *p);
// Reverse the first `length` characters of str in place.
void reverse(char *str,int length);
// Decode b.txt into c.txt.
void deCode(FILE *f);
// Lower-case m in place (it is taken by reference) and return the result.
char toLowercase(char &m);
// Count letter occurrences of str into cnt[].
int charCnt(char *str);
// Exchange two ints.
void swap(int &u,int &v);
// Exchange two heap slots and keep the ph[]/hp[] maps consistent.
void heap_swap(int u,int v);
// Encode text[] into b.txt.
void fileCode(FILE *f);
// Sift the heap element at position u towards the root.
void up(int u);
// Remove the minimum from the heap; returns its insertion number.
int pop();
// Build the tree and derive the code of every leaf.
void Huffman();
// Map a letter to its index in node[]/code[].
int getCode(char p);
// Sift the heap element at position u towards the leaves.
void down(int u);
// Push weight x onto the heap.
void insert(int x);
// Create the parent node of nodes x and y.
void fNode(int x,int y);
// Code word of one leaf. Huffman() fills code[i] from node[i].
typedef struct HuffmanCode{
// Length of the code word stored in co (written at the end of Huffman()).
int idx;
// Code word as a string of '0'/'1' characters.
char co[20];
// The letter this code word stands for (copied from node[i].s).
char s;
}HCode;
// Running insertion counter of the heap: incremented by every insert() and
// stored in ph[]/hp[]. pop() returns this number, and Huffman()/fNode() use
// it directly as an index into node[].
int idx=0;
// One node of the Huffman tree. Links are indices into node[]; -1 means
// "no link" (see init() and fNode()).
typedef struct HuffmanNode{
// Letter stored in a leaf (set by init()).
char s;
// Occurrence count of the letter, or the sum of both children's weights.
int weight;
// Index of the left child, -1 if none.
int left;
// Index of the parent, -1 while the node has no parent.
int parent;
// Index of the right child, -1 if none.
int right;
// Branch label on the edge to the parent: '0' or '1' once fNode() has
// attached the node, ' ' right after init().
char c;
}Node;
// Tree nodes, used from index 1 upwards.
Node node[100];
// Code table; code[i] corresponds to node[i].
HCode code[100];
// Turn *n into a fresh, unattached leaf.
//   n - node to initialise
//   w - weight (occurrence count) to store
//   p - letter to store
// All three links are set to -1 ("none") and the branch label to ' '.
void init(Node *n,int w,char p)
{
    n->parent = n->right = n->left = -1;
    n->c = ' ';
    n->weight = w;
    n->s = p;
}
// Exchange the values of x and y.
// Goes through a temporary so it stays correct when both references name
// the same object.
void swap(int &x,int &y)
{
    const int saved = y;
    y = x;
    x = saved;
}
// Exchange heap positions u and v.
// Besides the weights in h[], the two bookkeeping maps are updated so that
// ph[] (insertion number -> position) and hp[] (position -> insertion
// number) keep describing the same elements.
void heap_swap(int u,int v)
{
    // The two ph[] slots involved are the same pair whether hp[] has been
    // exchanged yet or not, so the three swaps can run in any order.
    swap(ph[hp[u]], ph[hp[v]]);
    swap(hp[u], hp[v]);
    swap(h[u], h[v]);
}
void insert(int x)
{
idx++;
h[++size]=x;
ph[idx]=size;
hp[size]=idx;
up(size);
}
// Sift the element at heap position u towards the leaves until neither
// child is smaller than it.
void down(int u)
{
    for (;;)
    {
        const int left = u * 2;
        const int right = left + 1;
        int smallest = u;

        if (left <= size && h[left] < h[smallest])
            smallest = left;
        if (right <= size && h[right] < h[smallest])
            smallest = right;

        if (smallest == u)
            return;             // heap order restored

        heap_swap(u, smallest);
        u = smallest;           // continue from the child we moved into
    }
}
// Sift the element at heap position u towards the root while it is smaller
// than its parent (position u/2). Position 1 is the root.
void up(int u)
{
    while (u / 2 > 0 && h[u] < h[u / 2])
    {
        heap_swap(u, u / 2);
        u >>= 1;
    }
}
// Remove the minimum element from the heap.
// Returns the insertion number of the removed element (hp[1] before the
// removal), not its weight.
int pop()
{
    const int removed_id = hp[1];
    const int last = size;

    heap_swap(1, last);   // move the minimum to the end ...
    size = last - 1;      // ... and cut it off
    down(1);
    return removed_id;
}
// Program entry: load and count a.txt, build the Huffman codes, write the
// encoded text to b.txt and decode it again into c.txt.
int main()
{
    read(a);        // a.txt -> text[], cnt[], leaf nodes
    Huffman();      // tree + code table
    fileCode(b);    // text[] -> b.txt
    deCode(c);      // b.txt -> c.txt
    return 0;
}
// Map a letter to its index in node[]/code[].
//   p - character to look up; upper-case letters are folded to lower case
// Returns the index recorded in word[] for that letter. For anything that
// is not a letter it returns 0, an index read() never hands out (it numbers
// the leaves from 1), instead of indexing word[] out of bounds.
int getCode(char p)
{
    const char lower = toLowercase(p);
    if (lower < 'a' || lower > 'z')
    {
        return 0;
    }
    // word[] is 1-based: 'a' -> 1 ... 'z' -> 26.
    return word[lower - 'a' + 1];
}
void fileCode(FILE *f)
{
f= fopen("b.txt","w+");
for(int i=0;i<N;i++)
{
char k=text[i];
if(k>='a'&&k<='z')
fputs(code[getCode(k)].co,f);
else if(k==NULL)
{
break;
}
else
{
fputc(k,f);
}
}
fclose(f);
}
int deOne(char *str,FILE *f)
{
for(int i=1;i<=total;i++)
{
if(!strcmp(str,code[i].co))
{
fputc(node[i].s,f);
// printf("%c",node[i].s);
return 1;
}
}
return 0;
}
void deCode(FILE *f)
{
FILE *r= fopen("b.txt","r");
fgets(b_text,M,r);
f= fopen("c.txt","w+");
for(int i=0;i<M;i++)
{
int j=0;
char temp[20];
memset(temp,NULL,20);
while(b_text[i]=='1'||b_text[i]=='0')
{
temp[j]=b_text[i];
if(deOne(temp,f))
break;
i++;
j++;
}
if(b_text[i]==NULL)
break;
else if(b_text[i]=='1'||b_text[i]=='0')
{
}
else
{
fputc(b_text[i],f);
}
}
}
// Build the Huffman tree over the leaves node[1..sum] and derive the code
// word of every leaf into code[1..total], printing each one.
//
// Differences from a naive build:
//  - with no leaves at all the function returns immediately instead of
//    popping from an empty heap;
//  - with a single leaf (which is then also the root) the leaf gets the
//    one-bit code "0", so that it still encodes to real bits.
void Huffman()
{
    if (sum <= 0)
    {
        return;
    }

    for (int i = 1; i <= sum; i++)
    {
        insert(node[i].weight);
    }
    // Repeatedly merge the two lightest roots until one tree remains.
    while (size > 1)
    {
        int x1 = pop();
        int x2 = pop();
        fNode(x1, x2);
    }

    const int max_len = (int)sizeof code[0].co - 1;   // room for the '\0'
    for (int i = 1; i <= total; i++)
    {
        int len = 0;
        code[i].s = node[i].s;

        if (node[i].parent == -1)
        {
            code[i].co[len++] = '0';    // lone leaf: give it a real bit
        }
        // Collect branch labels from the leaf up to the root; the bound
        // keeps a pathologically deep tree from writing past co[].
        for (int p = i; node[p].parent != -1 && len < max_len; p = node[p].parent)
        {
            code[i].co[len++] = node[p].c;
        }
        code[i].co[len] = '\0';
        code[i].idx = len;

        // Labels were collected leaf-to-root; codes read root-to-leaf.
        reverse(code[i].co, len);
        printf("%c的霍夫曼编码是:%s\n", node[i].s, code[i].co);
    }
}
// Reverse the first `length` characters of str in place.
//   str    - buffer holding at least `length` characters
//   length - number of leading characters to reverse; values <= 1 leave
//            the buffer untouched
// Characters beyond `length` (including any terminator) are not modified.
// Works by swapping from both ends, so no scratch buffer is needed and
// there is no limit on `length`.
void reverse(char *str,int length)
{
    for (int lo = 0, hi = length - 1; lo < hi; lo++, hi--)
    {
        const char t = str[lo];
        str[lo] = str[hi];
        str[hi] = t;
    }
}
void fNode(int x,int y)
{
sum++;
node[x].parent=sum;
node[x].c='0';
node[y].parent=sum;
node[y].c='1';
node[sum].left=x;
node[sum].right=y;
node[sum].weight=node[x].weight+node[y].weight;
node[sum].parent=-1;
insert(node[sum].weight);
}
// Load the first line of a.txt into text[], count its letters and create
// one leaf node per distinct letter.
//   p - ignored on entry; used as the local handle for a.txt
// For every letter 'a'..'z' that occurs, prints its count, initialises the
// next node[] slot and records that slot in word[]. On return sum and total
// both hold the number of distinct letters.
// Terminates the program if a.txt cannot be opened; the file is closed as
// soon as it has been read.
void read(FILE *p)
{
    p = fopen("a.txt", "r");
    if (p == NULL)
    {
        perror("a.txt");
        exit(EXIT_FAILURE);
    }
    if (fgets(text, sizeof text, p) == NULL)
    {
        text[0] = '\0';     // empty file: treat as empty text
    }
    fclose(p);

    charCnt(text);
    for (int ch = 'a'; ch <= 'z'; ch++)
    {
        if (cnt[ch] == 0)
        {
            continue;
        }
        printf("%c出现的次数是:%d\n", ch, cnt[ch]);
        sum++;
        init(&node[sum], cnt[ch], (char)ch);
        word[ch - 'a' + 1] = sum;   // 1-based letter slot -> node index
    }
    printf("共计出现%d个字母\n", sum);
    total = sum;
}
// Count the letters of str into cnt[], ignoring case.
//   str - text to scan; upper-case letters are converted to lower case IN
//         PLACE, which the encoder relies on
// Scans up to the terminating '\0' (at most 10000 characters).
// Returns the total number of letters counted.
int charCnt(char *str)
{
    int letters = 0;
    for (int i = 0; i < 10000 && str[i] != '\0'; i++)
    {
        const bool is_lower = str[i] >= 'a' && str[i] <= 'z';
        const bool is_upper = str[i] >= 'A' && str[i] <= 'Z';
        if (is_lower || is_upper)
        {
            const char ch = toLowercase(str[i]);    // also rewrites str[i]
            cnt[(unsigned char)ch]++;
            letters++;
        }
    }
    return letters;
}
// Lower-case a character in place.
//   m - character to convert; modified through the reference when it is an
//       upper-case letter 'A'..'Z', left alone otherwise
// Returns the (possibly converted) value of m.
char toLowercase(char &m)
{
    const bool is_upper = !(m < 'A' || m > 'Z');
    if (is_upper)
    {
        m = static_cast<char>(m + 32);   // distance from 'A' to 'a'
    }
    return m;
}
实验截图
a.txt 文件
As chief of staff, it was his job to invade Ukraine and complete the job fast, and by that standard he has been found wanting.He has played a major role in Vladimir Putin's military campaigns ever since he commanded an army in the Chechen War of 1999, and he was at the forefront of military planning for Ukraine too, overseeing military drills in Belarus last month.Described as an "unsmiling, craggy bruiser" by Russia specialist Mark Galeotti, Gen Gerasimov also played a key role in the military campaign to annexe Crimea.Some reports suggest he has now been side-lined because of the stuttering start to invasion of Ukraine and reports of poor morale among the troops.But Andrei Soldatov believes that is wishful thinking from some quarters: "Putin cannot control every road and every battalion, and that is his role." And while the defence minister may love his uniforms, he has no military training and needs to rely on the professionals, he adds.
编码结果
b.txt
译码结果 c.txt
as chief of staff, it was his job to invade ukraine and complete the job fast, and by that standard he has been found wanting.he has played a major role in vladimir putin's military campaigns ever since he commanded an army in the chechen war of 999, and he was at the forefront of military planning for ukraine too, overseeing military drills in belarus last month.described as an "unsmiling, craggy bruiser" by russia specialist mark galeotti, gen gerasimov also played a key role in the military campaign to annexe crimea.some reports suggest he has now been side-lined because of the stuttering start to invasion of ukraine and reports of poor morale among the troops.but andrei soldatov believes that is wishful thinking from some quarters: "putin cannot control every road and every battalion, and that is his role." and while the defence minister may love his uniforms, he has no military training and needs to rely on the professionals, he adds.
好的,没问题。