#include <stdio.h>
#include <string.h>
#include <iostream>
#include <fstream>
using namespace std;
/**
 * Huffman coder for text made of the lowercase letters 'a'..'z'.
 *
 * Typical use:
 *   1. initializeFromFile() counts letter frequencies and builds the tree;
 *   2. encodeFile() writes the textual '0'/'1' code of every letter;
 *   3. decodeFile() walks the tree to turn the '0'/'1' text back into letters.
 *
 * Bytes outside 'a'..'z' (newlines, digits, upper case, ...) have no slot in
 * the 26-entry alphabet; they are ignored when counting and skipped when
 * encoding instead of indexing outside the tables.
 */
class HM{
    // 26 leaves need at most 25 internal nodes; one slot is spare.
    enum { kSymbols = 26, kMaxNodes = 2 * kSymbols };

    struct Haffm {
        int frequency=0;                     // occurrence count (leaf) or sum of children (internal)
        char word='\0';                      // the letter stored in a leaf
        std::string code="";                 // '0'/'1' path from the root to this leaf
        int lchild=-1,rchild=-1,parent=-1;   // indices into Tree, -1 when absent
    };
    typedef Haffm Haffmtree;

    Haffmtree Tree[kMaxNodes];   // [0,26) are the leaves 'a'..'z', [26,52) internal nodes
    int parent=-1;               // index of the tree root, -1 while no tree exists

    /// True when ch is one of the 26 letters the coder can represent.
    static bool isSymbol(char ch) {
        return ch >= 'a' && ch <= 'z';
    }

    /// True when idx is a valid node index and that node has no children.
    bool isLeaf(int idx) const {
        return idx >= 0 && idx < kMaxNodes &&
               Tree[idx].lchild == -1 && Tree[idx].rchild == -1;
    }

    /// Index of the first still-unmerged node in [0, n) with the smallest
    /// non-zero frequency, ignoring index `skip`; -1 when there is none.
    int pickLightest(int n, int skip) const {
        int best = -1;
        for (int i = 0; i < n; i++) {
            if (i == skip || Tree[i].parent != -1 || Tree[i].frequency == 0)
                continue;
            if (best == -1 || Tree[i].frequency < Tree[best].frequency)
                best = i;
        }
        return best;
    }

public:
    /**
     * Assigns codes to every leaf below node `bit`.
     *
     * @param bit index of the subtree root; negative or out-of-range values are ignored
     * @param str code prefix accumulated on the way down to `bit`
     *
     * The leaf's code is overwritten (not appended to), so running the
     * assignment again yields the same codes.
     */
    void code(int bit, std::string str){
        if (bit < 0 || bit >= kMaxNodes)
            return;
        if (isLeaf(bit)) {
            Tree[bit].code = str;
            return;
        }
        code(Tree[bit].lchild, str+'0');
        code(Tree[bit].rchild, str+'1');
    }

    /**
     * Finds the two lightest unmerged nodes among Tree[0..n).
     *
     * @param a receives the index of one selected node, or -1 if none exists
     * @param b receives the index of the other selected node, or -1 if fewer
     *          than two candidates exist
     * @param n number of leading Tree entries to examine (clamped to the array size)
     *
     * When both are found they are ordered so that the node with the larger
     * lchild index ends up in *a (an internal node is placed before a leaf).
     */
    void _min2(int *a, int *b, int n)
    {
        if (n > kMaxNodes)
            n = kMaxNodes;

        *a = pickLightest(n, -1);
        *b = (*a == -1) ? -1 : pickLightest(n, *a);

        // With fewer than two candidates there is nothing to order, and
        // indexing Tree with -1 would read outside the array.
        if (*a == -1 || *b == -1)
            return;

        if (Tree[*a].lchild < Tree[*b].lchild)
        {
            int temp = *a;
            *a = *b;
            *b = temp;
        }
    }

    /**
     * Counts letter frequencies in FileName and builds the Huffman tree and
     * the per-letter codes. Any previously built tree is discarded first.
     *
     * Prints the total byte count and every letter's frequency to stdout.
     * If the file cannot be opened, a message goes to stderr and the coder
     * is left empty.
     *
     * @param FileName path of the text file to analyse
     */
    void initializeFromFile(std::string FileName){
        // Start from a clean slate so repeated calls do not mix statistics.
        for (int i = 0; i < kMaxNodes; i++)
            Tree[i] = Haffmtree();
        parent = -1;

        std::ifstream in(FileName.c_str());
        if (!in) {
            fprintf(stderr, "initializeFromFile: cannot open %s\n", FileName.c_str());
            return;
        }

        // Count character frequencies.
        int a[kSymbols] = {0};
        int sum = 0;
        char ch;
        while (in.get(ch)) {
            if (isSymbol(ch))
                a[ch-'a']++;
            sum++;
        }
        printf("bytes:%d\n",sum);

        // Create the leaves of the Huffman tree.
        for (int i = 0; i < kSymbols; i++)
        {
            printf("%c:%d\n",i+'a',a[i]);
            Tree[i].frequency = a[i];
            Tree[i].word = static_cast<char>(i+'a');
        }

        // Repeatedly merge the two lightest nodes i1 and i2 into node i.
        for (int i = kSymbols; i < kMaxNodes - 1; i++)
        {
            int i1 = -1;
            int i2 = -1;
            _min2(&i1, &i2, i);
            if(i1==-1||i2==-1){
                // Zero or one node left: the survivor (if any) is the root.
                parent=i1;
                break;
            }
            Tree[i1].parent = i;
            Tree[i2].parent = i;
            Tree[i].frequency = Tree[i1].frequency + Tree[i2].frequency;
            Tree[i].lchild = i1;
            Tree[i].rchild = i2;
            parent=i;
        }

        if (parent != -1 && isLeaf(parent)) {
            // Only one distinct letter: give it a one-bit code so the
            // encoded output is not empty and can be decoded again.
            Tree[parent].code = "0";
        } else {
            code(parent,"");
        }
    }

    /**
     * Writes the '0'/'1' code of every letter of InFile to OutFile and prints
     * the encoded size in whole bytes (bit count / 8) to stdout.
     *
     * Characters without a code (outside 'a'..'z', or letters absent from the
     * file used to build the tree) are skipped; their number is reported on
     * stderr. Nothing is written if either file cannot be opened.
     *
     * @param InFile  path of the plain-text input
     * @param OutFile path of the encoded output (overwritten)
     */
    void encodeFile(std::string InFile, std::string OutFile){
        std::ifstream in(InFile.c_str());
        if (!in) {
            fprintf(stderr, "encodeFile: cannot open %s\n", InFile.c_str());
            return;
        }
        std::ofstream out(OutFile.c_str());
        if (!out) {
            fprintf(stderr, "encodeFile: cannot create %s\n", OutFile.c_str());
            return;
        }

        int sum = 0;
        int skipped = 0;
        char ch;
        while (in.get(ch)) {
            // Read one character, write one code.
            if (!isSymbol(ch) || Tree[ch-'a'].code.empty()) {
                skipped++;
                continue;
            }
            const std::string &bits = Tree[ch-'a'].code;
            out << bits;
            sum += static_cast<int>(bits.length());
        }
        if (skipped > 0)
            fprintf(stderr, "encodeFile: skipped %d character(s) with no code\n", skipped);
        printf("encode_bytes:%d\n",sum/8);
    }

    /**
     * Decodes the '0'/'1' text in InFile back into letters written to OutFile
     * and prints the number of decoded letters to stdout.
     *
     * Characters other than '0' and '1' are ignored. Nothing is decoded when
     * no tree has been built. Nothing is written if either file cannot be
     * opened.
     *
     * @param InFile  path of the encoded input
     * @param OutFile path of the decoded output (overwritten)
     */
    void decodeFile(std::string InFile, std::string OutFile){
        std::ifstream in(InFile.c_str());
        if (!in) {
            fprintf(stderr, "decodeFile: cannot open %s\n", InFile.c_str());
            return;
        }
        std::ofstream out(OutFile.c_str());
        if (!out) {
            fprintf(stderr, "decodeFile: cannot create %s\n", OutFile.c_str());
            return;
        }

        const bool haveTree = parent >= 0 && parent < kMaxNodes;
        const bool rootIsLeaf = haveTree && isLeaf(parent);
        int sum = 0;
        int q = parent;
        char ch;
        while (in.get(ch)) {
            if (!haveTree || (ch != '0' && ch != '1'))
                continue;
            if (rootIsLeaf) {
                // Single-letter alphabet: every bit stands for that letter.
                out.put(Tree[parent].word);
                sum++;
                continue;
            }
            // '0' descends left, '1' descends right. Internal nodes are
            // always created with both children, so q stays valid.
            q = (ch == '0') ? Tree[q].lchild : Tree[q].rchild;
            if (isLeaf(q)) {
                out.put(Tree[q].word);
                q = parent;   // restart at the root for the next letter
                sum++;
            }
        }
        printf("decode_bytes:%d\n",sum);
    }
};
int main(){
HM T;
T.initializeFromFile("FileToBeProcessed.txt");
T.encodeFile("FileToBeProcessed.txt", "FileToBeProcessed.enc");
T.decodeFile("FileToBeProcessed.enc", "FileToBeProcessed.dec");
return 0;
}
// Huffman file encoding and decoding
// (Blog footer: latest recommended article published on 2024-04-20 11:37:23)