哈夫曼树编码译码

设计任务 


1、文本文件的编码解码(文本文件压缩为01编码,解码01编码为文本文件)
有一篇英文文章存储在一个文本文件中,现在要以报文形式将文章发送。编写程序,设计Huffman(哈夫曼)编码,将文章转换为01编码。报文接收端要将01编码还原为英文文章,因此还需编写程序对Huffman编码报文进行译码。

基本要求:
(1)读英文文章“Text.txt”,统计每个字符在文章中出现的频率;将结果打印到文件“Freq.txt”
(2)构建Huffman树,求出每个字符的Huffman编码;将结果打印到文件“HaffCode.txt”
(3)将英文文章“Text.txt”转换为01字符串,将结果打印到文件“EngtoCode.txt” 
(4)读文件“EngtoCode.txt”,将01字符串转换成英文文章。(译码)  

数据描述  

定义一个哈夫曼树结构体

typedef struct{

        int weight,flag;

        int parent,Lchild,Rchild;

}HTNode,*HuffmanTree;

定义一个结构体来存储文本的每个字符和出现的次数

typedef struct{

        char c;

        int n=1;

}character;

算法思想 

  1. 先读取Text.txt文本统计每个字母出现的次数存储到character结构体中;
  2. 将每个字符出现次数当做HTNode.weight;
  3. 建立哈夫曼树;
  4. 规定左分支为0、右分支为1;叶子结点(左右孩子都为0)对应一个字符,从根到该叶子路径上的01序列就是该字符的编码(程序中从叶子沿parent回溯到根,逆序填写编码);
  5. 读取文件,将字符代替成哈夫曼编码;
  6. 读取待解码文件,从根出发,每次读取一个字符:为0则走向左孩子,为1则走向右孩子;当到达左右孩子都为0的叶子结点时,输出该结点对应的字符,并回到根结点继续译码。

源码 

#include<bits/stdc++.h>
using namespace std;

/* Placeholder weight: InitHuffmanTree assigns it to every node before the
 * leaf weights are filled in, and DispHuffmanTree prints it as '-'. */
#define MAX 999999

/*
 * One entry of the character-frequency table.
 *
 * Declared as a named struct: a default member initializer is not allowed in
 * an unnamed struct that only receives its name through a typedef.
 */
struct character{
	char c;   /* the character this entry counts */
	int n=1;  /* occurrence count; starts at 1 because an entry is created on first sight */
};

/*
 * Node of the Huffman tree, stored in an array and linked by index.
 * Index 0 means "no node" for parent, Lchild and Rchild.
 */
typedef struct{
	int weight;  /* frequency of the leaf, or sum of the children's weights */
	int flag;    /* set to 1 by Select once the node has been picked */
	int parent;  /* index of the parent node, 0 for the root */
	int Lchild;  /* index of the left child, 0 for a leaf */
	int Rchild;  /* index of the right child, 0 for a leaf */
}HTNode,*HuffmanTree;

/* Table of code strings: HuffmanCoding allocates it and stores the
 * NUL-terminated 0/1 code of z[i] in HC[i] (entries are used from index 1). */
typedef char ** HuffmanCode; 
/* Code table shared by HuffmanCoding (writer) and HuffmanPutF (reader). */
HuffmanCode HC;
/* Character-frequency table filled by ReadFile; entries are used from index 1. */
character z[128];

int ReadFile(){
	FILE *p=fopen("Text.txt","r");
	FILE *pf=fopen("Freq.txt","w+");
	if((p=fopen("Text.txt","r"))==NULL){
		cout<<"ERROR";
		exit(0);
	}
	int i=1;
	char ch='1';
	while(ch!=EOF){
		int page=0;
		ch=fgetc(p);
		for(int j=1;j<=i;j++){
			if(ch==z[j].c){
				z[j].n++;
				page=1;
				break;
			}
		}
		if(page==0){
			z[i].c=ch;
			i++;
		}
	}
	for(int k=1;k<i-1;k++){
		cout<<z[k].c<<":"<<z[k].n<<endl;
		fputs("字符",pf);
		fputc(z[k].c,pf);
		fputs("的频率:",pf);
		fprintf(pf,"%d",z[k].n);
		fputc('\n',pf);
	}
	fclose(p);
	fclose(pf);
	return i-2;
}

/*
 * Allocate and initialise the node array for a Huffman tree with n leaves.
 *
 * HT receives a malloc'ed array of 2*n nodes; nodes are used from index 1 to
 * 2*n-1. Every node starts without parent or children, unflagged, and with
 * weight MAX; leaves 1..n then take their weight from z[i].n.
 *
 * When n <= 1 no tree can be built and HT is set to NULL so callers never
 * see an indeterminate pointer. Prints "ERROR" and terminates the program if
 * the allocation fails. The caller owns the array and releases it with free.
 */
void InitHuffmanTree(HuffmanTree &HT, int n)
{
	HT=NULL;
	if(n<=1)
		return;

	HT=(HTNode *)malloc(2*(size_t)n*sizeof(HTNode));
	if(HT==NULL){
		cout<<"ERROR";
		exit(EXIT_FAILURE);
	}
	/* Slot 0 is unused by the 1-based tree but is initialised as well. */
	for(int i=0;i<2*n;i++){
		HT[i].parent = 0;
		HT[i].Lchild = 0;
		HT[i].Rchild = 0;
		HT[i].weight = MAX;
		HT[i].flag = 0;
	}
	for(int i=1;i<=n;i++)
		HT[i].weight=z[i].n;
}

/*
 * Pick the unflagged node with the smallest weight among HT[1..n].
 *
 * Ties are resolved in favour of the lowest index. The chosen node's flag is
 * set to 1 so that later calls skip it.
 *
 * Returns the index of the chosen node, or 0 when every node in the range is
 * already flagged (in that case nothing is modified).
 */
int Select(HuffmanTree &HT, int n){
	int best=0;
	for(int i=1;i<=n;i++){
		if(HT[i].flag)
			continue;
		if(best==0||HT[i].weight<HT[best].weight)
			best=i;
	}
	if(best!=0)
		HT[best].flag=1;
	return best;
}


/*
 * Print nodes 1..2*n-1 of the tree as a tab-separated table on stdout:
 * index, weight, parent, left child, right child.
 * A weight still equal to MAX is shown as '-'.
 */
void DispHuffmanTree(HuffmanTree HT, int n)
{
	const int last=2*n-1;
	printf("\n结点i\tweight\tparent\tLchild\tRchild\n");
	int idx=1;
	while(idx<=last){
		const HTNode &node=HT[idx];
		printf("%d\t",idx);
		if(node.weight!=MAX)
			printf("%d\t", node.weight);
		else
			printf("%c\t",'-');
		printf("%d\t%d\t%d\n", node.parent, node.Lchild, node.Rchild);
		idx++;
	}
}

/*
 * Build the Huffman tree and derive the code of every leaf.
 *
 * HT  node array with room for indices 1..2*n-1 (see InitHuffmanTree).
 * HC  receives a malloc'ed table; HC[i] is the NUL-terminated 0/1 code of
 *     leaf i for i in 1..n (HC[0] is NULL). The caller frees each HC[i] and HC.
 * w   leaf weights, read from w[1] to w[n] to match the 1-based tree.
 * n   number of leaves; nothing is built when n <= 1.
 *
 * The codes are also written to "HaffCode.txt", one line per character of z.
 * Prints "ERROR" and terminates the program if the file cannot be opened or
 * an allocation fails.
 */
void HuffmanCoding(HuffmanTree &HT,HuffmanCode &HC,int *w,int n){
	FILE *pc=fopen("HaffCode.txt","w+");
	if(pc==NULL){
		cout<<"ERROR";
		exit(EXIT_FAILURE);
	}
	if(n<=1){
		fclose(pc);
		return;
	}

	const int m=2*n-1;
	/* Leaves take their weight from w; internal nodes start empty. */
	for(int i=1;i<=n;++i) HT[i]={w[i],0,0,0,0};
	for(int i=n+1;i<=m;++i) HT[i]={0,0,0,0,0};

	/* Repeatedly merge the two lightest unused nodes into a new parent. */
	for(int i=n+1;i<=m;i++){
		const int s1=Select(HT,i-1);
		const int s2=Select(HT,i-1);
		HT[s1].parent=i;
		HT[s2].parent=i;
		HT[i].Lchild=s1;
		HT[i].Rchild=s2;
		HT[i].weight=HT[s1].weight+HT[s2].weight;
	}

	HC=(HuffmanCode)malloc((n+1)*sizeof(char *));
	/* A code has at most n-1 symbols, so n bytes hold it plus the terminator. */
	char *cd=(char *)malloc(n*sizeof(char));
	if(HC==NULL||cd==NULL){
		free(cd);
		fclose(pc);
		cout<<"ERROR";
		exit(EXIT_FAILURE);
	}
	HC[0]=NULL;
	cd[n-1]='\0';
	for(int i=1;i<=n;++i){
		/* Walk from the leaf up to the root, filling the code back to front. */
		int start=n-1;
		for(int c=i,f=HT[i].parent;f!=0;c=f,f=HT[f].parent)
			cd[--start]=(HT[f].Lchild==c)?'0':'1';
		HC[i]=(char *)malloc((n-start)*sizeof(char));
		if(HC[i]==NULL){
			free(cd);
			fclose(pc);
			cout<<"ERROR";
			exit(EXIT_FAILURE);
		}
		strcpy(HC[i],&cd[start]);
	}
	free(cd);

	for(int i=1;i<=n;i++){
		fputs("权值为",pc);
		fputc(z[i].c,pc);
		fputs("的哈夫曼编码:",pc);
		fputs(HC[i],pc);
		fputc('\n',pc);
	}
	fclose(pc);
}

/*
 * Encode "Text.txt" into "EngtoCode.txt".
 *
 * Each character read is looked up in z[1..n] and replaced by its code
 * string HC[j]. A character that is not in the table is skipped, and nothing
 * is written when the code table HC has not been built (HC is NULL).
 *
 * Prints "ERROR" and terminates the program if a file cannot be opened.
 */
void HuffmanPutF(int n){
	FILE *p=fopen("Text.txt","r");
	if(p==NULL){
		cout<<"ERROR";
		exit(EXIT_FAILURE);
	}
	FILE *pu=fopen("EngtoCode.txt","w+");
	if(pu==NULL){
		fclose(p);
		cout<<"ERROR";
		exit(EXIT_FAILURE);
	}

	if(HC!=NULL){
		int ch;  /* int, not char: EOF must stay distinguishable from a data byte */
		while((ch=fgetc(p))!=EOF){
			for(int j=1;j<=n;j++){
				if(z[j].c==(char)ch){
					fputs(HC[j],pu);
					break;
				}
			}
		}
	}
	fclose(pu);
	fclose(p);
}

/*
 * Decode "EngtoCode.txt" and print the recovered text to stdout.
 *
 * Starting at the root (node 2*n-1), '0' moves to the left child and '1' to
 * the right child. On reaching a leaf (both children 0) the character z[p].c
 * is printed and decoding restarts at the root. Any other byte, such as a
 * trailing newline, is ignored.
 *
 * Does nothing when n <= 1, because no tree exists then. Prints "ERROR" and
 * terminates the program if the file cannot be opened.
 */
void HuffmanEcoding(HuffmanTree &HT,int n){
	if(n<=1||HT==NULL)
		return;

	FILE *pe=fopen("EngtoCode.txt","r");
	if(pe==NULL){
		cout<<"ERROR";
		exit(EXIT_FAILURE);
	}

	const int root=2*n-1;
	int p=root;
	int ch;  /* int, not char: EOF must stay distinguishable from a data byte */
	while((ch=fgetc(pe))!=EOF){
		if(ch=='0') p=HT[p].Lchild;
		else if(ch=='1') p=HT[p].Rchild;
		else continue;
		if(HT[p].Lchild==0&&HT[p].Rchild==0){
			cout<<z[p].c;
			p=root;
		}
	}
	fclose(pe);
}

int main(){
	int n,w[128];
	HuffmanTree HT;
	n=ReadFile();
	for(int i=1;i<=n;i++)
		w[i]=z[i].n;
	InitHuffmanTree(HT,n);
//	DispHuffmanTree(HT,n);
	HuffmanCoding(HT,HC,w,n);
	HuffmanPutF(n);
//	DispHuffmanTree(HT,n);
	HuffmanEcoding(HT,n);
	return 0;
}

测试  

The discovery of the Omicron version of the coronavirus is demonstrating the risks linked to world vaccine inequality.South African scientists informed the World Health Organization (WHO) last week about the new Omicron version, or variant.The WHO has declared Omicron a "very high" risk worldwide because it contains some "concerning" mutations. A mutation is a change in the genetic material of a virus. Early evidence suggests the new variant may spread more easily than other coronavirus versions.Many countries have closed their borders in an attempt to block new infections of the variant. The United States, Brazil, Britain and the European Union have placed travel restrictions on eight African nations where the variant was first identified.

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值