C++ 哈夫曼树(压缩、加密实现代码)

在以前写LZW压缩算法的时候,遇到很多难受的问题,基本上都在哈夫曼编码中解决了,虽然写这代码很费神,但还是把代码完整的码出来了,毕竟哈夫曼这个思想确实很牛逼。哈夫曼树很巧妙的解决了当时我在LZW序列化的时候想解决的问题,就是压缩后文本的分割。比如用lzw编码abc,就是1,2,3。但这个在存为文件的时候必须用分割符把1,2,3分割开,非常浪费空间,否则会和12 23 123 产生二义性。而哈夫曼树,将所有char分布在叶节点上,在还原的时候,比如1101110,假设110是叶节点,那么走到110的时候就可以确定,已经走到尽头,回到根节点继续走,这样就避免了字符的分割,全部用1010101010101这样的路径表示字符,可以将8位压缩为1个char进行存储。在构造树的时候,将出现率高的char放在上面,这样路径就很短,自然就节省了存储空间。虽然哈夫曼压缩效率不是最高的,但还算比较乐观的。


哈夫曼除了压缩以外还可以用于加密,在将文本用哈夫曼编码时,需持久化生成的char计数链表结构,这样才能还原出树结构,而解码时路径正是依赖于树结构的。也就是说,这种编码是属于约定形式的编码,在编码时用原文本产生树结构,而存储的是树路径,解码的时候缺少树或树结构与原先不相符都是无法完成解码的,就好比,我用10代表a,你存的是10,你将10解释为 b或c等等都是不正确的。由于转换为了char存储,所以还需持久化最后填充的数目、文本长度,才能还原出原先的01表示的文本格式


这个代码有一定缺陷,由于当时考虑的是对文本进行处理,当文件中有char='\0' 时会出现错误,这个代码打的很费神,就不继续修复了,如有需要,可自行更改,解决的办法应该挺多的


先来个运行图:



源代码

#include<iostream>
#include<sstream>
#include<fstream>

void WriteFile(char* path,const char* content,int length,bool append=false);
using namespace std;
struct Node{ 
	char data;
	Node* left;
	Node* right; 
};

struct L_Node{
	int count;
	Node* node;
	L_Node* next;
};

Node* AddNode(int count,char data,L_Node*& first){
	L_Node* lnode=new L_Node();
	lnode->count=count;
	Node* node=new Node();
	node->data=data;
	node->left=0;
	node->right=0;
	lnode->node=node;
	if(first==0){
		first=lnode;
	}
	else{
		if(lnode->count<first->count){
			lnode->next=first;
			first=lnode;
		}
		else{
			L_Node* iter=first;
			
			while(iter->next!=0&&iter->next->count<lnode->count){
				iter=iter->next;
			}
			
			if(iter->next==0){
				iter->next=lnode;
				lnode->next=0;
			}
			else{
				lnode->next=iter->next;
				iter->next=lnode;
			}
		}
	}
	return node;
}

void SaveLNodes(L_Node* first){
	stringstream ss;
	while(first!=0){
		ss<<(int)(unsigned char)first->node->data<<':'<<first->count<<' ';
		first=first->next;
	}
	WriteFile("nodes.txt",ss.str().c_str(),ss.str().length());
}

void GetLNodes(L_Node*& first){
	char temp[32];
	ifstream in;
	in.open("nodes.txt",ios::in|ios::binary);
	while(!in.eof()){
		temp[0]=0;
		in>>temp;
		if(strlen(temp)>0){
			char* data=strtok(temp,":");
			char* count=strtok(0,":");
			AddNode(atoi(count),atoi(data),first);
		}
		
	}
}

void BuildSortedList(char* content,L_Node*& first,int length){
	int array[256]={
		0
	};

	for(int i=0;i<length;i++){
		array[(unsigned char)content[i]]++;
	}

	for(int i=0;i<256;i++){
		if(array[i]>0){
			AddNode(array[i],(char)i,first);
		}
	}
	SaveLNodes(first);
}

void BuildTree(L_Node*& first,Node*& root){//get l1->node,l2->node,remove l1,l2,then put l3 into list,then set l3->left and l3->right
	if(first->next==0){
		Node* node=new Node();
		root=node;
		root->right=0;
		node=new Node();
		node->data=first->node->data;
		node->left=0;
		node->right=0;
		root->left=node;
		delete first;
		return;
	}
		 
	while(first->next!=0){
		int count=first->count+first->next->count;
		Node* node1=first->node;
		L_Node* temp=first;
		first=first->next;
		delete temp;
		Node* node2=first->node;
		temp=first;
		delete temp;
		first=first->next;
		root=AddNode(count,0,first);
		root->left=node1;
		root->right=node2;
		//cout<<(int)node1->data<<':'<<(int)node2->data<<endl;
	}
	delete first;
}

void PreOrderTraversal(Node* node,char* track,int branch,char** table){
	if(node!=0){
		
		char* track2=0;
		
		if(branch==0){
			track2=new char[strlen(track)+2];
			sprintf(track2,"%s0\0",track);
		}
		else if(branch==1){
			track2=new char[strlen(track)+2];
			sprintf(track2,"%s1\0",track);
		}
		else{
			track2=new char[strlen(track)+1];
			sprintf(track2,"%s\0",track);
		}
	
		if(node->data!=0){
			table[(unsigned char)node->data]=track2;
		}
		
		PreOrderTraversal(node->left,track2,0,table);
		PreOrderTraversal(node->right,track2,1,table);
		
	
		if(node->data==0){
			delete track2;
		}
	}
}

void PreOrderTraversal(Node* node){
	if(node!=0){
		cout<<(int)(unsigned char)node->data<<endl;
		PreOrderTraversal(node->left);
		PreOrderTraversal(node->right);
	}
}

char* Encode(const char* content,char** table,int length){
	
	stringstream ss;

	for(int i=0;i<length;i++){
		if((unsigned char)content[i]==0){

		}
		else{
			ss<<table[(unsigned char)content[i]];	
		}
	}
 	
	
	char* encoded_content=new char[ss.str().length()+1];
	memcpy(encoded_content,ss.str().c_str(),ss.str().length());
	encoded_content[ss.str().length()]=0;
	return encoded_content;
}

int BinToDec(char* bin_content){
	int number=0;
	int cur=1;	
	for(int i=7;i>=0;i--){
		number+=(bin_content[i]-'0')*cur;
		cur*=2;
	}
	return number;
} 

char* BinToCharText(const char* bin_content,int& fill_count,int& save_length){
	int length=strlen(bin_content);
	
	fill_count=8-length%8;

	if(fill_count>0){
		char* temp=new char[length+fill_count+1];
		
		char temp1[fill_count];
		for(int i=0;i<fill_count;i++){
			temp1[i]='0';
		}
		
		sprintf(temp,"%s%s",bin_content,temp1);
		temp[length+fill_count]=0;
		bin_content=temp;
	}
	
	length+=fill_count;
	
	save_length=length/8;

	char* text=new char[length/8+1];
	for(int i=0;i<length;i+=8){
		char temp[8];
	 	memcpy(temp,bin_content+i,8);
	 	text[i/8]=(char)BinToDec(temp);
	 
	}
	text[length/8]=0;
	
	if(fill_count>0){
		delete bin_content;
	}
	
	return text;
}

char* DecToBin(int num){
	char* bin=new char[8];
	if(num<0){
		num=256+num;
	}
	
	for(int i=7;i>=0;i--){
		bin[i]=num%2+'0';
		num/=2;
	}
	return bin;
}

char* CharTextToBin(char* text,int fill_count,int save_length){
	int length=save_length;

	char* content=new char[8*length+1];
	
	int pos=0;
	for(int i=0;i<length;i++){
 		int number=text[i];
		char* bin=DecToBin(number);
		memcpy(content+pos,bin,8);
		pos+=8;
		delete bin;
	}
	
	content[8*length-fill_count]=0;

	return content;
}

char* Decode(const char* encode_content,Node* tree){
	stringstream ss;
	Node* node=tree;

	for(int i=0;i<strlen(encode_content);i++){
		if(encode_content[i]=='0'){
			node=node->left;
		}
		else if(encode_content[i]=='1'){
			node=node->right;
		}

		if(node->data!=0){
			ss<<node->data;
			node=tree;
		}
	}
	char* decode_content=new char[ss.str().length()+1];
	memcpy(decode_content,ss.str().c_str(),ss.str().length());
	decode_content[ss.str().length()]=0;
	return decode_content;
}


void ReleaseTable(char** table){
	for(int i=0;i<256;i++){
		if(table[i]!=0){
			delete table[i];
		}
	}
}

void PostOrderTraversal(Node* node){
	if(node!=0){
		PostOrderTraversal(node->left);
		PostOrderTraversal(node->right);
		delete node;
	}
} 

char* ReadFile(char* path,long& length){
	char* content=0;
	ifstream in;
	in.open(path,ios::in|ios::binary);
	in.seekg(0,ios::end);
	length=in.tellg();
	content=new char[length+1];
	in.seekg(0,ios::beg);
	int i=0;
	while(!in.eof()){
		content[i++]=in.get();
	}
	content[length]=0;
	in.close();
	return content;
}

char* ReadFile(char* path,int& fill_count,int& save_length){
	char* content=0;
	ifstream in;
	in.open(path,ios::in|ios::binary);
	in>>fill_count>>save_length;
	long cur=in.tellg()+(long)1;
	in.seekg(0,ios::end);
	long length=in.tellg()-cur;
	content=new char[length+1];
	in.seekg(cur,ios::beg);
	int i=0;
	while(!in.eof()){
		content[i++]=in.get();
	}
	content[length]=0;
	in.close();
	return content;
}

void WriteFile(char* path,const char* content,int length,bool append){
	ofstream out;
	if(append){
		out.open(path,ios::out|ios::binary|ios::app);
	}
	else{
		out.open(path,ios::out|ios::binary);
	}
 	
 	out.write(content,length);
 	out.close();
}

int main(){
	long length;
	char* content=ReadFile("content.txt",length);

	L_Node* first=0;
	
	BuildSortedList(content,first,length); //create nodes list and save to nodes file
	//GetLNodes(first);//get and recreate nodes from file

	Node* root=0;//used for buildtable and decode
	BuildTree(first,root);//build tree by nodes list and release sorted list
	
	char* table[256]={//build table,used for encode
		0
	};

	PreOrderTraversal(root,"",-1,table);//create table
	

 	char* encode_content=Encode(content,table,length);//convert content to encoded bin text
 	cout<<encode_content<<endl;
 	delete content;
 	
 	ReleaseTable(table);//release table when encode finished
	
	
	int fill_count;
 	int save_length;
 	char* save_text=BinToCharText(encode_content,fill_count,save_length);//convert encoded bin text to char text and save these text to file
 	delete encode_content;
 	char head_info[32];
 	sprintf(head_info,"%d %d ",fill_count,save_length);
 	WriteFile("encoded_content.txt",head_info,strlen(head_info));
	WriteFile("encoded_content.txt",save_text,save_length,true);
	delete save_text;
	save_text=ReadFile("encoded_content.txt",fill_count,save_length);//read fill_count、save_length、encoded char text from file
 	
	char* bin_text= CharTextToBin(save_text,fill_count,save_length);//convert char text to bin text
	delete save_text;
	
	char* decode_content=Decode(bin_text,root);//decode by bin_text and tree
	cout<<decode_content<<endl;
	delete bin_text;
	delete decode_content;
	
	
	PostOrderTraversal(root);//release tree
	
	return 0;
}


  • 5
    点赞
  • 21
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值