正在恶补数据结构,今天看到了二叉树和 Huffman 编码,发现压缩程序很有意思,就按照 Huffman 的编码思想实现了一个。算法没有做任何改进,也没有用第三方库,但还是能压缩一点空间,花了一天写完。编码效率还是很菜。
只要调用 compressFile 和 decompressFile 就可以压缩、解压。当然比不上 zip 和 rar,离它们还差十万八千里啊!
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#define LEFT 0
#define RIGHT 1
using namespace std;
// A single node of the Huffman tree.
// The same node type doubles as an element of a doubly linked list
// (list_prev / list_next), so one object can sit in a work list and in the
// tree (parent / left_child / right_child) at the same time.
typedef struct huffmanTreetype{
    // Creates a detached leaf: no code, no list or tree links, zero weight
    // and frequency. Every member is initialized, including `ch`, so that
    // printing a fresh node never reads an indeterminate value.
    huffmanTreetype()
        : huffmancode(NULL),
          codelen(0),
          l_or_r(2),
          ch(0),
          isleafnode(true),
          weight(0),
          frequency(0),
          list_next(NULL),
          list_prev(NULL),
          parent(NULL),
          left_child(NULL),
          right_child(NULL)
    {
    }

    // Prints the byte value followed by its code, one digit per code entry,
    // e.g. "a huffmancode:10".
    void printCode() const{
        std::cout<<ch<<" huffmancode:";
        for(int i = 0 ;i < codelen; i++){
            std::cout<<static_cast<int>(huffmancode[i]);
        }
        std::cout<<std::endl;
    }

    char *huffmancode;   // code digits, one array entry per bit; `codelen` entries
    char codelen;        // number of entries in `huffmancode`
    char l_or_r;         // which branch of the parent this node hangs on; 2 until assigned
    unsigned char ch;    // byte value represented by this node
    bool isleafnode;     // true until the node is marked as an internal node
    float weight;        // weight used when picking nodes to merge
    int frequency;       // occurrence count of `ch`
    struct huffmanTreetype *list_next,*list_prev,*parent,*left_child,*right_child;
}huffmanTree;
huffmanTree *leaflist_header = NULL; // head of the linked list of leaf nodes
huffmanTree *listtree_header = NULL; // the Huffman tree (head of the list the tree is built from)
int compress_len = 0; // total length of the compressed data, in bytes
int compresscount = 0; // number of source bytes processed so far by huffCompress
float compressprogress = 0; // compression progress (compresscount divided by the source length)
// Appends `node` to the end of the list that starts at `header`.
// `header` must not be NULL; `node` becomes the new tail and its list links
// are overwritten.
void appendList(huffmanTree *const header,huffmanTree *node){
    huffmanTree *tail = header;
    for(; tail->list_next != NULL; tail = tail->list_next){
        // walk to the last element
    }
    tail->list_next = node;
    node->list_prev = tail;
    node->list_next = NULL;
}
// Unlinks `node` from the list that starts at `header` without destroying it.
//
// The list is searched for `node`; if it is not a member nothing is changed.
// When found, its neighbours are joined together and the node's own
// list_prev / list_next are cleared.
//
// Returns the head of the list after removal: `header` itself, the former
// second element when the head was removed, or NULL when the removed node was
// the only element.
huffmanTree* cutElement(huffmanTree *const header,huffmanTree *node){
    for(huffmanTree *cursor = header; cursor != NULL; cursor = cursor->list_next){
        if(cursor != node){
            continue;
        }
        huffmanTree *before = node->list_prev;
        huffmanTree *after = node->list_next;
        if(before != NULL){
            before->list_next = after;
        }
        if(after != NULL){
            after->list_prev = before;
        }
        // Detach the node completely.
        node->list_prev = NULL;
        node->list_next = NULL;
        if(node != header){
            return header;
        }
        // The head was removed: its successor (possibly NULL) becomes the new
        // head. Guard the NULL case so removing the sole element is safe.
        if(after != NULL){
            after->list_prev = NULL;
        }
        return after;
    }
    return header;
}
// Returns the number of elements in the list starting at `header`
// (0 when `header` is NULL).
int listlength(huffmanTree *const header){
    int count = 0;
    for(huffmanTree *cursor = header; cursor != NULL; cursor = cursor->list_next){
        ++count;
    }
    return count;
}
// Prints every element of the list as "(<ch> <weight>) " on one line,
// followed by a newline.
void printlist(huffmanTree *const header){
    for(huffmanTree *cursor = header; cursor != NULL; cursor = cursor->list_next){
        std::cout<<"("<<cursor->ch<<" "<<cursor->weight<<") ";
    }
    std::cout<<std::endl;
}
// Walks the tree rooted at `rootnode` in pre-order and collects every leaf
// into the global leaf list (`leaflist_header`). The first leaf found becomes
// the list head; later leaves are appended behind it.
void preOrderTree(huffmanTree *rootnode){
    if(rootnode == NULL){
        return;
    }
    if(rootnode->isleafnode){
        if(leaflist_header != NULL){
            appendList(leaflist_header,rootnode);
        }else{
            leaflist_header = rootnode;
        }
    }
    preOrderTree(rootnode->left_child);
    preOrderTree(rootnode->right_child);
}
// Assigns a Huffman code to every node of the leaf list and computes the
// size of the compressed output.
//
// For each leaf the code length is the number of parent links up to the root;
// the code digits are the `l_or_r` values along that path, stored root-first
// in a freshly allocated `huffmancode` array of `codelen` entries.
//
// Side effect: the global `compress_len` is set to the number of bytes needed
// to hold sum(frequency * codelen) bits, rounded up.
//
// The function may be called repeatedly: code lengths are recomputed from
// scratch, a previously allocated code array is released, and `compress_len`
// is overwritten rather than added to. The bit total is kept in a 64-bit
// accumulator because it exceeds the range of `int` once the output passes
// 256 MiB.
void setfuffmanCode(huffmanTree *const listheader){
    long long totalbits = 0;
    for(huffmanTree *leaf = listheader; leaf != NULL; leaf = leaf->list_next){
        // Depth of the leaf == number of code bits.
        int depth = 0;
        for(huffmanTree *up = leaf; up->parent != NULL; up = up->parent){
            ++depth;
        }

        delete[] leaf->huffmancode; // no-op for NULL; avoids a leak on re-run
        leaf->huffmancode = new char[depth];
        leaf->codelen = static_cast<char>(depth);

        // Walk upwards again, filling the code from its last digit to its
        // first so that it reads root-to-leaf.
        huffmanTree *up = leaf;
        for(int i = depth - 1; i >= 0; --i){
            leaf->huffmancode[i] = up->l_or_r;
            up = up->parent;
        }
        //leaf->printCode();
        totalbits += static_cast<long long>(leaf->frequency) * depth;
    }
    compress_len = static_cast<int>((totalbits + 7) / 8);
}
// Returns the first node with the smallest weight in the non-empty list
// starting at `header`.
static huffmanTree* lightestNode(huffmanTree *header){
    huffmanTree *best = header;
    for(huffmanTree *cursor = header->list_next; cursor != NULL; cursor = cursor->list_next){
        if(cursor->weight < best->weight){
            best = cursor;
        }
    }
    return best;
}
// Selects the two lightest nodes of the list starting at `header`.
//
// lasttwo[0] receives the lightest node and lasttwo[1] the second lightest.
// To find the runner-up the lightest node is temporarily cut out of the list
// and afterwards re-attached at the tail, so the list order (and possibly its
// head) changes. The list must contain at least two elements.
//
// Returns the head of the list after this reordering.
huffmanTree* findLasttwo(huffmanTree * header,huffmanTree **lasttwo){
    lasttwo[0] = lightestNode(header);
    header = cutElement(header,lasttwo[0]);
    lasttwo[1] = lightestNode(header);
    appendList(header,lasttwo[0]);
    return header;
}
//根据统计数据创建huffman树
void createHuffTree(int *statistics){
//构造huffman树
int total_frequency = 0;
for(int i = 0 ;i < 256 ;i++){
total_frequency += statistics[i];
}
for(int i = 0 ;i < 256 ;i++){
if(statistics[i] != 0){
//计算权值
huffmanTree *node = new huffmanTree;
node->ch = i;
node->frequency = statistics[i];
node->weight = statistics[i]/(float)total_frequency;
if(listtree_header == NULL){
listtree_header = node;
}else{
appendList(listtree_header,node);
}
//cout<<(char)i<<": weight "<<node->weight<<endl;
}
}
//直到链表中只有一个元素才停止构造树
//选中剩下节点中两个权值非0最小的构造新节点
huffmanTree* lasttwo[2];
//printlist(listtree_header);
while(listlength(listtree_header) >= 2){//链表中至少有两个元素
listtree_header = findLasttwo(listtree_header,lasttwo);
//cout<<"listlen:"<<listlength(list_header)<<endl;
//cout<<"last two 0 :"<<lasttwo[0]->ch<<" 1:"<<lasttwo[1]->ch<<endl;
//开始生成树结构
huffmanTree *node = new huffmanTree;
lasttwo[0]->parent = node;
lasttwo[0]->l_or_r = LEFT;
lasttwo[1]->parent = node;
lasttwo[1]->l_or_r = RIGHT;
node->ch = '#';
node->isleafnode = false;
node->weight = lasttwo[0]->weight + lasttwo[1]->weight;
node->left_child = lasttwo[0];
node->right_child = lasttwo[1];
//将链表重新链接好,可能把链表头都合并了
appendList(listtree_header,node);
listtree_header = cutElement(listtree_header,lasttwo[0]);
listtree_header = cutElement(listtree_header,lasttwo[1]);
//printlist(listtree_header);
}
}
// Looks up the code of `src_byte` in the list starting at `header`.
// On a match, stores the node's code array in `*code` and returns its code
// length. Returns 0 and leaves `*code` untouched when no node matches.
int findCode(huffmanTree *const header,unsigned char src_byte,char **code){
    huffmanTree *cursor = header;
    while(cursor != NULL && cursor->ch != src_byte){
        cursor = cursor->list_next;
    }
    if(cursor == NULL){
        return 0;
    }
    *code = cursor->huffmancode;
    return cursor->codelen;
}
// Sets bit y of the lvalue x to 1 (bit 0 is the least significant bit).
// Arguments and the whole expansion are parenthesized so that expressions
// such as `a | b` can be passed safely.
#define setbit(x,y) ((x) |= (1 << (y)))
// Evaluates to bit y of x, as 0 or 1. Fully parenthesized so the result can
// be used inside larger expressions (e.g. `getbit(v, 3) + 1`).
#define getbit(x,y) (((x) & (1 << (y))) >> (y))
// Encodes `src` with the Huffman codes stored in the leaf list and packs the
// resulting bits into `dest`.
//
// Parameters:
//   listheader - head of the leaf list; each node supplies `ch`, `huffmancode`
//                and `codelen`.
//   src        - bytes to encode.
//   src_len    - number of bytes in `src`.
//   dest       - output buffer. Only 1-bits are written, so the caller must
//                have zeroed it beforehand.
//   dest_len   - capacity of `dest` in bytes; encoding stops with a message on
//                stderr instead of writing past it.
//
// Bits are packed most significant bit first; a code that does not end on a
// byte boundary simply continues in the next byte. Bytes of `src` that have
// no entry in the list contribute no bits. The globals `compresscount` and
// `compressprogress` are updated per input byte and a progress line is
// printed every 500000 bytes.
void huffCompress(huffmanTree *const listheader,unsigned char *src,int src_len,unsigned char *dest,int dest_len){
    // Build a direct byte -> code table once instead of scanning the list for
    // every input byte. The first list entry for a byte value wins.
    const char *codes[256];
    int lengths[256];
    bool known[256];
    for(int value = 0; value < 256; ++value){
        codes[value] = NULL;
        lengths[value] = 0;
        known[value] = false;
    }
    for(huffmanTree *leaf = listheader; leaf != NULL; leaf = leaf->list_next){
        if(!known[leaf->ch]){
            known[leaf->ch] = true;
            codes[leaf->ch] = leaf->huffmancode;
            lengths[leaf->ch] = leaf->codelen;
        }
    }

    long long destbitindex = 0; // 64-bit: the bit count exceeds int range past 256 MiB
    for(int i = 0 ;i < src_len ;i++){
        compresscount++;
        compressprogress = compresscount/(float)src_len;
        if(compresscount%500000 == 0){
            std::cout<<"progress:"<<compressprogress * 100<<"%"<<std::endl;
        }

        const char *code = codes[src[i]];
        const int codelen = lengths[src[i]];
        for(int j = 0 ;j < codelen; ++j, ++destbitindex){
            if(code[j] != 1){
                continue; // 0-bits are already present in the zeroed buffer
            }
            const long long destbyteindex = destbitindex / 8;
            if(destbyteindex >= dest_len){
                std::cerr<<"huffCompress: output buffer too small, stopped at input byte "<<i<<std::endl;
                return;
            }
            const int bitoffset = static_cast<int>(destbitindex % 8); // offset from the left (high) end
            dest[destbyteindex] |= static_cast<unsigned char>(1u << (7 - bitoffset));
        }
    }
}
// Decodes a Huffman bit stream by walking the tree from the root.
//
// Parameters:
//   treeheader - root of the Huffman tree.
//   binary_src - packed code bits, most significant bit of each byte first.
//   src_len    - number of bytes available in `binary_src`.
//   dest       - output buffer for the decoded bytes.
//   dest_len   - number of bytes to decode (the decompressed length).
//
// For every input bit the walk goes to the left child on 0 and to the right
// child on 1; reaching a leaf emits its byte and restarts at the root.
//
// Handled edge cases:
//   - NULL tree, NULL `dest` or non-positive `dest_len`: nothing is written.
//   - The root itself is a leaf (input contained one distinct byte value):
//     the byte is emitted `dest_len` times without consuming any bits.
//   - Input exhausted or a missing child before `dest_len` bytes were
//     produced: decoding stops with a message on stderr.
void huffDecompress(huffmanTree *const treeheader,unsigned char *binary_src,int src_len,unsigned char *dest,int dest_len){
    if(treeheader == NULL || dest == NULL || dest_len <= 0){
        return;
    }

    long long bitindex = 0; // 64-bit: the bit count exceeds int range past 256 MiB
    int destbyteindex = 0;
    huffmanTree *node = treeheader;
    while(destbyteindex < dest_len){
        if(node->isleafnode){
            dest[destbyteindex++] = node->ch;
            node = treeheader; // restart at the root for the next symbol
            continue;
        }

        const long long byteindex = bitindex / 8;
        if(binary_src == NULL || byteindex >= src_len){
            std::cerr<<"huffDecompress: compressed data ended after "<<destbyteindex
                     <<" of "<<dest_len<<" bytes"<<std::endl;
            return;
        }
        const int bitoffset = static_cast<int>(bitindex % 8); // offset from the left (high) end
        const int srcbit = (binary_src[byteindex] >> (7 - bitoffset)) & 1;

        node = (srcbit == 0) ? node->left_child : node->right_child;
        if(node == NULL){
            std::cerr<<"huffDecompress: malformed tree, internal node without child"<<std::endl;
            return;
        }
        ++bitindex;
    }
}
// Compresses the file `srcfilename` into "<srcfilename>.huffman-YU".
//
// Archive layout (native `int` size and byte order):
//   int            length of the original file name
//   char[]         original file name (no terminator)
//   int[256]       occurrence count of every byte value
//   int            length of the compressed data in bytes
//   unsigned char[] compressed data
//
// Failures (file cannot be opened, read or written; file too large for an
// `int` length) are reported on stderr and the function returns without
// producing a complete archive. All buffers are released automatically.
void compressFile(char *srcfilename){
    if(srcfilename == NULL){
        std::cerr<<"compressFile: no file name given"<<std::endl;
        return;
    }
    const std::string compressname = std::string(srcfilename) + ".huffman-YU";

    // Read-only access is sufficient for the source file.
    FILE *srcfile = fopen(srcfilename,"rb");
    if(srcfile == NULL){
        std::cerr<<"compressFile: cannot open "<<srcfilename<<std::endl;
        return;
    }
    long measured = -1;
    if(fseek(srcfile,0,SEEK_END) == 0){
        measured = ftell(srcfile);
    }
    if(measured < 0 || measured > INT_MAX || fseek(srcfile,0,SEEK_SET) != 0){
        std::cerr<<"compressFile: cannot determine size of "<<srcfilename<<" (or file too large)"<<std::endl;
        fclose(srcfile);
        return;
    }
    const int file_len = static_cast<int>(measured);

    std::vector<unsigned char> filememory(static_cast<size_t>(file_len));
    unsigned char *srcbytes = filememory.empty() ? NULL : &filememory[0];
    const bool readok = (file_len == 0)
        || fread(srcbytes,1,file_len,srcfile) == static_cast<size_t>(file_len);
    fclose(srcfile);
    if(!readok){
        std::cerr<<"compressFile: cannot read "<<srcfilename<<std::endl;
        return;
    }

    // Count how often every byte value occurs.
    int statistics[256];
    for(int i = 0 ;i < 256 ;i++){
        statistics[i] = 0;
    }
    for(int i = 0 ;i < file_len ;i++){
        statistics[filememory[i]]++;
    }

    // Build the tree, collect its leaves and assign their codes.
    createHuffTree(statistics);
    preOrderTree(listtree_header);
    setfuffmanCode(leaflist_header);

    // Encode into a zero-filled buffer of compress_len bytes.
    std::vector<unsigned char> compress_dest(static_cast<size_t>(compress_len < 0 ? 0 : compress_len));
    unsigned char *destbytes = compress_dest.empty() ? NULL : &compress_dest[0];
    huffCompress(leaflist_header,srcbytes,file_len,destbytes,static_cast<int>(compress_dest.size()));
    std::cout<<"compress_len:"<<compress_len<<std::endl;

    // Write header (name length, name, statistics) followed by the data.
    FILE *compress_file = fopen(compressname.c_str(),"wb");
    if(compress_file == NULL){
        std::cerr<<"compressFile: cannot create "<<compressname<<std::endl;
        return;
    }
    int srcfilenamelen = static_cast<int>(strlen(srcfilename));
    bool writeok = fwrite(&srcfilenamelen,sizeof(int),1,compress_file) == 1;
    writeok = writeok && (srcfilenamelen == 0
        || fwrite(srcfilename,sizeof(char),srcfilenamelen,compress_file) == static_cast<size_t>(srcfilenamelen));
    writeok = writeok && fwrite(statistics,sizeof(int),256,compress_file) == 256;
    writeok = writeok && fwrite(&compress_len,sizeof(int),1,compress_file) == 1;
    writeok = writeok && (compress_dest.empty()
        || fwrite(destbytes,sizeof(char),compress_dest.size(),compress_file) == compress_dest.size());
    if(fclose(compress_file) != 0){
        writeok = false;
    }
    if(!writeok){
        std::cerr<<"compressFile: error while writing "<<compressname<<std::endl;
    }
}
// Decompresses the archive `srcfilename` and writes the result to
// "copy-<original name>", where the original name is taken from the archive
// header.
//
// Expected archive layout (native `int` size and byte order):
//   int, char[]     length and text of the original file name
//   int[256]        occurrence count of every byte value
//   int, byte[]     length and content of the compressed data
//
// The header comes from a file and is therefore validated: negative or
// implausibly large lengths, negative counts, a total above INT_MAX and short
// reads are reported on stderr and abort the operation. The output name is
// built in a std::string, so a long stored name can no longer overflow a
// fixed buffer.
//
// NOTE(review): the stored name is used as part of the output path as-is; if
// archives from untrusted sources must be supported, path separators in it
// should be rejected as well.
void decompressFile(char *srcfilename){
    std::cout<<std::endl<<"解压:";
    if(srcfilename == NULL){
        std::cerr<<"decompressFile: no file name given"<<std::endl;
        return;
    }
    FILE *srcfile = fopen(srcfilename,"rb");
    if(srcfile == NULL){
        std::cerr<<"decompressFile: cannot open "<<srcfilename<<std::endl;
        return;
    }

    // Original file name.
    const int maxnamelen = 4096; // sanity bound against corrupt headers
    int filenamelen = 0;
    if(fread(&filenamelen,sizeof(int),1,srcfile) != 1 || filenamelen < 0 || filenamelen > maxnamelen){
        std::cerr<<"decompressFile: invalid file name length in "<<srcfilename<<std::endl;
        fclose(srcfile);
        return;
    }
    std::vector<char> filename(static_cast<size_t>(filenamelen) + 1, '\0'); // +1 keeps it terminated
    if(filenamelen > 0
        && fread(&filename[0],sizeof(char),filenamelen,srcfile) != static_cast<size_t>(filenamelen)){
        std::cerr<<"decompressFile: truncated header in "<<srcfilename<<std::endl;
        fclose(srcfile);
        return;
    }

    // Byte statistics and compressed payload.
    int statistics[256];
    int compressdatalen = 0;
    if(fread(statistics,sizeof(int),256,srcfile) != 256
        || fread(&compressdatalen,sizeof(int),1,srcfile) != 1
        || compressdatalen < 0){
        std::cerr<<"decompressFile: truncated or invalid header in "<<srcfilename<<std::endl;
        fclose(srcfile);
        return;
    }
    std::vector<unsigned char> compressdata(static_cast<size_t>(compressdatalen));
    unsigned char *srcbytes = compressdata.empty() ? NULL : &compressdata[0];
    const bool readok = compressdata.empty()
        || fread(srcbytes,sizeof(char),compressdata.size(),srcfile) == compressdata.size();
    fclose(srcfile);
    if(!readok){
        std::cerr<<"decompressFile: truncated data in "<<srcfilename<<std::endl;
        return;
    }

    // The decompressed length is the sum of all byte counts.
    long long total = 0;
    for(int i = 0 ;i < 256; i++){
        if(statistics[i] < 0){
            std::cerr<<"decompressFile: negative byte count in "<<srcfilename<<std::endl;
            return;
        }
        total += statistics[i];
    }
    if(total > INT_MAX){
        std::cerr<<"decompressFile: decompressed size out of range in "<<srcfilename<<std::endl;
        return;
    }
    const int decompressdatalen = static_cast<int>(total);
    std::cout<<"decompressdatalen: "<<decompressdatalen<<std::endl;

    // Rebuild the same tree the compressor used, collect its leaves and
    // assign their codes.
    createHuffTree(statistics);
    preOrderTree(listtree_header);
    setfuffmanCode(leaflist_header);

    // Decode.
    std::vector<unsigned char> decompressdata(static_cast<size_t>(decompressdatalen));
    unsigned char *destbytes = decompressdata.empty() ? NULL : &decompressdata[0];
    huffDecompress(listtree_header,srcbytes,compressdatalen,destbytes,decompressdatalen);

    // Write the result next to the archive under "copy-<name>".
    const std::string copyfilename = std::string("copy-") + &filename[0];
    std::cout<<"copyname:"<<copyfilename<<std::endl;
    FILE *destfile = fopen(copyfilename.c_str(),"wb");
    if(destfile == NULL){
        std::cerr<<"decompressFile: cannot create "<<copyfilename<<std::endl;
        return;
    }
    bool writeok = decompressdata.empty()
        || fwrite(destbytes,sizeof(char),decompressdata.size(),destfile) == decompressdata.size();
    if(fclose(destfile) != 0){
        writeok = false;
    }
    if(!writeok){
        std::cerr<<"decompressFile: error while writing "<<copyfilename<<std::endl;
    }
}
// Entry point: decompresses a fixed archive. Swap the commented-out call in
// to create the archive from "iphone.pdf" instead.
int main(){
    // The file functions take a non-const char*, which a string literal
    // cannot be converted to in standard C++, so the names live in arrays.
    //char sourcename[] = "iphone.pdf";
    //compressFile(sourcename);
    char archivename[] = "iphone.pdf.huffman-YU";
    decompressFile(archivename);
    system("PAUSE"); // presumably keeps a Windows console window open; fails harmlessly elsewhere
    return 0;
}