Huffman编码学习总结

Huffman编码是什么?

Huffman编码是一种无失真的信源编码方式。它通过根据字符出现的概率来分配长码与短码,从而达到降低平均码长的目的,实现压缩效果。

Huffman编码的实现方法

Huffman编码的具体实现方法是采用自底向上构造二叉树的方法,就是对所有信源符号按概率从高到低进行排序,每次合并概率最低的两个符号的概率,作为一个新的符号的概率,然后对所有的符号概率再重新排序合并概率最低的两个符号,这一过程一直持续,直到最后概率合并为1.

Huffman压缩文件

压缩文件由Encoder类进行实现,Encoder类的定义如下:

#pragma once
#ifndef ENCODER_H
#define ENCODER_H

#include"huffmantree.h"
#include<vector>
#include<fstream>
#include<iostream>
#include <algorithm>
using namespace std;


// Huffman file compressor: counts byte frequencies, builds a Huffman tree,
// derives a per-byte code table and writes the packed output file.
class Encoder 
{
private :
    // Number of distinct byte values handled (one tree leaf per value).
    const int size = 256;
	// Occurrence count per byte value, indexed by the byte.
	vector<int> counter;
	ifstream input;
	ofstream output;
	// Working list of tree nodes used while building the tree.
	vector<HuffmanTree*> leaf;
	vector<vector<unsigned char>> p; // code table: p[byte] holds that byte's 0/1 code
	// Root of the finished Huffman tree.
	HuffmanTree* tree_head;
	// Number of bytes read from the input file.
	int file_size;



	bool openFile(string source_file, string target_file);
	void statistics();// tally how many times each byte value occurs in the input
	void createTree();// build the Huffman tree
	void destroyTree(HuffmanTree* root);
	// Recursively walks the tree and fills the code table `p`.
	void creatTable(HuffmanTree* node, vector<char> temp, int len);
	
public:
	Encoder();
	void compress(string source_file, string target_file); // compress source_file into target_file
	~Encoder();

};


#endif

HuffmanTree的定义

#pragma once
#ifndef HUFFMANTREE_H
#define HUFFMANTREE_H

// One node of the Huffman tree used by Encoder/Decoder.
struct  HuffmanTree
{
	HuffmanTree* lchild;
	HuffmanTree* rchild;
	int weight;  // occurrence count
	unsigned char data;  // byte value carried by a leaf
	unsigned char code;  // branch bit of this node: left child = 1, right child = 0
};

#endif

下面我将重点说明一下statistics()、createTree()、creatTable()、和 compress()

统计文件中各个字符出现的次数

// Tallies every byte of the input stream into `counter`, accumulates the
// number of bytes read in `file_size`, prints that size, and rewinds the
// stream so the data can be read again.
void Encoder::statistics()
{
	// Whitespace bytes are data as well, so they must not be skipped.
	input >> noskipws;
	for (unsigned char byte; input >> byte; )
	{
		++counter.at(byte);
		++file_size;
	}
	cout << "the size of file is " << file_size << " byte" << endl;

	// Reading to the end left the stream in a failed state; clear it before
	// seeking back to the beginning.
	input.clear();
	input.seekg(0, ios::beg);
}

这里的文件需要用二进制打开,否则在Windows下会出现文档中0X1A被认作为EOF的情况,使得读取文件异常终止。

生成Huffman树

void Encoder::createTree()
{	
	for (int i = 0; i < size; ++i)
	{
		leaf.at(i) = new HuffmanTree;
		leaf.at(i)->weight = counter.at(i);
		leaf.at(i)->code = i;
		leaf.at(i)->data = i;
		leaf.at(i)->lchild = NULL;
		leaf.at(i)->rchild = NULL;
	} //初始化
	HuffmanTree* lnode;
	HuffmanTree* rnode;

	HuffmanTree *f;
	//f = new HuffmanTree[size];
	while (leaf.size() > 1)
	{
		f = new HuffmanTree;
		//从大到小排序,每次取最后两个
		sort(leaf.begin(), leaf.end(), [=](HuffmanTree* i1, HuffmanTree* i2) {
			return i1->weight > i2->weight;
			}); 
		lnode = leaf.back();
		leaf.pop_back();
		rnode = leaf.back();
		leaf.pop_back();

		f->lchild = lnode;
		lnode->code = 1; //左1右0
		f->rchild = rnode;
		rnode->code = 0;
		f->weight = rnode->weight + lnode->weight;
		f->code = 0;
		leaf.push_back(f);
		if (leaf.size() == 1)
		{
			tree_head = f;
			leaf.pop_back();
			return;
		}
		f++;
	}
	return ;
}

对所有信源符号按概率从高到低进行排序,每次合并概率最低的两个符号的概率,作为一个新的符号的概率,然后对所有的符号概率再重新排序合并概率最低的两个符号

生成码表

// Recursively walks the tree and fills the code table `p`.
//
// node: current tree node (a null pointer is ignored).
// temp: branch bits collected so far; temp[d] holds the bit of the node at
//       depth d on the current path (index 0 is the root's own bit).
// len:  depth of `node`.
//
// For a leaf, the bits at depths 1..len (the root's bit is dropped) are
// stored as the code of `node->data`.
void Encoder::creatTable(HuffmanTree* node, vector<char> temp, int len)
{
	if (node == NULL)
		return;

	temp.at(len) = node->code;

	const bool isLeaf = (node->lchild == NULL && node->rchild == NULL);
	if (isLeaf)
	{
		p.at(node->data).assign(temp.begin() + 1, temp.begin() + len + 1);
		return;
	}

	creatTable(node->lchild, temp, len + 1);
	creatTable(node->rchild, temp, len + 1);
}

利用递归遍历二叉树来生成码表

对文件进行压缩

// Compresses `source_file` into `target_file`.
//
// Output layout: the 256 byte counts as decimal text, one per line, followed
// by the packed code bits. Bits are packed least-significant-bit first, eight
// to a byte; the last byte is zero-padded. If padding was needed, the number
// of valid bits in the last byte is appended as decimal text (kept for
// compatibility with files written by the previous version).
//
// Both files are expected to be opened in binary mode by openFile().
void Encoder::compress(string source_file, string target_file)
{
	vector<char> temp(size, 0);
	unsigned char buff;		// byte read from the input
	unsigned char out = 0;	// byte being assembled for output
	vector<char> code;		// pending code bits (each element is 0 or 1)

	openFile(source_file, target_file);
	statistics();						// count byte occurrences
	createTree();						// build the Huffman tree
	creatTable(tree_head, temp, 0);		// derive the code table

	// Write the frequency header.
	for (vector<int>::iterator it = counter.begin(); it != counter.end(); ++it)
	{
		output << *it << '\n';
	}

	while (input >> noskipws >> buff)
	{
		code.insert(code.end(), p[buff].begin(), p[buff].end());
		// Flush every complete group of eight bits as one byte.
		while (code.size() >= 8)
		{
			out = 0;
			for (int j = 0; j < 8; ++j)
			{
				out |= static_cast<unsigned char>(code.at(j) << j);
			}
			output << out;

			// Drop the bits just written. erase() is used because assigning
			// a vector from iterators into itself is undefined behaviour.
			code.erase(code.begin(), code.begin() + 8);
		}
	}

	if (!code.empty())
	{
		// Start from a clean byte: without this reset the stale bits of the
		// previously written byte leak into the final, padded byte.
		out = 0;
		for (vector<char>::size_type j = 0; j < code.size(); ++j)
		{
			out |= static_cast<unsigned char>(code.at(j) << j);
		}
		output << out;
		output << code.size();	// number of valid bits in the last byte
	}
	output.close();
	input.close();
}

压缩时一定要先将重建码表所需的信息(这里写入的是每个字符的出现次数)写入到压缩文件中,否则无法对文件进行解压缩。同时,写入字符对应的编码时,编码中的0和1要当作位信息写入:我这里将编码后的0和1每八个为一组,当成一个字节写入到文件中,最后不足8位的进行补零。

Huffman解压文件

// Decompresses `sourece_file` into `target_file`.
//
// The frequency header is read by statistics(), the tree is rebuilt by
// createTree(), and the packed bits are then walked through the tree:
// bit 1 follows the left child, bit 0 the right child. Bits are taken from
// each byte least-significant-bit first.
//
// Every bit is consumed exactly once. Previously, the iteration that noticed
// a leaf emitted the symbol but skipped that iteration's bit, so the first
// bit of every following symbol was lost.
void Decoder::deCompress(string sourece_file,string target_file) // decompress
{
	unsigned char buff;
	int count = 0;		// symbols written so far

	openFile(sourece_file, target_file);
	statistics();
	createTree();
	HuffmanTree* p = root;

	// The original file length is the sum of all byte counts.
	for (vector<int>::iterator it = counter.begin(); it != counter.end(); ++it)
	{
		file_size += *it;
	}

	// Skip one byte before the packed data.
	// NOTE(review): presumably the line break left after the last count of
	// the header; confirm against Decoder::statistics().
	input >> noskipws >> buff;

	bool done = (count == file_size);
	while (!done && (input >> noskipws >> buff))
	{
		for (int i = 0; i < 8 && !done; ++i)
		{
			// Left child = 1, right child = 0.
			p = (buff & (1 << i)) ? p->lchild : p->rchild;

			if (p->lchild == NULL && p->rchild == NULL)
			{
				output << p->data;
				p = root;
				++count;
				// Stop at the original length so padding bits are ignored.
				done = (count == file_size);
			}
		}
	}

	if (done)
	{
		cout << "decompress successfully";
	}
	// Close the streams on every path (the early return used to skip this).
	input.close();
	output.close();
}

解压文件的代码大部分和压缩文件时的一样,这里比较重要的代码如上所示。主要是以二进制形式读取压缩文件中的每一个字符,并且将每一个字符按位拆分为01序列。从树的根开始,0寻找右子树,1寻找左子树,直到找到一个叶子节点,说明译码成功。将指针重新指向根节点重复该过程即可完成解压。

操作

#include"decoder.h"
#include"encoder.h"
//#include"vld.h"
// Demo driver: compresses a bitmap and immediately decompresses it again.
int main()
{
	Encoder en;
	Decoder de;
	string source =R"(.\img\2.bmp)"; // file to compress
	string temp = R"(.\output\2.bin)"; // compressed file
	string target = R"(.\output\2.bmp)"; // decompressed file
	en.compress(source, temp);
	de.deCompress(temp, target);
	// A non-zero exit status signals failure to the caller, so report 0.
	return 0;
}

Huffman赫夫曼编码压缩文本文件

#include "zdown.h"
 
/*
 * Command-line entry point.
 * Without arguments a usage hint is printed. Otherwise exactly four
 * arguments are required: the mode ("-z" to compress, "-r" to restore)
 * followed by three file names, which are handed to _zd() or _ud().
 */
int main(int argc, char *argv[])
{
    const char *mode;

    if(argc == 1)
    {
        printf("提示:\n");
        printf("-z “zdown -z 待压缩文件名 解码字典文件名 编码后文件名” 格式压缩文件\n");
        printf("-r “zdown -r 编码后文件名 解码字典文件名 还原后文件名” 格式解压文件\n");
        return 0;
    }

    if(argc != 5)
    {
        fprintf(stderr, "Wrong parameter input!\n");
        exit(EXIT_FAILURE);
    }

    mode = argv[1];
    if(strcmp(mode, "-z") == 0)
    {
        _zd(argv);
        return 0;
    }
    if(strcmp(mode, "-r") == 0)
    {
        _ud(argv);
        return 0;
    }

    fprintf(stderr, "Wrong option input!\n");
    exit(EXIT_FAILURE);
    return 0;
}
 
 
/*
 * Compression driver.
 * argv[2]: file to compress, argv[3]: dictionary file to create,
 * argv[4]: compressed file to create.
 * Exits with EXIT_FAILURE if any of the files cannot be opened.
 */
void _zd(char *argv[])
{
    int freq[SIZE] = {0};
    struct tree Hefuman[SIZE];
    struct codes Code[SIZE];
    unsigned long total_bits;
    int symbols;
    FILE *src, *packed, *dict;

    src = fopen(argv[2], "r");
    if(!src)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[2]);
        exit (EXIT_FAILURE);
    }

    /* First pass: count the characters, then rewind for the second pass. */
    Count(src, freq);
    rewind(src);

    symbols = creat_tree(Hefuman, freq);    /* build the Huffman tree */
    creat_codes(Hefuman, Code, symbols);    /* derive the code table */

    packed = fopen(argv[4], "wb");
    if(!packed)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[4]);
        exit (EXIT_FAILURE);
    }

    /* Second pass: write the code bits; zdown returns how many were written. */
    total_bits = zdown(src, packed, Code);

    fclose(src);
    fclose(packed);

    /* The dictionary holds the bit count followed by the whole tree table. */
    dict = fopen(argv[3], "wb");
    if(!dict)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[3]);
        exit (EXIT_FAILURE);
    }
    fwrite(&total_bits, sizeof(unsigned long), 1, dict);
    fwrite(Hefuman, sizeof(struct tree), SIZE, dict);
    fclose(dict);
}
 
/*
 * Decompression driver.
 * argv[2]: compressed file, argv[3]: dictionary file, argv[4]: file to
 * restore. Exits with EXIT_FAILURE if any file cannot be opened.
 *
 * The output file is opened only after both inputs were opened
 * successfully, so a wrong input name no longer creates or truncates the
 * target file. unzdown() closes all three streams.
 */
void _ud(char *argv[])
{
    FILE *fp2, *fp3, *fp4; /* fp2 compressed, fp3 dictionary, fp4 restored */

    fp2 = fopen(argv[2], "rb");
    if(fp2 == NULL)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[2]);
        exit (EXIT_FAILURE);
    }

    fp3 = fopen(argv[3], "rb");
    if(fp3 == NULL)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[3]);
        fclose(fp2);
        exit (EXIT_FAILURE);
    }

    fp4 = fopen(argv[4], "w");
    if(fp4 == NULL)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[4]);
        fclose(fp2);
        fclose(fp3);
        exit (EXIT_FAILURE);
    }

    unzdown(fp2, fp3, fp4); /* decompress */
}

1.统计待压缩文本各个字符出现的次数

int count[256] = {0}; // occurrence counters, one per possible byte value (getc yields 0..255)

// Counts how often each character occurs in the stream.
// fp:    file opened for reading, positioned where counting should start.
// count: array indexed by character value; each character read increments
//        its entry. Must have room for every value getc() can return.
void Count(FILE *fp, int *count)
{
    int ch;
    while ((ch = getc(fp)) != EOF) // read until end of file
        count[ch]++;
}

2.依据统计出来的字数出现次数表建立赫夫曼树

// One entry of the array-based Huffman tree. Links are array indices.
struct tree{
    int parents;        // index of the parent node
    int ltree;          // index of the left child
    int rtree;          // index of the right child
    int weight;         // weight: number of occurrences
    int value;          // character code stored in a leaf
};

struct tree Hefuman[256];
 
/*
 * Builds the array-based Huffman tree.
 *
 * Hefuman: table of SIZE entries that receives the tree. Leaves occupy
 *          indices 0..n-1, internal nodes n..2n-2; the last node created is
 *          the root. Index 0 in parents/ltree/rtree means "none".
 * count:   SIZE occurrence counters indexed by character value.
 * Returns n, the number of distinct characters that occur.
 *
 * A tree with n leaves needs 2n-1 entries. If that does not fit into the
 * SIZE-entry table the program stops with an error instead of writing past
 * the end of the table.
 */
int creat_tree(struct tree *Hefuman, int *count)
{
    int i, j, n;
    int m1, m2, x1, x2; /* m1/x1: smallest weight and its index,
                           m2/x2: second smallest weight and its index */

    for(i = 0; i < SIZE; i++)    /* reset the whole table */
    {
        Hefuman[i].ltree = 0;
        Hefuman[i].parents = 0;
        Hefuman[i].rtree = 0;
        Hefuman[i].weight = 0;
        Hefuman[i].value = 129;  /* placeholder; 0 would collide with '\0' */
    }

    for(i = 0, j = 0; i < SIZE; i++)  /* one leaf per character that occurs */
    {
        if(count[i] == 0)
            continue;
        Hefuman[j].weight = count[i];
        Hefuman[j].value = i;
        j++;
    }

    n = j;

    if(2 * n - 1 > SIZE)
    {
        fprintf(stderr, "Too many distinct symbols (%d): the tree table holds at most %d\n",
                n, (SIZE + 1) / 2);
        exit(EXIT_FAILURE);
    }

    for(i = 0; i < n - 1; i++)   /* n leaves need n-1 merges */
    {
        m1 = m2 = INT_MAX;
        x1 = x2 = 0;

        /* Nodes 0..n+i-1 exist so far; only nodes without a parent are
           still candidates. */
        for(j = 0; j < n + i; j++)
        {
            if(Hefuman[j].weight < m1 && Hefuman[j].parents == 0)
            {
                /* New minimum: the old minimum becomes the runner-up. */
                m2 = m1;
                x2 = x1;
                x1 = j;
                m1 = Hefuman[j].weight;
            }
            else if(Hefuman[j].weight < m2 && Hefuman[j].parents == 0)
            {
                x2 = j;
                m2 = Hefuman[j].weight;
            }
        }

        /* j == n + i here: the first free slot receives the merged node. */
        Hefuman[j].weight = m1 + m2;
        Hefuman[j].rtree = x1;   /* lightest node becomes the right child */
        Hefuman[j].ltree = x2;   /* second lightest becomes the left child */

        Hefuman[x1].parents = j;
        Hefuman[x2].parents = j;
    }

    return n;
}

3.遍历赫夫曼树的每个叶子节点,记录每个叶子节点的编码和所代表的字符生成编码表

// Code assigned to one character.
struct codes{
    unsigned long code; // code bits as accumulated by creat_codes()
    int bits;           // number of bits in the code
};
 
// Code table indexed by character value.
struct codes Code[256];
 
void creat_codes(struct tree *Hefuman, struct codes *Code, int n)
{
    int i, p, j;
    struct codes ch;
 
    for(i = 0; i < n; i++)
    {
        p = Hefuman[i].parents;//找到叶子节点i的父节点下标
        ch = (struct codes) {0, 0};//初始化一个编码单元的内容
        j = i;
        while (p != 0)//如果节点i的不是根节点则进入循环(只有根节点的parents=0)
        {
            ch.bits++;//编码位数加一
            if(Hefuman[p].ltree == j)
                ch.code <<= 1;
//如果i的父节点p的左子节点是i,那么把code左移一位,相当于添加了一位0到code最低位
            else if(Hefuman[p].rtree == j)
            {
//如果i的父节点p的右子节点是i,那么把code左移一位,并自增加1,相当于添加一个1到code最低位
                ch.code <<= 1;
                ch.code++;
            }
            j = p;//j用来保留p现在的下标值,用来在下次循环中测试是其父节点的左子还是右子
            p = Hefuman[p].parents;//p更换成其父亲节点的下标
        }
 
        Code[Hefuman[i].value] = ch;
//得到完整路径编码后(反的),把ch的信息保存在其ascii编码值的位置,ch存放的是a的编码信息
//编码为011,3位,就把ch里的这两个信息放入Code数组的下标‘a’的位置,方便等会压缩的时候找到
    }
    
}

4.根据编码表将待压缩文本中的每一个字符按顺序变成编码存储到压缩后的文件中,并将制作的赫夫曼树存储到另一个字典文件中,用来解压缩时使用。

/*
 * Writes the code of every character of fp1 to fp2.
 *
 * fp1:  source text, opened for reading and positioned at its start.
 * fp2:  compressed output, opened for binary writing.
 * Code: code table produced by creat_codes().
 * Returns the total number of code bits written (padding excluded).
 *
 * Bits are collected in an unsigned long, most significant bit first, and
 * the word is written whenever it is full. The last, partly filled word is
 * shifted so its valid bits sit at the high end.
 */
unsigned long zdown(FILE *fp1, FILE *fp2, struct codes *Code)
{
    int ch, i, tab;
    unsigned long buf = 0, count = 0; /* buf: word being filled, count: full words written */
    tab = 0;                          /* number of bits currently in buf */

    while((ch = getc(fp1)) != EOF)
    {
        for(i = 0; i < Code[ch].bits; i++)
        {
            /* Codes are stored reversed, so emit them starting at bit 0.
               An integer shift selects the bit exactly; the former
               floating-point pow(2, i) was slower and depends on the math
               library rounding. */
            unsigned long n = Code[ch].code & (1UL << i);

            buf <<= 1;      /* make room for the new bit */
            if(n != 0)
                buf++;
            tab++;
            if(tab == sizeof(buf) * 8)  /* word full: flush it */
            {
                fwrite(&buf, sizeof(unsigned long), 1, fp2);
                tab = buf = 0;
                count++;
            }
        }
    }

    if(tab != 0)    /* flush the partly filled last word */
    {
        /* Only the low `tab` bits are valid; move them to the high end. */
        buf <<= (sizeof(unsigned long)*8 - tab);
        fwrite(&buf, sizeof(unsigned long), 1, fp2);
    }
    /* Total number of code bits. */
    count = count * sizeof(unsigned long) * 8 + tab;
    return count;
}

5.解压缩,根据字典文件,和压缩后的文件,还原已经压缩的文件。

/*
 * Restores the original text.
 *
 * fp2: compressed file, fp3: dictionary file (bit count followed by the
 * tree table), fp4: output file. All three streams are closed on return.
 * Exits with EXIT_FAILURE when the dictionary or the compressed data cannot
 * be read completely.
 */
void unzdown(FILE *fp2, FILE *fp3, FILE *fp4)
{
    const unsigned long word_bits = sizeof(unsigned long) * 8;
    int root, p, q;
    unsigned long i, bit, count = 0, buf = 0;
    struct tree arr[SIZE]; /* tree table used for decoding */

    /* Dictionary: number of code bits, then the tree. */
    if(fread(&count, sizeof(unsigned long), 1, fp3) != 1 ||
       fread(arr, sizeof(struct tree), SIZE, fp3) != SIZE)
    {
        fprintf(stderr, "Can't read the dictionary file\n");
        exit(EXIT_FAILURE);
    }

    /* The root is the first entry without a parent. */
    for(root = 0; root < SIZE; root++)
    {
        if(arr[root].parents == 0)
            break;
    }
    if(root == SIZE)
    {
        fprintf(stderr, "Can't find the tree root in the dictionary file\n");
        exit(EXIT_FAILURE);
    }

    p = root;
    /* The counter has the same type as count, so large files cannot
       overflow it. */
    for(i = 0; i < count; i++)
    {
        bit = i % word_bits;

        /* Fetch the next word once the current one is used up. */
        if(bit == 0 && fread(&buf, sizeof(unsigned long), 1, fp2) != 1)
        {
            fprintf(stderr, "Can't read the compressed file\n");
            exit(EXIT_FAILURE);
        }

        /* Bits were stored from the high end, so read them from there.
           A 0 bit selects the left child, a 1 bit the right child. */
        if((buf & (1UL << (word_bits - 1 - bit))) == 0)
            p = arr[p].ltree;
        else
            p = arr[p].rtree;

        /* Leaf reached: emit its character and restart at the root. */
        if(arr[p].ltree == 0 && arr[p].rtree == 0)
        {
            q = arr[p].value;
            putc(q, fp4);
            p = root;
        }
    }

    fclose(fp4);
    fclose(fp2);
    fclose(fp3);
}

头文件

#ifndef _ZDOWN
#define _ZDOWN
 
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <math.h>
#include <string.h>
 
#define SIZE 256
 
/* One entry of the array-based Huffman tree; links are array indices.
   creat_tree() resets every link to 0 and sets value to 129 before use. */
struct tree{
    int parents;    /* index of the parent node; 0 for a node without parent */
    int ltree;      /* index of the left child */
    int rtree;      /* index of the right child */
    int weight;     /* number of occurrences */
    int value;      /* character code stored in a leaf */
};
 
/* Code assigned to one character by creat_codes(). */
struct codes{
    unsigned long code; /* code bits; bit 0 is the branch taken at the root */
    int bits;           /* number of bits in the code */
};
 
/* Decompression driver: argv[2] compressed file, argv[3] dictionary, argv[4] output. */
void _ud(char *argv[]);
/* Compression driver: argv[2] source file, argv[3] dictionary, argv[4] compressed output. */
void _zd(char *argv[]);
/* Decodes fp2 with the dictionary fp3 into fp4 and closes all three streams. */
void unzdown(FILE *fp2, FILE *fp3, FILE *fp4);
/* Encodes fp1 into fp2 using Code; returns the number of code bits written. */
unsigned long zdown(FILE *fp1, FILE *fp2, struct codes *Code);
/* Fills Code for the n leaves of the tree in Hefuman. */
void creat_codes(struct tree *Hefuman, struct codes *Code, int n);
/* Builds the tree in Hefuman from the counters; returns the number of distinct characters. */
int creat_tree(struct tree *Hefuman, int *count);
/* Counts the occurrences of every character read from fp. */
void Count(FILE *fp, int *count);
 
#endif

zdown.c

#include "zdown.h"
 
/*
 * Restores the original text.
 *
 * fp2: compressed file, fp3: dictionary file (bit count followed by the
 * tree table), fp4: output file. All three streams are closed on return.
 * Exits with EXIT_FAILURE when the dictionary or the compressed data cannot
 * be read completely.
 */
void unzdown(FILE *fp2, FILE *fp3, FILE *fp4)
{
    const unsigned long word_bits = sizeof(unsigned long) * 8;
    int root, p, q;
    unsigned long i, bit, count = 0, buf = 0;
    struct tree arr[SIZE];

    /* Dictionary: number of code bits, then the tree. */
    if(fread(&count, sizeof(unsigned long), 1, fp3) != 1 ||
       fread(arr, sizeof(struct tree), SIZE, fp3) != SIZE)
    {
        fprintf(stderr, "Can't read the dictionary file\n");
        exit(EXIT_FAILURE);
    }

    /* The root is the first entry without a parent. */
    for(root = 0; root < SIZE; root++)
    {
        if(arr[root].parents == 0)
            break;
    }
    if(root == SIZE)
    {
        fprintf(stderr, "Can't find the tree root in the dictionary file\n");
        exit(EXIT_FAILURE);
    }

    p = root;
    /* The counter has the same type as count, so large files cannot
       overflow it. */
    for(i = 0; i < count; i++)
    {
        bit = i % word_bits;

        /* Fetch the next word once the current one is used up. */
        if(bit == 0 && fread(&buf, sizeof(unsigned long), 1, fp2) != 1)
        {
            fprintf(stderr, "Can't read the compressed file\n");
            exit(EXIT_FAILURE);
        }

        /* Bits were stored from the high end, so read them from there.
           A 0 bit selects the left child, a 1 bit the right child. */
        if((buf & (1UL << (word_bits - 1 - bit))) == 0)
            p = arr[p].ltree;
        else
            p = arr[p].rtree;

        /* Leaf reached: emit its character and restart at the root. */
        if(arr[p].ltree == 0 && arr[p].rtree == 0)
        {
            q = arr[p].value;
            putc(q, fp4);
            p = root;
        }
    }

    fclose(fp4);
    fclose(fp2);
    fclose(fp3);
}
 
 
 
/*
 * Writes the code of every character of fp1 to fp2.
 *
 * fp1:  source text, positioned at its start.
 * fp2:  compressed output.
 * Code: code table produced by creat_codes().
 * Returns the total number of code bits written (padding excluded).
 *
 * Bits are collected in an unsigned long, most significant bit first, and
 * the word is written whenever it is full. The last, partly filled word is
 * shifted so its valid bits sit at the high end.
 */
unsigned long zdown(FILE *fp1, FILE *fp2, struct codes *Code)
{
    int ch, i, tab;
    unsigned long buf = 0, count = 0;
    tab = 0;

    while((ch = getc(fp1)) != EOF)
    {
        for(i = 0; i < Code[ch].bits; i++)
        {
            /* Codes are stored reversed, so emit them starting at bit 0.
               An integer shift selects the bit exactly; the former
               floating-point pow(2, i) was slower and depends on the math
               library rounding. */
            unsigned long n = Code[ch].code & (1UL << i);
            buf <<= 1;
            if(n != 0)
                buf++;
            tab++;
            if(tab == sizeof(buf) * 8)  /* word full: flush it */
            {
                fwrite(&buf, sizeof(unsigned long), 1, fp2);
                tab = buf = 0;
                count++;
            }
        }
    }

    if(tab != 0)
    {
        /* Only the low `tab` bits are valid; move them to the high end. */
        buf <<= (sizeof(unsigned long)*8 - tab);
        fwrite(&buf, sizeof(unsigned long), 1, fp2);
    }
    count = count * sizeof(unsigned long) * 8 + tab;
    return count;
}
 
void creat_codes(struct tree *Hefuman, struct codes *Code, int n)
{
    int i, p, j;
    struct codes ch;
 
    for(i = 0; i < n; i++)
    {
        p = Hefuman[i].parents;
        ch = (struct codes) {0, 0};
        j = i;
        while (p != 0)
        {
            ch.bits++;
            if(Hefuman[p].ltree == j)
                ch.code <<= 1;
            else if(Hefuman[p].rtree == j)
            {
                ch.code <<= 1;
                ch.code++;
            }
            j = p;
            p = Hefuman[p].parents;
        }
 
        Code[Hefuman[i].value] = ch;
        
    }
    
}
 
/*
 * Builds the array-based Huffman tree.
 *
 * Hefuman: table of SIZE entries that receives the tree. Leaves occupy
 *          indices 0..n-1, internal nodes n..2n-2; the last node created is
 *          the root. Index 0 in parents/ltree/rtree means "none".
 * count:   SIZE occurrence counters indexed by character value.
 * Returns n, the number of distinct characters that occur.
 *
 * A tree with n leaves needs 2n-1 entries. If that does not fit into the
 * SIZE-entry table the program stops with an error instead of writing past
 * the end of the table.
 */
int creat_tree(struct tree *Hefuman, int *count)
{
    int i, j, n;
    int m1, m2, x1, x2; /* m1/x1: smallest weight and its index,
                           m2/x2: second smallest weight and its index */

    for(i = 0; i < SIZE; i++)
    {
        Hefuman[i].ltree = 0;
        Hefuman[i].parents = 0;
        Hefuman[i].rtree = 0;
        Hefuman[i].weight = 0;
        Hefuman[i].value = 129;  /* placeholder value */
    }

    for(i = 0, j = 0; i < SIZE; i++)  /* one leaf per character that occurs */
    {
        if(count[i] == 0)
            continue;
        Hefuman[j].weight = count[i];
        Hefuman[j].value = i;
        j++;
    }

    n = j;

    if(2 * n - 1 > SIZE)
    {
        fprintf(stderr, "Too many distinct symbols (%d): the tree table holds at most %d\n",
                n, (SIZE + 1) / 2);
        exit(EXIT_FAILURE);
    }

    for(i = 0; i < n - 1; i++)   /* n leaves need n-1 merges */
    {
        m1 = m2 = INT_MAX;
        x1 = x2 = 0;

        /* Nodes 0..n+i-1 exist so far; only nodes without a parent are
           still candidates. */
        for(j = 0; j < n + i; j++)
        {
            if(Hefuman[j].weight < m1 && Hefuman[j].parents == 0)
            {
                m2 = m1;
                x2 = x1;
                x1 = j;
                m1 = Hefuman[j].weight;
            }
            else if(Hefuman[j].weight < m2 && Hefuman[j].parents == 0)
            {
                x2 = j;
                m2 = Hefuman[j].weight;
            }
        }

        /* j == n + i here: the first free slot receives the merged node. */
        Hefuman[j].weight = m1 + m2;
        Hefuman[j].rtree = x1;
        Hefuman[j].ltree = x2;

        Hefuman[x1].parents = j;
        Hefuman[x2].parents = j;
    }

    return n;
}
 
/*
 * Counts how often each character occurs in the stream.
 * fp:    stream to read until end of file.
 * count: array indexed by character value; the entry of every character
 *        read is incremented.
 */
void Count(FILE *fp, int *count)
{
    int ch;

    for (ch = getc(fp); ch != EOF; ch = getc(fp))
    {
        count[ch]++;
    }
}

main.c

#include "zdown.h"
 
/*
 * Command-line entry point.
 * Without arguments a usage hint is printed. Otherwise exactly four
 * arguments are required: the mode ("-z" to compress, "-r" to restore)
 * followed by three file names, which are handed to _zd() or _ud().
 */
int main(int argc, char *argv[])
{
    const char *mode;

    if(argc == 1)
    {
        printf("提示:\n");
        printf("-z “zdown -z 待压缩文件名 解码字典文件名 编码后文件名” 格式压缩文件\n");
        printf("-r “zdown -r 编码后文件名 解码字典文件名 还原后文件名” 格式解压文件\n");
        return 0;
    }

    if(argc != 5)
    {
        fprintf(stderr, "Wrong parameter input!\n");
        exit(EXIT_FAILURE);
    }

    mode = argv[1];
    if(strcmp(mode, "-z") == 0)
    {
        _zd(argv);
        return 0;
    }
    if(strcmp(mode, "-r") == 0)
    {
        _ud(argv);
        return 0;
    }

    fprintf(stderr, "Wrong option input!\n");
    exit(EXIT_FAILURE);
    return 0;
}
 
 
/*
 * Compression driver.
 * argv[2]: file to compress, argv[3]: dictionary file to create,
 * argv[4]: compressed file to create.
 * Exits with EXIT_FAILURE if any of the files cannot be opened.
 */
void _zd(char *argv[])
{
    int freq[SIZE] = {0};
    struct tree Hefuman[SIZE];
    struct codes Code[SIZE];
    unsigned long total_bits;
    int symbols;
    FILE *src, *packed, *dict;

    src = fopen(argv[2], "r");
    if(!src)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[2]);
        exit (EXIT_FAILURE);
    }

    /* First pass: count the characters, then rewind for the second pass. */
    Count(src, freq);
    rewind(src);

    symbols = creat_tree(Hefuman, freq);
    creat_codes(Hefuman, Code, symbols);

    packed = fopen(argv[4], "wb");
    if(!packed)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[4]);
        exit (EXIT_FAILURE);
    }

    /* Second pass: write the code bits; zdown returns how many were written. */
    total_bits = zdown(src, packed, Code);

    fclose(src);
    fclose(packed);

    /* The dictionary holds the bit count followed by the whole tree table. */
    dict = fopen(argv[3], "wb");
    if(!dict)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[3]);
        exit (EXIT_FAILURE);
    }

    fwrite(&total_bits, sizeof(unsigned long), 1, dict);
    fwrite(Hefuman, sizeof(struct tree), SIZE, dict);
    fclose(dict);
}
 
/*
 * Decompression driver.
 * argv[2]: compressed file, argv[3]: dictionary file, argv[4]: file to
 * restore. Exits with EXIT_FAILURE if any file cannot be opened.
 *
 * The output file is opened only after both inputs were opened
 * successfully, so a wrong input name no longer creates or truncates the
 * target file. unzdown() closes all three streams.
 */
void _ud(char *argv[])
{
    FILE *fp2, *fp3, *fp4;

    fp2 = fopen(argv[2], "rb");
    if(fp2 == NULL)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[2]);
        exit (EXIT_FAILURE);
    }

    fp3 = fopen(argv[3], "rb");
    if(fp3 == NULL)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[3]);
        fclose(fp2);
        exit (EXIT_FAILURE);
    }

    fp4 = fopen(argv[4], "w");
    if(fp4 == NULL)
    {
        fprintf(stderr, "Can't open the \"%s\" file\n", argv[4]);
        fclose(fp2);
        fclose(fp3);
        exit (EXIT_FAILURE);
    }

    unzdown(fp2, fp3, fp4);
}

假设一个字符串"S": “ABRACADABRA”
A:5次
B:2次
R:2次
C:1次
D:1次

        11
     0  /   \
      /     \1
   A (5)     6
           0 /  \1
           B (2)  4
                0 /  \1
                R (2) 2
                    0 /  \1
                    C (1) D (1)

Huffman树生成规则:

左孩子为0,右孩子为1
小的放左,大的放右(与上图一致)
1.找出两个频度最小的节点 C、D,生成 w(2)=C(1)+D(1);按"小的放左,大的放右"排列(二者频度相同时顺序任意,图中 C 在左、D 在右)
2.w(4)=R(2)+w(2)
3.w(6)=B(2)+w(4)
4.w(11)=A(5)+w(6)

带权路径长度最短,就是huffman树。依次为两节点的连接线编码,左孩子为0,右孩子为1
路径长度:树中一个节点到另一个节点之间分支构成这两个节点之间的路径,路径上的分支数目为其路径长度
树的路径长度:树根到每一个节点的路径长度之和 为 “l”
节点的带权路径长度:节点到树根之间的路径长度与节点上权的乘积
树的带权路径长度:所有节点的带权路径长度之和,记作 $WPL = \sum_{k=1}^{n} w_k \, l_k$
$WPL = 5\times1 + 2\times2 + 2\times3 + 1\times4 + 1\times4 = 23$
Huffman 编码
A:0
B:10
R:110
C:1110
D:1111

// Builds a Huffman tree for the given weights and prints the codes together
// with the internal tables.
//
// w: array of weights, one per symbol.
// n: number of weights. Nothing is printed when w is null or n <= 0.
//
// The tree is kept in parallel arrays of 2n-1 nodes: nodes 0..n-1 are the
// leaves, nodes n..2n-2 are created by the merges, the last one is the root.
// In each merge the lightest free node becomes the left child (bit 0) and
// the second lightest the right child (bit 1).
//
// Code lengths are obtained by walking the parent links, so the output is
// correct for any tree shape and for n == 1 (empty code).
void huffman(int w[],int n) // w: weight array, n: its length
{
    if (!w || n <= 0)
        return;

    const int m = 2 * n - 1;

    int* t = new int [m];       // node weights
    int* parent = new int [m];  // parent index, -1 for none
    int* flag = new int [m];    // 0 = still free, 1 = already merged
    int* lor = new int [m];     // 0 = left child, 1 = right child, -1 = root
    // All codes back to back, each stored leaf-to-root. A code has at most
    // n-1 bits, so n*n entries are always enough.
    int* HuffmanCode = new int [n * n];
    int* codeLen = new int [n]; // number of bits of each symbol's code

    int i = 0;
    int j = 0;

    for (i = 0; i < m; i++)
    {
        t[i] = 0;
        parent[i] = -1;
        flag[i] = 0;
        lor[i] = -1;
    }
    for (i = 0; i < n; i++)
    {
        t[i] = w[i];
    }

    int p, q; // indices of the lightest and second lightest free node
    for (i = 0; i < n - 1; i++) // n leaves need n-1 merges
    {
        p = -1, q = -1;

        // lightest free node
        for (j = 0; j < n + i; j++)
        {
            if (flag[j] == 1) continue;
            if (p == -1) p = j;
            else if (t[j] < t[p]) p = j;
        }
        flag[p] = 1;
        lor[p] = 0;

        // second lightest free node
        for (j = 0; j < n + i; j++)
        {
            if (flag[j] == 1) continue;
            if (q == -1) q = j;
            else if (t[j] < t[q]) q = j;
        }
        flag[q] = 1;
        lor[q] = 1;

        // merge
        t[n + i] = t[p] + t[q];
        parent[p] = n + i;
        parent[q] = n + i;
    }

    // Collect the branch bits of every leaf while climbing to the root.
    int k, s = 0;
    for (i = 0; i < n; i++)
    {
        const int start = s;
        for (k = i; parent[k] != -1; k = parent[k])
        {
            HuffmanCode[s] = lor[k];
            s++;
        }
        codeLen[i] = s - start;
    }

    printf("Hm = ");
    for (i = 0; i < s; i++)
    {
        printf("%4d",HuffmanCode[i]);
    }
    printf("\n");

    // Print the codes from the last symbol to the first. Reading the flat
    // array backwards yields each code in root-to-leaf order.
    int u = s - 1;
    for (i = n - 1; i >= 0; i--)
    {
        printf("%d的编码是:",t[i]);
        for (j = 0; j < codeLen[i]; j++)
        {
            printf("%d",HuffmanCode[u]);
            u--;
        }
        printf("\n");
    }

    printf("w = ");
    for (i = 0; i < m; i++)
    {
        printf("%4d ",t[i]);
    }
    printf("\n");

    printf("lor = ");
    for (i = 0; i < m; i++)
    {
        printf("%4d ",lor[i]);
    }
    printf("\n");

    printf("parent = ");
    for (i = 0; i < m; i++)
    {
        printf("%4d ",parent[i]);
    }
    printf("\n");

    printf("flag = ");
    for (i = 0; i < m; i++)
    {
        printf("%4d ",flag[i]);
    }
    printf("\n");

    delete[] codeLen;
    delete[] HuffmanCode;
    delete[] lor;
    delete[] flag;
    delete[] parent;
    delete[] t;
}

// Demo: prints the Huffman tables for four sample weights.
int main()
{
    int w[] = {8,4,5,2};
    const int count = sizeof(w) / sizeof(w[0]);

    huffman(w, count);
    return 0;
}

.h头文件

#pragma once
#include <fstream>
#include<iostream>
#include<vector>
#include<queue>
#include<stack>
#include<string>

using namespace std;



// Node of the binary tree used by the adaptive Huffman coder.
struct Node {
	int weight = 0;   // weight of the node; zero until the owner sets it
	int num = 0;      // sequence number of the node; zero until the owner sets it
	Node* p_left;     // left child, nullptr if none
	Node* p_right;    // right child, nullptr if none
	Node* p_parent;   // parent, nullptr for the root
	// Links the node to the given neighbours. weight and num start at zero
	// instead of holding indeterminate values.
	Node(Node* p_left, Node* p_right, Node* p_parent) : p_left(p_left), p_right(p_right), p_parent(p_parent) {}
};


// Binary tree that owns its nodes: the constructor allocates the root and
// the destructor deletes every node reachable from it.
class BinaryTree
{
public:

	// Which side of its parent a node hangs on.
	enum Brother { LeftChild, RightChild };
	// Creates a tree whose root carries the given number and weight.
	BinaryTree(int num = 0, int weight = 0);
	~BinaryTree();
	// The tree owns its nodes through a raw pointer, so a copy would delete
	// the same nodes a second time. Copying is therefore disabled.
	BinaryTree(const BinaryTree&) = delete;
	BinaryTree& operator=(const BinaryTree&) = delete;
	// Exchanges the positions of two nodes (with their subtrees).
	bool swap(Node* p_nodeA, Node* p_nodeB);
	// Attaches p_child to the requested side of p_parent.
	bool addNode(Node* p_parent, Node* p_child, Brother brotherState);
	// NOTE(review): no definition is visible in this file - confirm it exists.
	Node* findNode(string in);
	// Deletes p_node and everything below it.
	void deleteNode(Node *p_node);
	Node* getRoot() { return p_root; }

	// Reports whether p_node is the left or the right child of its parent.
	Brother getBrotherState(Node *p_node);
	// Reports whether p_nodeAncestor lies on the path from p_nodeChild up to
	// (but not including) the root.
	bool isAncestor(Node* p_nodeChild, Node* p_nodeAncestor);
private:
	Node *p_root;

};



// Adaptive Huffman coder built on top of BinaryTree.
class HuffmanTree
{
public:
	// Next sequence number handed to a newly created node.
	int sum;
	HuffmanTree();


	// Feeds the characters of `input` through the coder and prints the code
	// of each one.
	void encode(string input);
	// Increments the weight of p_node and of each of its ancestors.
	void weightAdd(Node* p_node);
	// Decodes a string of '0'/'1' characters and prints the symbols found.
	void decode(string input);
	
	BinaryTree tree;


	// Entry of the symbol table.
	struct charMap {
		char key;           // the symbol
		std::string value;  // its current code as a string of '0'/'1'
		Node* p;            // its leaf in the tree
	};
	// Symbols seen so far.
	vector<charMap> buffers;

	// Returns the path from the root to p as a string of '0' (left) and
	// '1' (right).
	string getHuffmanCode(Node *p);
	// Searches the tree for a node with the same weight as the argument.
	Node * findLarge(Node *);

	
}; 


cpp文件,main函数里包含初始编码表

#include "huffmanTree.h"
#include <map>
// Initial fixed code of every known symbol; filled in main() and used for
// symbols that appear for the first time.
map<char, string> initCode;


// Allocates the root node, which has no children and no parent, and stores
// the given sequence number and weight in it.
BinaryTree::BinaryTree(int num, int weight)
	: p_root(new Node(nullptr, nullptr, nullptr))
{
	p_root->weight = weight;
	p_root->num = num;
}


// Releases every node of the tree, starting at the root.
BinaryTree::~BinaryTree()
{
	this->deleteNode(this->p_root);
}


// Exchanges the positions of two nodes together with their subtrees.
//
// Returns false, leaving the tree untouched, when either pointer is null,
// when both point to the same node, or when either node has no parent (the
// root has no position to exchange; this case used to dereference a null
// parent pointer). Returns true after a successful exchange.
bool BinaryTree::swap(Node * p_nodeA, Node * p_nodeB)
{
	if (p_nodeA == nullptr || p_nodeB == nullptr || p_nodeA == p_nodeB)
		return false;

	Node* const parentA = p_nodeA->p_parent;
	Node* const parentB = p_nodeB->p_parent;
	if (parentA == nullptr || parentB == nullptr)
		return false;

	// Locate the child pointers that currently refer to A and B. Both are
	// resolved before either is rewritten, so siblings are handled as well.
	Node*& slotA = (parentA->p_left == p_nodeA) ? parentA->p_left : parentA->p_right;
	Node*& slotB = (parentB->p_left == p_nodeB) ? parentB->p_left : parentB->p_right;

	slotA = p_nodeB;
	slotB = p_nodeA;

	p_nodeA->p_parent = parentB;
	p_nodeB->p_parent = parentA;
	return true;
}


// Attaches p_child to p_parent on the side given by brotherState.
// Returns false when a pointer is null, when the requested side is already
// occupied (an error is printed), or when brotherState is neither LeftChild
// nor RightChild (an error is printed). Returns true on success.
bool BinaryTree::addNode(Node * p_parent, Node * p_child, Brother brotherState)
{
	if (p_parent == nullptr || p_child == nullptr)
		return false;

	switch (brotherState) {
	case LeftChild:
		// The left slot must still be free.
		if (p_parent->p_left != nullptr) {
			std::cout << "error:left child exist!" << std::endl;
			return false;
		}
		p_parent->p_left = p_child;
		break;
	case RightChild:
		// The right slot must still be free.
		if (p_parent->p_right != nullptr) {
			std::cout << "error:right child exist!" << std::endl;
			return false;
		}
		p_parent->p_right = p_child;
		break;
	default:
		// Unknown side.
		std::cout << "error:brotherState is wrong!" << std::endl;
		return false;
	}

	p_child->p_parent = p_parent;
	return true;
}


// Walks from p_nodeChild towards the root and returns true as soon as
// p_nodeAncestor is met. The walk stops before the root is examined, so the
// root is never reported; a node is reported as its own ancestor.
bool BinaryTree::isAncestor(Node * p_nodeChild, Node * p_nodeAncestor)
{
	for (Node* cur = p_nodeChild; cur != p_root; cur = cur->p_parent) {
		if (cur == p_nodeAncestor)
			return true;
	}
	return false;
}


// Deletes p_node and all nodes below it. A null pointer is ignored (it used
// to be dereferenced). The parent's link to p_node is not cleared.
void BinaryTree::deleteNode(Node *p_node)
{
	if (p_node == nullptr) {
		return;
	}
	deleteNode(p_node->p_left);
	deleteNode(p_node->p_right);
	delete p_node;
}


// Reports on which side of its parent p_node hangs: LeftChild when the
// parent's left pointer refers to it, RightChild otherwise.
// p_node must have a parent.
BinaryTree::Brother BinaryTree::getBrotherState(Node *p_node)
{
	const bool isLeft = (p_node->p_parent->p_left == p_node);
	return isLeft ? LeftChild : RightChild;
}


// Starts with a tree whose root has number 0 and weight 0; the next node
// created receives number 1.
HuffmanTree::HuffmanTree() : sum(1), tree(0, 0)
{
}


// Returns the path from the root to p_n as a string: '0' for every step to
// a left child, '1' for every step to a right child. The root itself is
// reported as "0".
string HuffmanTree::getHuffmanCode(Node *p_n)
{
	if (p_n == tree.getRoot())
		return "0";

	std::string bits;
	// Climb to the root; each step is put in front of the bits gathered so
	// far, which leaves the string in root-to-leaf order.
	for (Node* cur = p_n; cur != tree.getRoot(); cur = cur->p_parent) {
		const char bit = (tree.getBrotherState(cur) == BinaryTree::LeftChild) ? '0' : '1';
		bits.insert(bits.begin(), bit);
	}
	return bits;
}


// Scans the whole tree in pre-order for nodes whose weight equals that of
// p_node and returns the one with the smallest `num` among them. If the
// search ends on the root, p_node itself is returned.
Node * HuffmanTree::findLarge(Node *p_node)
{
	Node* const rootNode = tree.getRoot();
	Node* best = rootNode;

	std::stack<Node *> pending;
	if (rootNode != nullptr)
		pending.push(rootNode);

	while (!pending.empty()) {
		Node* const cur = pending.top();
		pending.pop();

		if (cur->weight == p_node->weight) {
			// Take the first match unconditionally; afterwards only a match
			// with a smaller number replaces the current choice.
			const bool bestHasOtherWeight = (best->weight != cur->weight);
			if (bestHasOtherWeight || best->num > cur->num)
				best = cur;
		}

		// Push right before left so the left subtree is visited first.
		if (cur->p_right != nullptr)
			pending.push(cur->p_right);
		if (cur->p_left != nullptr)
			pending.push(cur->p_left);
	}

	return (best == rootNode) ? p_node : best;
}


// Feeds every character of `input` through the adaptive coder and prints
// its code.
//
// A character already in the table is printed with its current tree code
// and its weight is increased. A new character is printed with its fixed
// code from `initCode`, the current NYT node is split into a new NYT (left)
// and a leaf for the character (right), and the weights are updated.
//
// initCode.at() throws std::out_of_range for a character without a fixed
// code.
void HuffmanTree::encode( string input)
{
	Node *nyt = tree.getRoot();
	for (string::size_type i = 0; i < input.length(); i++) {

		const char cbuffer = input[i];

		// Look the character up in the table of known symbols.
		auto existNode = buffers.begin();
		for (; existNode != buffers.end(); ++existNode) {
			if (existNode->key == cbuffer)
				break;
		}

		if (existNode != buffers.end())
		{
			cout << cbuffer << " 在树中存在,编码为: " << existNode->value << endl;
			weightAdd(existNode->p);
		}
		else
		{
			Node *c = new Node(nullptr, nullptr, nyt);
			c->num = sum++;
			c->weight = 1;

			Node *NYT = new Node(nullptr, nullptr, nyt);
			NYT->num = sum++;
			NYT->weight = 0;

			cout << "\n NYT:" << getHuffmanCode(nyt) << endl;

			tree.addNode(nyt, NYT, BinaryTree::LeftChild);
			tree.addNode(nyt, c, BinaryTree::RightChild);
			nyt->weight = 1;

			cout << cbuffer << "首次出现,编码为:"<< initCode.at(cbuffer) << endl;

			// Built on the stack: the vector stores its own copy, so the
			// former heap allocation was never freed.
			charMap entry;
			entry.key = cbuffer;
			entry.p = nyt->p_right;
			entry.value = getHuffmanCode(nyt->p_right);
			buffers.push_back(entry);

			// Update the weights above the old NYT node.
			weightAdd(nyt->p_parent);

			// The left child is the new NYT node.
			nyt = nyt->p_left;
		}
	}
}



// Increments the weight of p_node and of every node above it.
// Before a node is incremented, findLarge() is asked for a node of the same
// weight; if that node is a different one and does not lie on the node's
// path to the root, the two exchange positions and numbers and all stored
// codes are refreshed.
void HuffmanTree::weightAdd(Node * p_node)
{
	for (Node* cur = p_node; cur != nullptr; cur = cur->p_parent) {
		Node* const candidate = findLarge(cur);

		if (candidate != cur && !tree.isAncestor(cur, candidate)) {
			tree.swap(candidate, cur);

			// The numbers stay with the positions, so exchange them too.
			const int curNum = cur->num;
			cur->num = candidate->num;
			candidate->num = curNum;

			// Paths changed: recompute the code of every known symbol.
			for (auto& entry : buffers) {
				entry.value = getHuffmanCode(entry.p);
			}
		}

		cur->weight++;
	}
}



// Decodes a string of '0'/'1' characters produced by the adaptive coder and
// prints every symbol found.
//
// A window starting at position p is widened one bit at a time until it
// matches either the code of the NYT node or the code of a known symbol.
// After the NYT code the next 5 bits are looked up in `initCode` and, if
// found, the symbol is added to the tree exactly as encode() does.
void HuffmanTree::decode(string input)
{
	Node *nyt = tree.getRoot();
	string::size_type p = 0;	// start of the window
	string::size_type l = 1;	// length of the window
	string temp;
	for (;p+l<= input.length();)
	{
		bool matched = false;
		temp = input.substr(p, l);
		cout << "\n循环: " << temp ;

		// NYT code: a new symbol follows as a fixed-length code.
		if (temp == getHuffmanCode(nyt))
		{
			p+=l;
			l = 5;
			temp = input.substr(p, l);

			// Look the fixed code up in the dictionary.
			for (auto iter = initCode.begin(); iter != initCode.end(); ++iter) {
				if (iter->second != temp)
					continue;

				cout << "      新码的:" << iter->first << endl;

				Node *c = new Node(nullptr, nullptr, nyt);
				c->num = sum++;
				c->weight = 1;

				Node *NYT = new Node(nullptr, nullptr, nyt);
				NYT->num = sum++;
				NYT->weight = 0;

				tree.addNode(nyt, NYT, BinaryTree::LeftChild);
				tree.addNode(nyt, c, BinaryTree::RightChild);
				nyt->weight = 1;

				// Built on the stack: the vector stores its own copy, so
				// the former heap allocation was never freed.
				charMap entry;
				entry.key = iter->first;
				entry.p = nyt->p_right;
				entry.value = getHuffmanCode(nyt->p_right);
				buffers.push_back(entry);

				// Update the weights above the old NYT node.
				weightAdd(nyt->p_parent);

				// The left child is the new NYT node.
				nyt = nyt->p_left;

				// The symbol is handled; scanning further entries could
				// insert it again if two symbols shared a fixed code.
				break;
			}
			p += l;
			l = 1;

			matched = true;
		}
		else // not NYT: try the codes of the known symbols
		{
			for (auto existNode = buffers.begin(); existNode != buffers.end(); ++existNode) {
				if (existNode->value == temp)
				{
					cout  << "     在树中存在,为: " << existNode->key << endl;
					weightAdd(existNode->p);

					p += l;
					l = 1;
					matched = true;
					break;
				}
			}
		}

		// Nothing matched yet: widen the window by one bit.
		if(!matched)
			l++;
	}

}


//主函数程序
int main()
{


	HuffmanTree huff;
	
	//这个字典是初始编码表
	initCode['A'] = "00001";
	initCode['B'] = "00010";
	initCode['C'] = "00011";
	initCode['D'] = "00100";




	string input = "ABBCADAD";
	huff.encode(input);//进行编码的函数



	//以下是解码函数
	HuffmanTree dhuff;
	dhuff.decode("0000010000100100000110110000100111001");
	system("PAUSE");
	return 0;
}

  • 27
    点赞
  • 17
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

weixin_44245323

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值