基于哈夫曼编码实现压缩与解压缩

基于哈夫曼编码实现压缩与解压缩

输入文件为 Inputfile.txt 压缩后的文件为 HuffmanPress.dat
解压后的文件为 DecodeFile.txt

以下代码基于 VsCode 运行
可以实现包含英文小写,大写,空格,换行符的文本的压缩


minHeap.h

#ifndef MIN_HEAP_H
#define MIN_HEAP_H
//华南师范大学 关竣佑 
#include<iostream>

using namespace std;
// utf8 
/**
 * Array-backed binary min-heap with 0-based storage.
 *
 * Ordering is decided by `a->weight < b->weight`, so T must be a pointer-like
 * type whose pointee exposes a `weight` member, and T must be
 * default-constructible and assignable.
 *
 * The heap owns its internal buffer only; it never owns whatever the stored
 * elements point to.
 */
template<class T>
class minHeap
{
    private:
        T* heap;       // heap-ordered elements; children of i are 2i+1 and 2i+2
        int size;      // number of elements currently stored
        int Capacity;  // number of slots allocated in `heap`

        // Doubles the buffer when it is full.
        void expand()
        {
            const int newCapacity = (Capacity > 0) ? Capacity * 2 : 1;
            T* newArray = new T[newCapacity];
            for(int i = 0; i < size; i++)
            {
                newArray[i] = heap[i];
            }
            // The buffer comes from new[], so it must be released with delete[].
            delete[] heap;
            heap = newArray;
            Capacity = newCapacity;
        }

        // True when `a` must sit above `b`, i.e. a has the strictly smaller weight.
        bool cmp(const T& a, const T& b) const
        {
            return a->weight < b->weight;
        }

        // Sifts the element at currentIndex down until both children are not smaller.
        void AdjustDown(int currentIndex)
        {
            T value = heap[currentIndex];

            while(currentIndex * 2 + 1 < size)
            {
                int nextIndex = 2 * currentIndex + 1;      // left child
                if(nextIndex + 1 < size && cmp(heap[nextIndex + 1], heap[nextIndex]))
                {
                    nextIndex = nextIndex + 1;             // right child is smaller
                }

                if(cmp(value, heap[nextIndex]))
                    break;                                 // already strictly smaller than both children

                heap[currentIndex] = heap[nextIndex];      // pull the smaller child up
                currentIndex = nextIndex;
            }

            heap[currentIndex] = value;
        }

        // Sifts the element at currentIndex up until its parent is strictly smaller.
        void AdjustUp(int currentIndex)
        {
            T value = heap[currentIndex];

            while(currentIndex > 0)
            {
                const int parentIndex = (currentIndex - 1) / 2;
                if(cmp(heap[parentIndex], value))
                    break;

                heap[currentIndex] = heap[parentIndex];
                currentIndex = parentIndex;
            }
            heap[currentIndex] = value;
        }


    public:
        // Creates an empty heap with room for 10 elements.
        minHeap()
        {
            heap = new T[10];
            size = 0;
            Capacity = 10;
        }

        /**
         * Builds a heap from the first n elements of arr.
         *
         * The elements are copied into a buffer owned by the heap, so the
         * caller's array is left untouched and later pushes cannot run past it.
         * A non-positive n yields an empty heap.
         */
        minHeap(T arr[], int n)
        {
            size = (n > 0) ? n : 0;
            Capacity = (size > 0) ? 2 * size : 10;
            heap = new T[Capacity];
            for(int i = 0; i < size; i++)
            {
                heap[i] = arr[i];
            }

            // Heapify bottom-up starting at the last node that has a child.
            for(int i = size / 2 - 1; i >= 0; i--)
            {
                AdjustDown(i);
            }
        }

        // Deep-copies the buffer so the two heaps never share storage.
        minHeap(const minHeap& other)
            : heap(new T[other.Capacity]), size(other.size), Capacity(other.Capacity)
        {
            for(int i = 0; i < size; i++)
            {
                heap[i] = other.heap[i];
            }
        }

        minHeap& operator=(const minHeap& other)
        {
            if(this != &other)
            {
                T* fresh = new T[other.Capacity];
                for(int i = 0; i < other.size; i++)
                {
                    fresh[i] = other.heap[i];
                }
                delete[] heap;
                heap = fresh;
                size = other.size;
                Capacity = other.Capacity;
            }
            return *this;
        }

        ~minHeap()
        {
            delete[] heap;
        }

        // Number of stored elements.
        int Size() const
        {
            return size;
        }

        /**
         * Returns the element with the smallest weight.
         * On an empty heap a message is printed and a value-initialised T
         * (a null pointer for pointer types) is returned.
         */
        T Top() const
        {
            if(size == 0)
            {
                std::cout<<"没有元素"<<std::endl;
                return T();
            }
            return heap[0];
        }

        // Inserts an element, growing the buffer when it is full.
        void Push(T element)
        {
            if(size == Capacity)
                expand();
            size++;
            heap[size-1] = element;
            AdjustUp(size-1);
        }

        // Removes the smallest element; on an empty heap prints a message and does nothing.
        void Pop()
        {
            if(size == 0)
            {
                std::cout<<"操作非法"<<std::endl;
                return;
            }
            heap[0] = heap[size-1];
            size--;
            if(size > 0)
                AdjustDown(0);
        }

        // Prints the stored elements in array order, separated by spaces.
        void Print() const
        {
            for(int i=0;i<size;i++)
            {
                std::cout<<heap[i]<<" ";
            }
            std::cout<<std::endl;
        }

        bool isEmpty() const
        {
            return size==0;
        }

};

#endif

HuffmanTreeNode.h

#ifndef HUFFMAN_TREE_NODE_H
#define HUFFMAN_TREE_NODE_H

#include<iostream>
using namespace std;
// 哈夫曼树的结点
/**
 * Node of a Huffman tree.
 *
 * `data` is the symbol the node stands for, `weight` its frequency.
 * `left`, `right` and `parent` are plain non-owning links that start out null;
 * whoever builds the tree is responsible for wiring and freeing the nodes.
 */
template<class T>
class HuffmanTreeNode
{
    public:
        int weight;
        T data;
        HuffmanTreeNode<T>* left;
        HuffmanTreeNode<T>* right;
        HuffmanTreeNode<T>* parent;

        // Creates an unlinked node carrying `data` with frequency `weight`.
        HuffmanTreeNode(T data,int weight)
            : weight(weight), data(data), left(nullptr), right(nullptr), parent(nullptr)
        {
        }

        // The comparisons below look only at `weight` and take the right-hand
        // side as a pointer. They are silent: comparing nodes must not write
        // to stdout.
        bool operator<(HuffmanTreeNode<T>* node1) const
        {
            return weight < node1->weight;
        }

        bool operator>(HuffmanTreeNode<T>* node1) const
        {
            return weight > node1->weight;
        }

        bool operator==(HuffmanTreeNode<T>* node1) const
        {
            return weight == node1->weight;
        }

};

#endif

HuffmanTree.h

#ifndef HUFFMAN_TREE_H
#define HUFFMAN_TREE_H

#include"HuffmanTreeNode.h"
#include"minHeap.h"
#include<map>
#include<unordered_map>
#include<utility>
using namespace std;




template<class T>
class HuffmanTree
{
    public:

        HuffmanTreeNode<T>* root;
        int size;
        map<T,HuffmanTreeNode<T>*> LeafMap; //字符对应叶子的哈希表

        HuffmanTree(T value[], int w[],int n) //建树
        {
            //LeafMap = new  map<T,HuffmanTreeNode<T>*>();
            minHeap<HuffmanTreeNode<T>*> heap;
            for(int i=0;i<n;i++)
            {
               //cout<<w[i]<<endl;
                HuffmanTreeNode<T>* node = new HuffmanTreeNode<T>(value[i],w[i]);
                //HuffmanTreeNode<T> node(value[i],w[i]);
                LeafMap.insert(pair<T,HuffmanTreeNode<T>*>(value[i], node));
                heap.Push(node);
            }
            //cout<<"000"<<endl;
            size = n;
            //开始建树
            for(int i=0;i<size-1;i++)
            {
                //cout<<i<<endl;
                HuffmanTreeNode<T>* leftNode = heap.Top();
                heap.Pop();
                HuffmanTreeNode<T>* rightNode = heap.Top();
                heap.Pop();
                //cout<<leftNode->weight<<" "<<rightNode->weight<<endl;
                HuffmanTreeNode<T>* newNode = new HuffmanTreeNode<T>(leftNode->data,leftNode->weight+rightNode->weight);
                newNode->left = leftNode;
                newNode->right = rightNode;
                leftNode->parent = newNode;
                rightNode->parent = newNode;
                heap.Push(newNode);

                // HuffmanTreeNode<T> leftNode = heap.Top();
                // heap.Pop();
                // HuffmanTreeNode<T> rightNode = heap.Top();
                // heap.Pop();
                // cout<<leftNode.weight<<" "<<rightNode.weight<<endl;
                // HuffmanTreeNode<T> newNode(leftNode.data,leftNode.weight+rightNode.weight);
                // newNode.left = &leftNode;
                // newNode.right = &rightNode;
                // leftNode.parent = &newNode;
                // rightNode.parent = &newNode;
                // heap.Push(newNode);
            }
            root = heap.Top();

        }

        map<T,HuffmanTreeNode<T>*> GetLeaf()
        {
            return LeafMap;
        }

        HuffmanTreeNode<T>* GetRoot()
        {
            return root;
        }

        
};

#endif

HuffmanTreeEncode.h

#ifndef HUFFMAN_TREE_ENCODE_H
#define HUFFMAN_TREE_ENCODE_H
#include"HuffmanTree.h"
#include<bitset>
#include<fstream>
#include<ostream>
#include<string>
#include<algorithm>
#include<stdio.h>
#include <sys/stat.h>
#include<cstring>
using namespace std;

/**
 * Prints the weights of the subtree rooted at `node` in in-order
 * (left, node, right), each followed by a space.
 * A null `node` prints nothing, so an empty tree is safe to pass.
 */
template<class T>
void InOrder(HuffmanTreeNode<T>* node)
{
    if(!node)
        return;
    InOrder(node->left);
    std::cout<<node->weight<<" ";
    InOrder(node->right);
}

/**
 * Prints the weights of the subtree rooted at `node` in pre-order
 * (node, left, right), each followed by a space.
 * A null `node` prints nothing, so an empty tree is safe to pass.
 */
template<class T>
void PreOrder(HuffmanTreeNode<T>* node)
{
    if(!node)
        return;
    std::cout<<node->weight<<" ";
    PreOrder(node->left);
    PreOrder(node->right);
}

/**
 * Returns the size in bytes of the file named `fileName`.
 *
 * @param fileName path of the file to inspect; any length is accepted.
 * @return the size reported by stat(), or -1 when stat() fails
 *         (for example when the file does not exist).
 *
 * Declared inline because this definition lives in a header.
 */
inline int getFileSize1(const std::string& fileName) {
    struct stat statbuf;

    // Pass the path straight to stat(): copying it into a fixed-size buffer
    // overflowed for names such as "Inputfile.txt".
    if (stat(fileName.c_str(), &statbuf) != 0)
        return -1;

    return static_cast<int>(statbuf.st_size);
}


class HuffmanTreeEncode
{
    public:
        int size;
        HuffmanTree<char>* tree;
        char* value;
        int* w;
        int index;
        int BeforePressSize;
        string code;
        map<char,HuffmanTreeNode<char>*> LeafMap;

    HuffmanTreeEncode()
    {

        size = 0;
        tree = NULL;
        char c = 0;
        value = new char[128];

        for(int i=0;i<128;i++)
            value[i] = c+i;
        w = new int[128];
        for(int i=0;i<128;i++)
            w[i] = 0;
        code = "";
    }

    void encode(string path)  //规则 往左子树为1 右子树为0
    {
        //string text = "";
        ifstream ifs;
        ifs.open(path);
        // string tmp;
        // while(ifs>>tmp)
        // {
        //     text+=tmp;
        // }
         BeforePressSize = getFileSize1(path);

        istreambuf_iterator<char> begin(ifs);
        istreambuf_iterator<char> end;
        string text(begin, end);
        ifs.close();
    


        for(int i=0;i<text.size();i++)
        {
            w[text[i]-char(0)]++;
        }

        // for(int i=0;i<26;i++)
        //     cout<<w[i]<<" ";
        
        tree = new HuffmanTree<char>(value,w,128);
        // InOrder(tree->root);
        // cout<<endl;
        // PreOrder(tree->root);

        LeafMap = tree->GetLeaf();
        
        for(int i=0;i<text.size();i++)
        {
            //cout<<i<<endl;
            code+=FindToRoot(text[i]);
        }

        int n = code.size();
        
        // 编码输出到文�?
        //cout<<code<<endl;
        //reverse(code.begin(),code.end());
        bitset<1000000> bits(code);  //开大点,能最多放�?100�?
        //cout<<bits<<endl;
        ofstream ofs;
        ofs.open("HuffmanPress.dat");
        cout<<sizeof(bits)<<endl;
        // for(int i=0;i<10;i++)
        //     cout<<bits[i]<<endl;
        int wsize = code.size()/8+128;  //保证输出的字节数够用但是尽量少,因为一开始bitset数组里面的多余位�?0
        //int wsize = sizeof(bits);
 
        //输出权重信息等到文件以便解码时建树
        ofs<<code.size()<<' ';
        for(int i=0;i<127;i++)
            ofs<<w[i]<<' ';
        ofs<<w[127];
        //cout<<bits<<endl;
 
        ofs.write((char*)&bits,wsize);
        ofs.close();
        int AfterPressSize = getFileSize1("HuffmanPress.dat");

        float ratio = 0;//压缩率
        ratio = ( (float) AfterPressSize) / ((float)BeforePressSize) ;
        //cout << "压缩前大小为:" << BeforePressSize << "字节    压缩后大小为:" << AfterPressSize << "字节" << endl;
        printf("压缩前大小为:%d 字节  压缩后大小为:%d 字节 ", BeforePressSize,AfterPressSize);
        printf("---------- 压缩成功!---------\n");
        printf("---------压缩率为: %.2f %%--------",ratio*100);

        cout<<endl;
        
    }
    



    string FindToRoot(char key)
    {
        string ans = "";
         HuffmanTreeNode<char>* node = LeafMap[key];
        while(node->parent)
        {
            HuffmanTreeNode<char>* parent = node->parent;
            if(parent->left == node)
                ans+='1';
            else
                ans+='0';
            node = node->parent;
        }
        reverse(ans.begin(),ans.end());
        return ans;
    }




};

#endif

HuffmanTreeDecode.h

#ifndef HUFFMAN_TREE_DECODE_H
#define HUFFMAN_TREE_DECODE_H
#include"HuffmanTree.h"
#include<bitset>
#include<fstream>
#include<ostream>
#include<string>
#include<algorithm>
#include<stdio.h>
#include <sys/stat.h>
#include<cstring>
using namespace std;

// template<class T>
// void InOrder(HuffmanTreeNode<T>* node)
// {
//     if(node->left)
//         InOrder(node->left);
//     cout<<node->weight<<" ";
//     if(node->right)
//         InOrder(node->right);
// }

// template<class T>
// void PreOrder(HuffmanTreeNode<T>* node)
// {
//     cout<<node->weight<<" ";
//     if(node->left)
//         PreOrder(node->left);
//     if(node->right)
//         PreOrder(node->right); 
// }



/**
 * Huffman decompressor for archives laid out as
 *   <bit count> <w[0]> ... <w[127]><raw bytes of a std::bitset<1000000>>
 *
 * decode() rebuilds the Huffman tree from the 128 weights, walks the bits
 * from index (bit count - 1) down to 0 and writes the text to "DecodeFile.txt".
 *
 * NOTE(review): the archive is read in text mode and the header has no
 * separator before the binary payload, so a payload starting with an ASCII
 * digit is parsed as part of w[127]. Both are properties of the file format
 * shared with the encoder and cannot be changed on this side alone.
 */
class HuffmanTreeDecode
{
    public:
        enum { kSymbolCount = 128, kMaxBits = 1000000 };

        int size;
        HuffmanTree<char>* tree;   // tree of the most recent decode() call
        char* value;               // value[i] == char(i): the symbol table
        int* w;                    // w[i]: weight of symbol i read from the archive
        int index;
        std::string code;

    HuffmanTreeDecode()
    {
        size = 0;
        tree = nullptr;
        index = 0;
        value = new char[kSymbolCount];
        w = new int[kSymbolCount];

        for(int i=0;i<kSymbolCount;i++)
        {
            value[i] = static_cast<char>(i);
            w[i] = 0;
        }
    }

    // The object owns raw arrays and the tree, so copying is disabled.
    HuffmanTreeDecode(const HuffmanTreeDecode&) = delete;
    HuffmanTreeDecode& operator=(const HuffmanTreeDecode&) = delete;

    ~HuffmanTreeDecode()
    {
        delete tree;
        delete[] w;
        delete[] value;
    }

    /**
     * Decompresses the archive `path` into "DecodeFile.txt".
     *
     * When `path` cannot be opened, the default archive name
     * "HuffmanPress.dat" is tried instead (with a warning); earlier versions
     * ignored `path` and always read that file, so existing callers keep
     * working. Errors are reported on stderr and nothing is written.
     */
    void decode(std::string path)
    {
        std::ifstream ifs(path.c_str());
        if(!ifs.is_open())
        {
            std::cerr<<"无法打开 "<<path<<",改为读取 HuffmanPress.dat"<<std::endl;
            ifs.clear();
            ifs.open("HuffmanPress.dat");
        }
        if(!ifs.is_open())
        {
            std::cerr<<"无法打开压缩文件"<<std::endl;
            return;
        }

        // Header: number of payload bits followed by the 128 symbol weights.
        int codesize = 0;
        ifs>>codesize;
        for(int i=0;i<kSymbolCount;i++)
        {
            w[i] = 0;
            ifs>>w[i];
        }
        if(!ifs || codesize < 0 || codesize > kMaxBits)
        {
            std::cerr<<"压缩文件头损坏"<<std::endl;
            return;
        }

        // Payload: the raw bytes of the encoder's bitset. Fewer bytes than
        // sizeof(ans) are normally available; the remaining bits stay 0.
        std::bitset<kMaxBits> ans;
        ifs.read(reinterpret_cast<char*>(&ans),sizeof(ans));
        const std::streamsize received = ifs.gcount();
        ifs.close();
        if(received * 8 < codesize)
        {
            std::cerr<<"压缩文件数据不完整"<<std::endl;
            return;
        }

        // Rebuild exactly the tree the encoder used.
        delete tree;
        tree = new HuffmanTree<char>(value,w,kSymbolCount);

        // The first encoded symbol starts at the highest bit.
        std::string s;
        int bitPos = codesize-1;
        while(bitPos>=0)
        {
            s += FindLeafChar(tree->GetRoot(),bitPos,ans);
        }

        std::ofstream ofs("DecodeFile.txt");
        if(!ofs.is_open())
        {
            std::cerr<<"无法创建输出文件 DecodeFile.txt"<<std::endl;
            return;
        }
        ofs<<s;
        ofs.close();

        std::cout<<"-------解压成功!文件输出到 DecodeFile.txt--------"<<std::endl;
    }

    /**
     * Decodes one symbol.
     *
     * Starting at `root`, consumes bits of `code` from position `index`
     * downwards (1 = left child, 0 = right child) until a leaf is reached and
     * returns that leaf's symbol. `index` is left on the first unread bit.
     *
     * Returns ' ' when no leaf can be reached: `root` is null, or the bits run
     * out (index < 0 or beyond the bitset) in the middle of a symbol.
     *
     * `code` is taken by const reference; passing the 125 KB bitset by value
     * copied it once per decoded character.
     */
    char FindLeafChar(HuffmanTreeNode<char>* root,int& index,const std::bitset<1000000>& code)
    {
        while(root)
        {
            if(root->left == nullptr && root->right == nullptr)
            {
                return root->data;
            }
            if(index < 0 || index >= static_cast<int>(code.size()))
            {
                break; // out of bits before reaching a leaf
            }
            root = code[index] ? root->left : root->right;
            index--;
        }
        return ' ';
    }

};

#endif

HuffmanEncode.cpp

#include"HuffmanTree.h"
#include"HuffmanTreeEncode.h"
#include"HuffmanTreeDecode.h"
using namespace std;
// 华南师范大学 关竣佑
//哈夫曼编码压缩
int main()
{

    
    HuffmanTreeEncode Encode;
    
    Encode.encode("Inputfile.txt");

    //Encode.decode();


    return 0;
}

HuffmanDecode.cpp

#include"HuffmanTree.h"
#include"HuffmanTreeEncode.h"
#include"HuffmanTreeDecode.h"
using namespace std;
// 华南师范大学 关竣佑
//哈夫曼编码解压器
// Decompressor entry point: decodes the archive produced by the compressor.
int main()
{

    HuffmanTreeDecode Decode;

    // Must be the file name the compressor writes ("HuffmanPress.dat");
    // the previous literal "HuffmanPree.dat" was a typo.
    Decode.decode("HuffmanPress.dat");

    return 0;
}

目前的代码仍可能偶然出现解码乱码,字符不匹配的情况,正在改进中。

  • 2
    点赞
  • 25
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

June_gjy

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值