文件压缩c++

主要实现思路: 先统计文件中各字符出现的次数,利用最小堆的特性构造哈夫曼树,再遍历哈夫曼树得到每个字符的哈夫曼编码。

  • 统计文件字符出现的次数,本质是利用字符出现的次数进行构造哈夫曼树; 然后通过遍历哈夫曼树获取哈夫曼编码;
  • 压缩文件中有两部分信息,先是配置信息(主要内容:字符、字符出现次数),然后是按huffman code编码后的数据,两部分之间用一条出现次数为0的配置记录(info)作为分隔标记;
    当解压缩时可以利用压缩文件配置信息中的内容重新构造出一棵相同的哈夫曼树,然后沿着这棵树逐位解码,还原文件信息;
  • Heap.h
// Heap.h
#pragma once

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <utility>
#include <vector>
using namespace std;

// Strict "less than" comparator: returns true when left orders before right
// according to T's operator<. Used as the default ordering of Heap.
template <class T>
struct Less
{
    // const so the comparator can be invoked through a const object/reference.
    bool operator ()(const T& left, const T& right) const
    {
        return left < right;
    }
};

// Specialization for pointers: orders by the pointees' _w member (their
// weight) instead of by address. T must expose a _w member comparable with <.
template <class T>
struct Less<T*>
{
    // Parameters stay non-const pointers so that weight types whose
    // operator< is not const-qualified keep working.
    bool operator ()(T* nodeleft, T* noderight) const
    {
        return (nodeleft->_w) < (noderight->_w);
    }
};

// Strict "greater than" comparator: returns true when left orders after right
// according to T's operator>. Passing it to Heap yields a max-heap.
template <class T>
struct Greater
{
    // const so the comparator can be invoked through a const object/reference.
    bool operator ()(const T& left, const T& right) const
    {
        return left > right;
    }
};


// Redeclaration of the comparator defined earlier in this header, so that the
// default template argument below does not depend on declaration order.
template <class T>
struct Less;

// Binary heap stored in a std::vector.
//
// Compare decides which element sits at the top: com(a, b) == true means a
// must be closer to the root than b. The default Less<T> gives a min-heap;
// Greater<T> gives a max-heap.
template <class T, class Compare = Less<T>>
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {}

    // Builds a heap from the n elements starting at a (a may be null only
    // when n is 0).
    Heap(const T* a, std::size_t n)
        : _a(a, a + n)
    {
        // Bottom-up heapify: only indices that have at least one child need to
        // be sifted down, i.e. size/2 - 1 down to 0.
        for (std::size_t i = _a.size() / 2; i > 0; --i)
        {
            AdjustDown(static_cast<int>(i - 1));
        }
    }

    // Sifts the element at index root down until neither child has to be
    // above it. Out-of-range indices are ignored.
    void AdjustDown(int root)
    {
        if (root < 0)
        {
            return;
        }

        using std::swap;
        Compare com;

        std::size_t parent = static_cast<std::size_t>(root);
        std::size_t child = parent * 2 + 1;

        while (child < _a.size())
        {
            // Pick whichever child has to be closer to the root.
            if ((child + 1) < _a.size() && com(_a[child + 1], _a[child]))
            {
                ++child;
            }

            if (!com(_a[child], _a[parent]))
            {
                break;
            }

            swap(_a[child], _a[parent]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    // Sifts the element at index child up until its parent may stay above it.
    // Out-of-range indices are ignored.
    void AdjustUp(int child)
    {
        if (child < 0 || static_cast<std::size_t>(child) >= _a.size())
        {
            return;
        }

        using std::swap;
        Compare com;

        std::size_t cur = static_cast<std::size_t>(child);
        while (cur > 0)
        {
            const std::size_t parent = (cur - 1) / 2;
            if (!com(_a[cur], _a[parent]))
            {
                break;
            }

            swap(_a[cur], _a[parent]);
            cur = parent;
        }
    }

    // Number of stored elements.
    std::size_t size() const
    {
        return _a.size();
    }

    // Inserts x and restores the heap property.
    void push(const T& x)
    {
        _a.push_back(x);
        // The new element is the last leaf: size - 1 is its index.
        AdjustUp(static_cast<int>(_a.size() - 1));
    }

    // Removes the top element; does nothing on an empty heap.
    void pop()
    {
        if (empty())
        {
            return;
        }

        using std::swap;
        // Move the top to the back, drop it, then repair from the root.
        swap(_a[0], _a[_a.size() - 1]);
        _a.pop_back();
        AdjustDown(0);
    }

    // True when the heap holds no elements.
    bool empty() const
    {
        return _a.empty();
    }

    // Verifies the heap property with the same Compare used to maintain it:
    // no element may have to be above its own parent.
    bool isHeap() const
    {
        Compare com;
        for (std::size_t child = 1; child < _a.size(); ++child)
        {
            if (com(_a[child], _a[(child - 1) / 2]))
            {
                return false;
            }
        }

        return true;
    }

    // Returns the top element. Precondition: the heap is not empty.
    T& top()
    {
        assert(!empty());
        return _a[0];
    }

    // Read-only access to the top element. Precondition: the heap is not empty.
    const T& top() const
    {
        assert(!empty());
        return _a[0];
    }

    // Prints the elements in storage (level) order, separated by spaces and
    // followed by a line break.
    void print() const
    {
        for (std::size_t i = 0; i < _a.size(); ++i)
        {
            std::cout << _a[i] << " ";
        }
        std::cout << std::endl;
    }
private:
    std::vector<T> _a;  // implicit complete binary tree: children of i are 2i+1 and 2i+2
};



// Number of samples that TopK() generates.
constexpr std::size_t N{10000};
// Number of largest samples that TopK() keeps.
constexpr std::size_t K{3};

// Sifts heap[root] down inside the min-heap heap[0..n): while the smaller of
// its children is less than it, the two are swapped.
//
// heap  array holding the heap; must not be null
// n     number of elements that belong to the heap
// root  index at which sifting starts
//
// Declared inline because it is defined in a header.
inline void AdjustDown(int* heap, int n, int root)
{
    assert(heap);

    int parent = root;
    for (int child = parent * 2 + 1; child < n; child = parent * 2 + 1)
    {
        // Prefer the right child when it is the smaller of the two.
        if (child + 1 < n && heap[child + 1] < heap[child])
        {
            ++child;
        }

        if (!(heap[child] < heap[parent]))
        {
            break;
        }

        std::swap(heap[child], heap[parent]);
        parent = child;
    }
}




// Demo: finds the K largest of N pseudo-random values with a K-element
// min-heap and prints them (in heap order, space separated, no line break).
//
// Three values larger than anything rand() % N can produce are planted at
// indices 0, 100 and 2, so the expected winners are known in advance.
//
// Declared inline because it is defined in a header.
inline void TopK()
{
    static_assert(K >= 1 && K <= N, "TopK needs 1 <= K <= N");
    static_assert(N > 100, "TopK plants a value at index 100");

    // Heap storage instead of a 40 KB stack array.
    std::vector<int> a(N);
    for (std::size_t i = 0; i < N; ++i)
    {
        a[i] = static_cast<int>(std::rand() % N);
    }

    a[0] = 11111;
    a[100] = 25025;
    a[2] = 48525;

    // Seed the heap with the first K samples.
    int heap[K] = { 0 };
    for (std::size_t i = 0; i < K; ++i)
    {
        heap[i] = a[i];
    }

    // Build the min-heap. Signed arithmetic keeps the start index valid for
    // every K >= 1 (the unsigned K - 2 would wrap around for K == 1).
    const int heapSize = static_cast<int>(K);
    for (int i = (heapSize - 2) / 2; i >= 0; --i)
    {
        AdjustDown(heap, heapSize, i);
    }

    // heap[0] is the smallest of the current top K: any larger sample
    // replaces it and is sifted into place.
    for (std::size_t i = K; i < N; ++i)
    {
        if (a[i] > heap[0])
        {
            heap[0] = a[i];
            AdjustDown(heap, heapSize, 0);
        }
    }

    for (std::size_t i = 0; i < K; ++i)
    {
        std::cout << heap[i] << " ";
    }
}


// Sorts a[0..n) in place with heap sort.
//
// AdjustDown maintains a min-heap, and each round moves the current minimum
// to the end of the unsorted range, so the result is in DESCENDING order.
//
// a  array to sort; may be null only when n < 2
// n  number of elements
//
// Declared inline because it is defined in a header.
inline void HeapSort(int* a, std::size_t n)
{
    // Nothing to do, and it keeps the index arithmetic below from wrapping.
    if (n < 2)
    {
        return;
    }
    assert(a);

    const int count = static_cast<int>(n);

    // Build the heap bottom-up from the last parent: O(N) overall.
    for (int i = (count - 2) / 2; i >= 0; --i)
    {
        AdjustDown(a, count, i);
    }

    // Selection phase: swap the minimum to the end, shrink the heap by one
    // and repair it from the root. O(N * lgN).
    for (int end = count - 1; end > 0; --end)
    {
        std::swap(a[0], a[end]);
        AdjustDown(a, end, 0);
    }
}


// Demo: sorts a fixed array with HeapSort and prints the result on one line,
// each value followed by a space.
//
// Declared inline because it is defined in a header.
inline void TestHeapSort()
{
    int a[] = { 10,11, 13, 12, 16, 18, 15, 17, 14, 19 };
    const std::size_t count = sizeof(a) / sizeof(a[0]);

    HeapSort(a, count);

    for (std::size_t i = 0; i < count; ++i)
    {
        std::cout << a[i] << " ";
    }
    std::cout << std::endl;
}
  • HuffmanTree.h
// HuffmanTree.h
#pragma once

#include<assert.h>
#include "Heap.h"

// One node of a Huffman tree. A freshly constructed node carries its weight
// and has no parent and no children.
template <class W>
struct HuffmanTreeNode
{
    W _w;                                   // weight
    HuffmanTreeNode<W>* _left = nullptr;    // left child
    HuffmanTreeNode<W>* _right = nullptr;   // right child
    HuffmanTreeNode<W>* _parent = nullptr;  // parent node

    // Creates a detached node holding a copy of w.
    HuffmanTreeNode(const W& w)
        : _w(w)
    {}
};

template<class T>
class HuffmanTree
{
    typedef HuffmanTreeNode<T> Node;
public:
    HuffmanTree()
        :_root(0)
    {}

    HuffmanTree(T* arr, size_t size, const T& invalid)
    {
        _root = GreatHuffmanTree(arr, size, invalid);
    }

    Node* getHuffmanTree()
    {
        return _root;
    }

private:
    Node* GreatHuffmanTree(T* arr, size_t size, const T& invalid)
    {
        assert(arr);

        //建小堆
        Heap<Node*, Less<Node*>> minheap;
        for (size_t i = 0; i < size; ++i)
        {
            if (arr[i] != invalid)
            {
                Node* tmp = new Node(arr[i]);
                minheap.push(tmp);
            }
        }

        //构建Huffman
        if (minheap.size())
        {
            while (minheap.size() > 1)
            {
                Node* left = minheap.top();
                minheap.pop();
                Node* right = minheap.top();
                minheap.pop();

                Node* newNode = new Node(left->_w + right->_w);

                newNode->_left = left;
                newNode->_right = right;

                left->_parent = newNode;
                right->_parent = newNode;

                minheap.push(newNode);
            }
            return minheap.top();
        }
        else
            return NULL;
    }
private:
    Node* _root;
};
  • FileCompress.h
// FileCompress.h

#pragma once

#include "HuffmanTree.h"
#include <string>
#include <assert.h>

// Counter type for byte occurrences.
using longtype = long long;

// Per-byte bookkeeping used as the weight type of the Huffman tree.
// All comparisons and the addition look at _count only.
struct charInfo
{
    unsigned char _ch = 0;  // byte value, 0-255
    longtype _count = 0;    // number of occurrences
    std::string _code;      // Huffman code as a string of '0' / '1'

    // Weight of a merged node: the counts are added; _ch and _code of the
    // result keep their default values.
    charInfo operator+(const charInfo& info) const
    {
        charInfo ret;
        ret._count = _count + info._count;
        return ret;
    }

    bool operator<(const charInfo& info) const
    {
        return _count < info._count;
    }

    bool operator>(const charInfo& info) const
    {
        return _count > info._count;
    }

    bool operator!=(const charInfo& info) const
    {
        return _count != info._count;
    }
};

class FileCompress
{
    typedef HuffmanTreeNode<charInfo> Node;

    struct ConfigInfo //写配置信息时使用
    {
        unsigned char _ch;
        longtype _count;
    };

public:
    FileCompress()
    {
        for (int i = 0; i < 256; i++)
        {
            _infos[i]._ch = i;
            _infos[i]._count = 0;
        }
    }

    //文件压缩
    void Compress(const char* filename)
    {
        assert(filename);

        FILE* fout = fopen(filename,"r");
        assert(fout);

        // 1.统计文件中字符出现的次数
        char ch = fgetc(fout);
        while (ch != EOF)
        {
            _infos[(unsigned char)ch]._count++;
            ch = fgetc(fout);
        }

        // 2.构建huffman tree
        charInfo invalid;
        invalid._count = 0;
        HuffmanTree<charInfo> tree(_infos, 256, invalid);
        Node* root = tree.getHuffmanTree();

        // 3.获得huffman code 
        string code;
        GenerateHuffmancode(root, code);
        //GenerateHuffmanCode(root);

        // 4.压缩
        string compressfile = filename;
        compressfile += ".huffman";

        FILE* fin = fopen(compressfile.c_str(),"w");
        assert(fin);

        // 写配置信息(字符出现次数)
        //fseek(fin, 0, SEEK_SET);
        for (size_t i = 0; i < 256; ++i)
        {
            ConfigInfo info;
            if (_infos[i]._count)
            {
                info._ch = _infos[i]._ch;
                info._count = _infos[i]._count;
                fwrite(&info, sizeof(ConfigInfo), 1, fin);
            }
        }

        //区分配置信息和huffman编码
        ConfigInfo info;
        info._count = 0;
        fwrite(&info, sizeof(ConfigInfo), 1, fin);

        //将string code转换为二进制
        fseek(fout, 0, SEEK_SET);//函数设置文件指针stream的位置 指针回到开始位置

        ch = fgetc(fout);
        char value = 0; //存放二进制
        char pos = 0;

        while (ch != EOF)
        {
            //"aaaabbbccd" 00001010 10111111 1100000
            string& code = _infos[(unsigned char)ch]._code;
            for (size_t i = 0; i < code.size(); ++i)
            {
                if (code[i] == '1')
                {
                    value |= (1 << pos);
                }
                else // '0'
                {
                    value &= (~(1 << pos));
                }

                ++pos;

                if (pos == 8)
                {
                    fputc(value, fin);
                    value = 0; 
                    pos = 0;
                }
            }

            ch = fgetc(fout);
        }

        if (pos)//将不足8位的补齐
        {
            fputc(value, fin);
        }

         写配置信息(字符出现次数)
        //fseek(fin, 0, SEEK_SET);
        //for (size_t i = 0; i < 256; ++i)
        //{
        //    ConfigInfo info;
        //    if (_infos[i]._count)
        //    {
        //        info._ch = _infos[i]._ch;
        //        info._count = _infos[i]._count;
        //        fwrite(&info, sizeof(ConfigInfo), 1, fin);
        //    }
        //}

        区分配置信息和huffman编码
        //ConfigInfo info;
        //info._count = 0;
        //fwrite(&info, sizeof(ConfigInfo), 1, fin);


        //关闭文件流
        fclose(fin);
        fclose(fout);
    }

    void Uncompress(const char* filename)
    {
        assert(filename);

        string uncompressFile = filename;
        size_t index = uncompressFile.rfind('.');
        assert(index != string::npos);
        //uncompressFile.erase(index, uncompressFile.size() - index + 1);
        uncompressFile += ".unhuffman";

        FILE* fin = fopen(uncompressFile.c_str(), "w");
        assert(fin);


        FILE* fout = fopen(filename, "r");
        assert(fout);

        //1.读配置信息
        while (1)
        {
            ConfigInfo info;
            info._count = 0;
            fread(&info, sizeof(ConfigInfo), 1, fout);

            if (info._count)
            {
                _infos[info._ch]._count = info._count;
            }
            else
            {
                break;
            }

        }

        //2.重建Huffman树
        charInfo invalid;
        invalid._count = 0;
        HuffmanTree<charInfo> tree(_infos, 256, invalid);
        Node* root = tree.getHuffmanTree();
        Node* cur = root;

        longtype totalcount = root->_w._count;//所有字符出现总次数

        char value = fgetc(fout);
        size_t pos = 0;
        while (value != EOF)
        {
            if (value&(1 << pos))
            {
                cur = cur->_right;
            }
            else
            {
                cur = cur->_left;
            }

            if ((cur->_left == NULL) && (cur->_right == NULL))
            {
                fputc(cur->_w._ch, fin);
                cur = root;

                if (--totalcount == 0)//解码完成
                {
                    break;
                }
            }

            ++pos;
            if (pos == 8)
            {
                value = fgetc(fout);
                pos = 0;
            }
        }

        fclose(fout);
        fclose(fin);

    }


    //生成Huffman code
    void GenerateHuffmancode(Node* cur, string code) 
    {
        if (cur == NULL)
            return;

        if (cur->_left == NULL && cur->_right == NULL)
        {
            _infos[(unsigned char)cur->_w._ch]._code = code;
            return;
        }

        GenerateHuffmancode(cur->_left, code + '0');
        GenerateHuffmancode(cur->_right, code + '1');
    }

    /*void GenerateHuffmanCode(Node* cur)
    {
        if (cur == NULL)
            return;

        if (cur->_left == NULL && cur->_right == NULL)
        {
            Node* child = cur;
            Node* parent = child->_parent;

            string& code = _infos[cur->_w._ch]._code;
            while (parent)
            {
                if (parent->_left == child)
                {
                    code += '0';
                }
                else
                {
                    code.push_back('1');
                }

                child = parent;
                parent = child->_parent;
            }

            reverse(code.begin(), code.end());
        }

        GenerateHuffmanCode(cur->_left);
        GenerateHuffmanCode(cur->_right);
    }*/

private:
    charInfo _infos[256];//ASCII码表中0-255
};
  • test.cpp
// test.cpp
#include <iostream>
using namespace std;

#include <assert.h>
#include "Heap.h"
#include "HuffmanTree.h"
#include <string>
#include "FileCompress.h"

// Heap demo: builds a min-heap from an array, prints it, then pushes three
// values, pops the top and prints the heap again.
void test1()
{
    int a[] = { 8, 7, 13, 12, 16, 18, 15, 17, 14, 19 };
    // Element count: divide by the size of an element that actually exists.
    const size_t count = sizeof(a) / sizeof(a[0]);

    Heap<int> hp1(a, count);
    hp1.print();

    cout << hp1.size() << endl;
    hp1.push(55);
    hp1.push(54);
    hp1.push(53);
    hp1.pop();
    hp1.print();
}

// Runs the Top-K demo and then the heap sort demo; a line break is printed
// between the two because TopK() does not end its own output line.
void test2()
{
    TopK();
    std::cout << std::endl;

    TestHeapSort();
}


//文件压缩
void TestFileCompress()
{
    FileCompress fc;
    fc.Compress("input.txt");
    cout << "FileCompress Succeed!" << endl;
}

//文件解压缩
void TestFileUncompress()
{
    FileCompress fc;
    fc.Uncompress("input.txt.huffman");
    cout << "FileUncompress Succeed!" << endl;
}


// Entry point: compresses input.txt and decompresses the result again.
// The heap demos (test1, test2) are kept but disabled.
// Returns 0 on success and 1 when a step fails with an exception.
int main()
{
    int status = 0;

    try
    {
        //test1();
        //test2();
        TestFileCompress();
        TestFileUncompress();
    }
    catch (const std::exception& error)
    {
        std::cerr << error.what() << std::endl;
        status = 1;
    }

#ifdef _WIN32
    // Keeps the console window open when started from the IDE; "pause" is a
    // Windows shell command and does not exist elsewhere.
    system("pause");
#endif
    return status;
}
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值