项目：文件压缩(huffman树)

最新推荐文章于 2023-04-19 12:02:25 发布

wn12-02

最新推荐文章于 2023-04-19 12:02:25 发布

阅读量254

点赞数

分类专栏： Linux

本文链接：https://blog.csdn.net/weixin_41289858/article/details/82112332

版权

Linux 专栏收录该内容

15 篇文章 0 订阅

订阅专栏

原理：

将每个字符转换成对应的编码，而编码都是由0和1组成，那么就可以用比特位来表示编码对应的字符，从而实现文件压缩；同样，再根据压缩的步骤反推，即可实现解压缩

思路：
1、统计文件中各个字符出现的次数

定义一个哈希表，用直接定址法，统计各个字符出现的次数； 
要注意的是：char类型（有符号时）的取值范围是-128~127，而哈希表的下标范围是0~255，所以使用直接定址法的时候需先强转成无符号类型（unsigned char）

2、构建huffman树（贪心算法）

(1)将哈希表中的元素插入小堆；
(2)取堆中最小的两个元素相加，然后将结果插入堆中；
(3)调整堆，使其保持小堆；
(4)重复上述步骤，直到堆中只剩下最后一个元素，该元素即为huffman树的根。

3、生成huffman编码

这里写图片描述

4、压缩

将每个字符对应的huffman编码换成比特位，写入压缩文件

5、解压缩：

再将压缩文件中的比特位一一读取，寻找huffman树中对应的字符，写入解压缩后的文件

步骤：

压缩过程 
①使用ifstream函数读取文件Input.txt文件，统计字符出现次数 
②将字符信息：字符_ch、字符出现次数_count、字符的huffman编码_code,存入结构体CharInfo 
③将字符对应的CharInfo按_ch直接定址，放入哈希表_hashInfo中 
④使用priority_queue创建huffman树，再遍历huffman树，将huffman编码写入哈希表中对应的CharInfo的_code里 
⑤将哈希表中CharInfo的_count>0的元素使用ofstream函数写入文件Input.txt.huffman中，最后再加一个_count=0的CharInfo作为分界线 
⑥再将每个字符的huffman编码按照“位”，使用ofstream函数写入文件Input.txt.huffman中，压缩完毕 
解压过程 
⑦使用ifstream读取文件Input.txt.huffman中分界线前部分，即哈希表的内容 
⑧根据上步创建的哈希表内容，再使用priority_queue创建huffman树 
⑨根据从压缩文件分界线之后逐位读取到的huffman编码遍历huffman树，找到对应的字符，使用ofstream函数写入文件Input.txt.unhuffman中，解压完毕

全部代码：
HuffmanTree.h

#pragma once

#include<queue>
//#include<vector>

// A single node of a binary Huffman tree.
// W is the weight type stored in the node; it only needs to be
// copy-constructible for the node itself.
template<class W>
struct HuffmanTreeNode
{
    HuffmanTreeNode* _left;   // left child, NULL when absent
    HuffmanTreeNode* _right;  // right child, NULL when absent
    W _w;                     // weight carried by this node

    // Builds a childless node that stores a copy of `w`.
    // The initializers are listed in member declaration order, which is
    // the order in which members are actually initialized.
    HuffmanTreeNode(const W& w)
        : _left(NULL), _right(NULL), _w(w)
    {
    }
};

template<class W>
class HuffmanTree
{
    typedef HuffmanTreeNode<W> Node;
public:
    struct NodeCompare
    {
        //仿函数，使之按照结点内的权值比较大小，而不是按指针
        //这种只比较，不修改的写成const最好
        //但是随之而来的问题就是，如果w是自定义类型，而不仅仅是内置类型
        //那么就要注意，const修饰的l和r能不能调用w内部的函数
        //即const对象不能调用非const函数
        bool operator()(Node* l, Node* r)
        {
            return l->_w > r->_w;
        }
    };

    HuffmanTree(W* a, size_t n,const W& invalid)   //判断非法值
        :_root(NULL)
    {
        //构建树 优先级队列
        priority_queue<Node*, vector<Node*>, NodeCompare> minheap;
        for (size_t i = 0; i < n; i++)
        {
            if (a[i] != invalid)
                minheap.push(new Node(a[i]));
        }

        while (minheap.size() > 1)
        {
            Node* left = minheap.top();
            minheap.pop();
            Node* right = minheap.top();
            minheap.pop();

            //构建父节点
            Node* parent = new Node(left->_w + right->_w);
            parent->_left = left;
            parent->_right = right;

            //将父节点放回队列
            minheap.push(parent);
        }
        _root = minheap.top();
    }

    ~HuffmanTree()
    {
        //释放树的空间
        Destroy(_root);
        _root = NULL;
    }

    void Destroy(Node* root)
    {
        if (root == NULL)
            return;
        Destroy(root->_left);
        Destroy(root->_right);
        delete(root);
    }

    Node* GetRoot()
    {
        return _root;
    }

private:
    Node* _root;

private:
    //防拷贝
    HuffmanTree(const HuffmanTree<W>& h);
    HuffmanTree<W>& operator=(const HuffmanTree<W>& h);
};

FileComparess.h

#pragma once
#include<fstream>
#include<string>
#include<algorithm>
#include<assert.h>
#include<time.h>

#include"HuffmanTree.h"
using namespace std;

// Integer type used for per-character occurrence counts.
typedef long long LongType;

#define FORM_LENGTH 4   // length of a typical file extension including the dot, e.g. ".txt" is 4 characters
#define COMPRESS_FORM ".huffman"    // extension given to compressed files

// Per-character bookkeeping used both as the hash-table entry and as the
// weight type of the Huffman tree.
// All comparisons and the addition look at `_count` only.
struct CharInfo
{
    char _ch;           // the character (byte value)
    LongType _count;    // number of occurrences of the character
    std::string _code;  // Huffman code of the character as a string of '0'/'1'

    // Weight of a merged (internal) tree node: the sum of both counts.
    // The result is value-initialized first so that `_ch` holds a defined
    // value (0) instead of an indeterminate one that would later be copied.
    CharInfo operator+(const CharInfo& info) const
    {
        CharInfo tmp = CharInfo();
        tmp._count = _count + info._count;
        return tmp;
    }

    // Orders by occurrence count.
    bool operator>(const CharInfo& info) const
    {
        return _count > info._count;
    }

    // Two entries differ when their occurrence counts differ.
    bool operator!=(const CharInfo& info) const
    {
        return _count != info._count;
    }
};

class FileCompress
{
    typedef HuffmanTreeNode<CharInfo> Node;

public:
    //专门用于在压缩文件里保存字符出现次数的结构体
    //没必要使用CharInfo，因为有string code这一项的浪费开销
    struct ConfigInfo
    {
        char _ch;
        LongType _count;
    };


    FileCompress()//初始化哈希表
    {
        for (size_t i = 0; i < 256; ++i)
        {
            _hashInfos[i]._ch = i;
            _hashInfos[i]._count = 0;
        }
    }

    //统计文件中字符出现的次数
    void GetCharCount(ifstream& ifs, const char*file)
    {
        char ch;
        while (ifs.get(ch))
        {
            ++_hashInfos[(unsigned char)ch]._count;//这里的ch必须转换成无符号的，因为_hashInfos的下标是从0到255
        }
    }

    bool IsExist(const char* file)//判断文件是否存在
    {
        ifstream ifs(file);
        if (!ifs)
        {
            return false;//不存在
        }
        return true;//存在
    }

    string CompressForm(const char* file, char* form)//压缩前的准备和检验
    {
        string compressfile = file;
        size_t index = compressfile.rfind('.');//从后往前找到文件名中的“.”
        assert(index != string::npos);
        strcpy(form, file + index);//保存源文件格式
        compressfile.erase(index);//删除后缀
        index = compressfile.rfind('\\');//从后往前找到文件名中的第一个“\”
        ++index;
        string name;
        for (size_t i = index; file[i] != '.'; ++i)//保存源文件名
        {
            name.push_back(file[i]);
        }
        compressfile.erase(index);//删除源文件名


        printf("是否压缩到指定文件？\n");
        printf("1.是\t\t\t2.否\n");

        int select = 0;
        while (1)
        {
            scanf("%d", &select);
            if (select == 1)
            {
                char newRoad[100] = { 0 };
                printf("请输入指定目录：(如：D:\\Game\\ “\\”不能漏写)\n");
                scanf("%s", newRoad);
                string road(newRoad);
                if (IsExist((road + name + COMPRESS_FORM).c_str()))
                {
                    printf("文件已存在，请选择：\n");
                    printf("1.替换\t\t\t2.重命名\n");
                    scanf("%d", &select);
                    if (select == 2)
                    {
                        char newName[100] = { 0 };
                        scanf("%s", newName);
                        road += newName;
                    }
                    else
                    {
                        road += name;
                    }
                }
                else
                {
                    road += name;
                }
                road += COMPRESS_FORM;
                return road;
            }
            else if (select == 2)
            {
                if (IsExist((compressfile + name + COMPRESS_FORM).c_str()))
                {
                    printf("文件已存在，请选择：\n");
                    printf("1.替换\t\t\t2.重命名\n");
                    scanf("%d", &select);
                    if (select == 2)
                    {
                        char newName[100] = { 0 };
                        scanf("%s", newName);
                        compressfile += newName;
                    }
                    else
                    {
                        compressfile += name;
                    }
                }
                else
                {
                    compressfile += name;
                }
                compressfile += COMPRESS_FORM;//替换成指定的压缩文件格式
                return compressfile;
            }
            else
            {
                printf("输入有误，重新选择:\n");
            }
        }
    }

    string UncompressForm(const char* file, char* form)//解压缩前的准备和检验
    {
        string compressfile = file;
        size_t index = compressfile.rfind('.');//从后往前找到文件名中的“.”
        assert(index != string::npos);
        compressfile.erase(index);//删除后缀
        index = compressfile.rfind('\\');//从后往前找到文件名中的第一个“\”
        ++index;
        string name;
        for (size_t i = index; file[i] != '.'; ++i)//保存源文件名
        {
            name.push_back(file[i]);
        }
        compressfile.erase(index);//删除压缩文件名

        printf("是否解压到指定文件夹？\n");
        printf("1.是\t\t\t2.否\n");

        int select = 0;
        while (1)
        {
            scanf("%d", &select);
            if (select == 1)
            {
                char str[100] = { 0 };
                printf("请输入指定目录：(如：D:\\Game\\ “\\”不能漏写)\n");
                scanf("%s", str);
                string road(str);
                if (IsExist((road + name + form).c_str()))
                {
                    printf("文件已存在，请选择：\n");
                    printf("1.替换\t\t\t2.重命名\n");
                    scanf("%d", &select);
                    if (select == 2)
                    {
                        char newName[100] = { 0 };
                        printf("请输入新的名字：\n");
                        scanf("%s", newName);
                        road += newName;
                    }
                    else
                    {
                        road += name;
                    }
                }
                else
                {
                    road += name;
                }
                road += form;
                return road;
            }
            else if (select == 2)
            {
                if (IsExist((compressfile + name + form).c_str()))
                {
                    printf("文件已存在，请选择：\n");
                    printf("1.替换\t\t\t2.重命名\n");
                    scanf("%d", &select);
                    if (select == 2)
                    {
                        char newName[100] = { 0 };
                        printf("请输入新的名字：\n");
                        scanf("%s", newName);
                        compressfile += newName;
                    }
                    else
                    {
                        compressfile += name;
                    }
                }
                else
                {
                    compressfile += name;
                }
                compressfile += form;//替换成指定的压缩文件格式
                return compressfile;
            }
            else
            {
                printf("输入有误，重新选择:\n");
            }
        }
    }

    void GenerateHuffmanCode(Node* root)//获取huffman编码
    {
        if (root == NULL)
            return;
        if (root->_left == NULL&&root->_right == NULL)
        {
            _hashInfos[(unsigned char)root->_w._ch]._code = root->_w._code;
            return;
        }
        if (root->_left != NULL)
        {
            root->_left->_w._code = root->_w._code + '0';
            GenerateHuffmanCode(root->_left);
        }
        if (root->_right != NULL)
        {
            root->_right->_w._code = root->_w._code + '1';
            GenerateHuffmanCode(root->_right);
        }
    }

    void Compress(const char* file)
    {
        ifstream ifs(file, ios::in | ios::binary);//0.打开源文件

        GetCharCount(ifs, file);//1.统计文件中字符出现的次数

        CharInfo invalid;
        invalid._count = 0;
        HuffmanTree<CharInfo> tree(_hashInfos, 256, invalid);//2.生成huffman树

        GenerateHuffmanCode(tree.GetRoot());//3.生成huffman编码

        char form[FORM_LENGTH + 1] = { 0 };
        string compressfile = CompressForm(file, form);//4.压缩前的准备和检验
        ofstream ofs(compressfile.c_str(), ios::out | ios::binary);//创建该文件名的文件，并写入内容

        //计时器
        clock_t start = 0, end = 0;
        start = clock();

        //5.压缩
        //压缩三部分内容：源文件格式+字符出现次数信息+源文件内容信息
        for (size_t i = 0; i < FORM_LENGTH + 1; ++i)//5.1将源文件后缀，即格式写入压缩文件，方便解压缩的时候恢复原格式
        {
            ofs.put(form[i]);
        }

        for (size_t i = 0; i < 256; ++i)//5.2将_hashInfos中字符出现次数>0的元素写入压缩文件
        {
            if (_hashInfos[i]._count>0)
            {
                ConfigInfo info;
                info._ch = _hashInfos[i]._ch;
                info._count = _hashInfos[i]._count;
                ofs.write((const char*)&info, sizeof(ConfigInfo));
            }
        }

        ConfigInfo over;
        over._count = 0;
        ofs.write((const char*)&over, sizeof(ConfigInfo));//5.3设置分界线

        char ch;
        char value = 0;
        int pos = 0;
        ifs.clear();
        ifs.seekg(0);
        while (ifs.get(ch))//5.4将huffman编码写入
        {
            string& code = _hashInfos[(unsigned char)ch]._code;
            for (size_t i = 0; i < code.size(); ++i)
            {
                if (code[i] == '0')
                    value &= (~(1 << pos));
                else if (code[i] == '1')
                    value |= (1 << pos);
                else
                    assert(false);
                ++pos;
                if (pos == 8)
                {
                    ofs.put(value);
                    pos = 0;
                    value = 0;
                }
            }

        }
        if (pos > 0)
        {
            ofs.put(value);
        }
        end = clock();
        printf("压缩用时：%d ms\n", end - start);
    }

    void UnCompress(const char* file)
    {
        ifstream ifs(file, ios::in | ios::binary);//0.打开压缩文件

        //2.读取三部分信息：源文件格式+字符出现次数信息+源文件内容信息

        char form[FORM_LENGTH + 1];
        for (size_t i = 0; i < FORM_LENGTH + 1; ++i)//2.读取源文件格式信息
        {
            ifs.get(form[i]);
        }

        string uncompressfile = UncompressForm(file, form);//3.解压缩前的准备和检验
        ofstream ofs(uncompressfile.c_str(), ios::out | ios::binary);//创建该文件名的文件，并写入内容

        while (1)//4.读取字符串出现次数信息
        {
            ConfigInfo info;
            ifs.read((char*)&info, sizeof(ConfigInfo));
            if (info._count > 0)
            {
                _hashInfos[(unsigned char)info._ch]._count = info._count;
            }
            else
            {
                break;
            }
        }

        //计时器
        clock_t start = 0, end = 0;
        start = clock();

        //5.重建huffman树
        CharInfo invalid;
        invalid._count = 0;
        HuffmanTree<CharInfo> tree(_hashInfos, 256, invalid);

        //6.解压缩
        //根据读取的huffman编码在huffman树中找到对应字符，写入到文件中
        Node* root = tree.GetRoot();
        LongType filecount = root->_w._count;
        Node* cur = root;
        char ch;
        while (ifs.get(ch))
        {
            for (size_t i = 0; i < 8; ++i)
            {
                if (ch&(1 << i))//1
                    cur = cur->_right;
                else//0
                    cur = cur->_left;
                if (cur->_left == NULL&&cur->_right == NULL)
                {
                    ofs.put(cur->_w._ch);
                    cur = root;
                    if (--filecount == 0)
                    {
                        break;
                    }
                }
            }
        }
        end = clock();
        printf("解压用时：%d ms\n", end - start);
    }
private:
    CharInfo _hashInfos[256];
};

// Compresses `filename` with a fresh FileCompress instance.
// Declared inline because it is defined in a header: without it, including
// the header from more than one source file causes multiple-definition
// link errors.
inline void TestCompress(const char* filename)
{
    FileCompress fc;
    fc.Compress(filename);
}

// Decompresses `filename` with a fresh FileCompress instance.
// Declared inline because it is defined in a header: without it, including
// the header from more than one source file causes multiple-definition
// link errors.
inline void TestUnCompress(const char* filename)
{
    FileCompress fc;
    fc.UnCompress(filename);
}

test.cpp

//#define _CRT_SECURE_NO_WARNINGS

#include<iostream>
#include<stdlib.h>
#include"FileComparess.h"
using namespace std;

// Shows the main menu, reads the user's choice and a file path from stdin,
// then runs compression (choice 1) or decompression (choice 2).
// Any other choice, or input that cannot be read, ends the function without
// doing anything.
void menu()
{
    printf("********欢迎使用Huffman文件压缩********\n");
    printf("\n");
    printf("********1、文件压缩 2、解压文件*********\n");

    int select = 0;
    if (scanf("%d", &select) != 1)
        return;
    if (select != 1 && select != 2)
        return;

    printf("请输入文件所在路径和文件格式：");
    char filename[100] = { 0 };
    // The width keeps an over-long path from overrunning the buffer.
    if (scanf("%99s", filename) != 1)
        return;

    if (select == 1)
        TestCompress(filename);
    else
        TestUnCompress(filename);
}

// Program entry point: runs the interactive menu once and exits successfully.
int main()
{
    menu();
    return EXIT_SUCCESS;
}

wn12-02

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
项目：文件压缩(huffman树)

原理：将每个字符转换成对应的编码，而编码都是由0和1组成，那么就可以用比特位来表示编码对应的字符，从而实现文件压缩；同样，再根据压缩的步骤反推，即可实现解压缩思路： 1，统计文件中各个字符出现的次数定义一个哈希表，用直接定址法，统计各个字符出现的次数；要注意的是：字符的大小是-127~128，而哈希表的范围是0~255，所以使用直接定址法的时候需强转成无符号2，构建哈夫...
复制链接

扫一扫