文件压缩

项目名称:文件压缩

开发环境:window8.1 ——VS2013

使用对象:文本文件(.txt)

开发语言:C/C++

使用工具:VS2013 、 Beyond compare、Linux下md5sum

项目技术:Huffman树,I/O流,模板,String容器,文件操作等

项目描述
<1>.文件压缩过程:先统计文件中每个ASSCII码出现的次数,采用贪心算法,用文件中ASCII码出现的次数构建Huffman树,生成每个ASCII所对应的Huffman编码,然后将原文件中的ASCII码,替换成Huffman编码,并将文件中出现过的所有ASCII及其次数存入压缩文件起始位置,从而降低文件的大小。
<2>.文件解压过程:读取压缩文件头部所存的ASCII及其次数,用其生成Huffman树,将Huffman编码还原成ASCII码。


项目概略图:


这里写图片描述


项目测试数据:


这里写图片描述


项目性能分析:


VS2013环境下测试,该项目平均压缩率在78%左右,Release版本下,每秒可压缩文件5M。


项目中所遇到的问题:


1 . 程序崩于文件统计次数,无法完成统计

2 . 文件压缩过程进去死循环,无法正常完成压缩

3 . 文件越压越大

4 . 文件无法正常完成解压缩

5 . 对于只有一种字符的文件无法压缩


项目模块分析图:


模拟生成Huffaman树
这里写图片描述


模拟生成Huffman编码
这里写图片描述


Huffman编码存储


这里写图片描述


项目总结


经多次多项测试,采用加权算法求得平均值,本项目可以针对文本文件进行压缩,压缩率可达78%,每秒可压缩文件5M。


项目源码:

构思源码链接


/****************** "HuffmanTree.h" *********************/
#pragma once

#include <iostream>
#include <vector>
#include <queue>
using namespace std;

template <class W>
struct HuffmanTreeNode {

    W _w;//权值 次数
    HuffmanTreeNode<W>* _left;//左孩子
    HuffmanTreeNode<W>* _right;//右孩子
    HuffmanTreeNode<W>* _parent;//父亲

    HuffmanTreeNode(const W& w)
        :_left(NULL)
        , _right(NULL)
        , _w(w)
        , _parent(NULL)
    {}

};

//构造哈夫曼树
template <class W>
class HuffmanTree {

    typedef HuffmanTreeNode<W> Node;

    struct PtrNodeCompare{

        bool operator()(Node* left, Node* right){
            return left->_w > right->_w;
        }
    };

public:
    HuffmanTree()
        :_root(NULL)
    {}

    HuffmanTree(W* w, size_t n, const W& invalid){

        priority_queue <Node*, vector<Node*>, PtrNodeCompare> minheap;
        for (size_t i = 0; i < n; ++i){
            if (w[i] != invalid){
                minheap.push(new Node(w[i]));
            }
        }

        if (minheap.size() == 0){

            cout << "无效文件" << endl;
            exit(0);
        }
        else if (minheap.size() == 1){
            Node* right = minheap.top();
            _root = new Node(right->_w);
            right->_parent = _root;
            _root->_right = right;
        }
        else{
            while (minheap.size() > 1){

                Node* left = minheap.top();
                minheap.pop();

                Node* right = minheap.top();
                minheap.pop();

                Node* parent = new Node(left->_w + right->_w);

                parent->_left = left;
                parent->_right = right;

                left->_parent = parent;
                right->_parent = parent;

                minheap.push(parent);
            }

            _root = minheap.top();
        }
    }

    Node* GetRoot(){
        return _root;
    }

    ~HuffmanTree(){
        Destory(_root);
        _root = NULL;
    }

    void Destory(Node* root){
        if (root == NULL){
            return;
        }
        Destory(root->_left);
        Destory(root->_right);
        delete root;
    }

private:
    HuffmanTree(const HuffmanTree<W>& t); 
    HuffmanTree<W>& operator= (const HuffmanTree<W>& t); 

protected:
    Node* _root;
};

/******************** "FileCompress.h" **********************/

#pragma once

#include <string>
#include <stdio.h>
#include <windows.h>
#include <fstream>
#include <algorithm>
#include <assert.h>
#include "HuffmanTree.h"
#pragma warning (disable: 4996)
using namespace std;


typedef long long LongType;

struct CharInfo{

    char _ASCII; //字符
    LongType _count;//次数
    string _code;//权值   

    CharInfo operator+(const CharInfo& info){
        CharInfo ret;
        ret._count = _count + info._count;
        return ret;
    }

    bool operator>(const CharInfo& info){
        return _count > info._count;
    }

    bool operator<(const CharInfo& info){
        return _count < info._count;
    }

    bool operator!=(const CharInfo& info){
        return _count != info._count;
    }

    bool operator==(const CharInfo& info){
        return _count == info._count;
    }

};



class FileCompress{

    typedef HuffmanTreeNode<CharInfo> Node;

    struct TmpInfo{
        char _ch; //字符
        LongType _num;//数量
    };

public:
    FileCompress(){
        for (size_t i = 0; i < 256; ++i){
            _infos[i]._ASCII = i;
            _infos[i]._count = 0;
        }
    }

    //1.统计字符出现的次数
    void Compress(const char* file){

        FILE* f_out = fopen(file, "r");
        assert(f_out);

        char character = fgetc(f_out);

        while (character != EOF){
            _infos[(unsigned char)character]._count++; 
            character = fgetc(f_out);
        }

        //2.构建huffman tree

        CharInfo invalid;
        invalid._count = 0;
        HuffmanTree<CharInfo> tree(_infos, 256, invalid);

        //3.生成code
        GenerateHuffmanCode(tree.GetRoot());

        //4.压缩
        string compressFile = file;  
        compressFile += ".huffman";

        FILE* f_in = fopen(compressFile.c_str(), "wb");

        for (size_t i = 0; i < 256; ++i){
            if (_infos[i]._count > 0){
                TmpInfo tmpinfo;
                tmpinfo._ch = _infos[i]._ASCII; //字符
                tmpinfo._num = _infos[i]._count;//次数

                //解压时需要用其生成Huffman树 
                fwrite(&tmpinfo, sizeof(TmpInfo), 1, f_in);
            }
        }

        TmpInfo tmpinfo;
        tmpinfo._num = -1;
        fwrite(&tmpinfo, sizeof(TmpInfo), 1, f_in);
        rewind(f_out);

        character = fgetc(f_out); 
        size_t CeffectiveBit = 0;
        char value = 0;
        int pos = 0;

        while (character != EOF){

            string& code = _infos[(unsigned char)character]._code;
            for (size_t i = 0; i < code.size(); ++i){

                if (code[i] == '0'){
                    value &= ~(1 << pos);
                }
                else if (code[i] == '1'){
                    value |= (1 << pos);
                }
                else{
                    assert(false);
                }

                ++pos;

                if (pos == 8){
                    fputc(value, f_in);
                    value = 0;
                    pos = 0;
                }
                ++CeffectiveBit;

            }
            character = fgetc(f_out); 
        }
        cout << "压缩Huffman编码长度:" << CeffectiveBit<< endl;

        if (pos > 0){
            fputc(value, f_in);
        }

        fclose(f_out);
        fclose(f_in);
    }


    void GenerateHuffmanCode(Node* root){
        if (root == NULL){
            return;
        }
        if (root->_left == NULL && root->_right == NULL)  {

            string code; 
            Node* cur = root;
            Node* parent = cur->_parent;

            while (parent) {
                if (cur == parent->_left){
                    code.push_back('0');
                }
                else{
                    code.push_back('1');
                }
                cur = parent;
                parent = cur->_parent;
            }

            reverse(code.begin(), code.end());

            _infos[(unsigned char)root->_w._ASCII]._code = code;
            return;
        }
        GenerateHuffmanCode(root->_left);
        GenerateHuffmanCode(root->_right);
    }

    /**************************   解压缩   *******************************/

    void Uncompress(const char* file){
        assert(file);

        string uncompressFile = file;
        size_t pos = uncompressFile.rfind('.');    
        assert(pos != string::npos);

        uncompressFile.erase(pos, uncompressFile.size() - pos);
        uncompressFile += ".unhuffman";//测试所用  分辨与源文件的区别

        FILE* fout = fopen(file, "rb");
        assert(fout);

        FILE* fin = fopen(uncompressFile.c_str(), "w");
        assert(fin);

        TmpInfo info;
        fread(&info, sizeof(TmpInfo), 1, fout);

        while (info._num != -1){
            _infos[(unsigned char)info._ch]._ASCII = info._ch;
            _infos[(unsigned char)info._ch]._count = info._num;
            fread(&info, sizeof(TmpInfo), 1, fout);
        }

        //1.重建huffman tree 
        size_t n = 0;
        CharInfo invalid;
        invalid._count = 0;
        HuffmanTree<CharInfo> tree(_infos, 256, invalid);
        Node* root = tree.GetRoot();
        Node* cur = root;
        LongType fileSize = root->_w._count;
        cout <<"文件中ASCII个数:" <<fileSize << endl;

        //2.解压缩
        char value = fgetc(fout);
        while (1){
            for (size_t pos = 0; pos < 8; ++pos){

                if (value & (1 << pos)){
                    cur = cur->_right;
                }
                else{
                    cur = cur->_left;
                }

                ++n;

                if (NULL == cur->_left && NULL == cur->_right){

                    fputc(cur->_w._ASCII, fin); 
                    cur = root;

                    if (--fileSize == 0){
                        break;
                    }
                }

            }
            if (fileSize == 0){
                break;
            }
            value = fgetc(fout);
        }
        cout <<"解压Huffman编码长度:"<< n << endl;

        fclose(fout);
        fclose(fin);
    }




protected:

    CharInfo _infos[256];

};//类结束

/******************* Test.cpp *********************/

#include "FileCompress.h"


void TestFilecompress()
{
    FileCompress fc;

    size_t start = GetTickCount();
    fc.Compress("Input.txt");
    size_t end = GetTickCount();
    cout << "UseTime:" << end - start << endl;

    fc.Uncompress("Input.txt.huffman");
}

int main(){

    TestFilecompress();

    system("pause");
    return 0;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值