文件压缩

11 篇文章 0 订阅
10 篇文章 0 订阅

项目名称《文件压缩》

开发环境:Windows,VS2013

所用到的技术:堆(heap)、哈夫曼树、哈夫曼编码、C++

文件压缩项目的大致思路:

1.统计源文件中各字符出现的次数,利用小堆构建哈夫曼树,再由哈夫曼树得到每个待压缩字符的哈夫曼编码

2.利用哈夫曼编码对源文件进行压缩,即在压缩文件中按顺序写入各字符对应的哈夫曼编码,并生成配置文件(记录各字符及其出现次数),供解压时使用

3.利用配置文件获得各个字符及字符出现的次数,再次用小堆构建哈夫曼树

4.利用哈夫曼树对压缩文件进行解压。


下边是具体实现的代码:

Heap.h --->构建小堆

#pragma once    
#include <assert.h>
#include <iostream>
#include <utility>
#include <vector>

// 小堆    
/// Strict "less than" comparison functor.
/// It is the default Compare argument of Heap, where it puts the smallest element on top.
template<class T>
struct Less
{
	/// @param l left operand
	/// @param r right operand
	/// @return true when `l < r`.
	/// The call operator is const so the functor can be invoked through a
	/// const object or const reference.
	bool operator() (const T& l, const T& r) const
	{
		return l < r;
	}
};

/// Strict "greater than" comparison functor.
/// Passing it as the Compare argument of Heap puts the largest element on top.
template<class T>
struct Greater
{
	/// @param l left operand
	/// @param r right operand
	/// @return true when `l > r`.
	/// The call operator is const so the functor can be invoked through a
	/// const object or const reference.
	bool operator() (const T& l, const T& r) const
	{
		return l > r;
	}
};

/// Binary heap stored in a std::vector.
///
/// The element for which `Compare()(x, other)` holds against every other element
/// sits at index 0; with the default Less<T> that is the smallest element.
/// Children of index i live at 2*i+1 and 2*i+2.
template<class T, class Compare = Less<T>>
class Heap
{
public:
	/// Creates an empty heap.
	Heap()
	{}

	/// Builds a heap from the first `size` elements of `a`.
	/// @param a    pointer to the source elements (copied)
	/// @param size number of elements to copy; 0 yields an empty heap
	Heap(const T* a, size_t size)
		: _arrays(a, a + size)
	{
		// Sift down every internal node, last parent first. `i-- > 0` counts an
		// unsigned index down safely, so 0 or 1 elements simply skip the loop
		// (the former `(size - 2) / 2` underflowed for those sizes).
		for (size_t i = _arrays.size() / 2; i-- > 0; )
		{
			AdjustDown(static_cast<int>(i));
		}
	}

	/// Inserts a copy of `x` and restores the heap order.
	void Push(const T& x)
	{
		_arrays.push_back(x);
		AdjustUp(static_cast<int>(_arrays.size() - 1));
	}

	/// Removes the top element. The heap must not be empty.
	void Pop()
	{
		assert(!_arrays.empty());

		// Move the top to the back, drop it, then sift the new root down.
		using std::swap;
		swap(_arrays.front(), _arrays.back());
		_arrays.pop_back();

		AdjustDown(0);
	}

	/// @return reference to the top element. The heap must not be empty.
	T& Top()
	{
		assert(!_arrays.empty());
		return _arrays[0];
	}

	/// Read-only access to the top element. The heap must not be empty.
	const T& Top() const
	{
		assert(!_arrays.empty());
		return _arrays[0];
	}

	/// @return true when the heap holds no elements.
	bool Empty() const
	{
		return _arrays.empty();
	}

	/// @return number of stored elements.
	int Size() const
	{
		return static_cast<int>(_arrays.size());
	}

	/// Sifts the element at index `root` down until neither child should be above it.
	/// A negative or out-of-range index is a no-op.
	void AdjustDown(int root)
	{
		if (root < 0)
		{
			return;
		}

		using std::swap;
		Compare com;
		const size_t count = _arrays.size();
		size_t parent = static_cast<size_t>(root);
		size_t child = parent * 2 + 1;

		while (child < count)
		{
			// Pick whichever child should be closer to the top.
			if (child + 1 < count && com(_arrays[child + 1], _arrays[child]))
			{
				++child;
			}

			if (!com(_arrays[child], _arrays[parent]))
			{
				break;
			}

			swap(_arrays[child], _arrays[parent]);
			parent = child;
			child = parent * 2 + 1;
		}
	}

	/// Sifts the element at index `child` up while it should be above its parent.
	void AdjustUp(int child)
	{
		using std::swap;
		Compare com;

		while (child > 0)
		{
			const int parent = (child - 1) / 2;
			if (!com(_arrays[child], _arrays[parent]))
			{
				break;
			}

			swap(_arrays[parent], _arrays[child]);
			child = parent;
		}
	}

	/// Writes the elements in storage order to std::cout, separated by spaces,
	/// followed by a newline.
	void Print() const
	{
		for (size_t i = 0; i < _arrays.size(); ++i)
		{
			std::cout << _arrays[i] << " ";
		}
		std::cout << std::endl;
	}

public:
	// Backing storage in heap order (index 0 is the top). Left public as in the
	// original interface.
	std::vector<T> _arrays;
};

哈夫曼树的实现代码  HuffmanTree.h :

#pragma once    
#include "Heap.h"    
#include<assert.h>    

/// Node of a Huffman tree: a weight plus links to both children and the parent.
template<class T>
struct HuffmanTreeNode
{
	HuffmanTreeNode<T>* _left;    // left child; null until linked
	HuffmanTreeNode<T>* _right;   // right child; null until linked
	HuffmanTreeNode<T>* _parent;  // parent node; null until linked
	T _weight;                    // weight carried by this node

	/// Creates an unlinked node carrying a copy of `x` as its weight.
	/// The initializers follow the member declaration order, which is the order
	/// in which the members are actually constructed.
	HuffmanTreeNode(const T& x)
		: _left(nullptr)
		, _right(nullptr)
		, _parent(nullptr)
		, _weight(x)
	{}
};

/// Huffman tree built from an array of weights with the help of a min-heap.
/// The tree owns its nodes and frees them in the destructor.
template<class T>
class HuffmanTree
{
	typedef HuffmanTreeNode<T> Node;

public:

	/// Creates an empty tree (no root).
	HuffmanTree()
		:_root(nullptr)
	{}

	/// Frees every node reachable from the root.
	~HuffmanTree()
	{
		Destory(_root);
	}

	/// Heap comparator: orders node pointers by ascending weight.
	/// The template parameter is unused; it is kept so that the existing spelling
	/// `NodeCompare<T>` stays valid. It is named U because reusing T would shadow
	/// the enclosing class's template parameter, which standard C++ rejects.
	template <class U>
	struct NodeCompare
	{
		bool operator()(Node *l, Node *r) const
		{
			return l->_weight < r->_weight;
		}
	};

public:
	/// Builds the tree from `a[0..size)`, skipping entries for which
	/// `a[i] != invalid` is false. Any previously built tree is released first.
	/// If no entry passes the filter, the tree is left empty (null root).
	/// @param a       array of weights; must not be null
	/// @param size    number of entries in `a`
	/// @param invalid sentinel value marking entries to skip
	void CreatTree(const T* a, size_t size, const T& invalid)
	{
		assert(a);

		// A second call must not leak the nodes of the first tree.
		Destory(_root);
		_root = nullptr;

		Heap<Node*, NodeCompare<T>> minHeap;
		for (size_t i = 0; i < size; ++i)
		{
			if (a[i] != invalid)
			{
				minHeap.Push(new Node(a[i]));
			}
		}

		// Nothing to build: calling Top() on an empty heap would be invalid.
		if (minHeap.Empty())
		{
			return;
		}

		// Repeatedly merge the two lightest subtrees under a new parent whose
		// weight is their sum, until a single tree remains.
		while (minHeap.Size() > 1)
		{
			Node* left = minHeap.Top();
			minHeap.Pop();
			Node* right = minHeap.Top();
			minHeap.Pop();

			Node* parent = new Node(left->_weight + right->_weight);
			parent->_left = left;
			parent->_right = right;
			left->_parent = parent;
			right->_parent = parent;

			minHeap.Push(parent);
		}

		_root = minHeap.Top();
	}


	/// @return the root node, or null when the tree is empty.
	/// The tree keeps ownership of the node.
	Node* GetRootNode()
	{
		return _root;
	}


	/// Recursively deletes the subtree rooted at `root` (children first).
	/// A null pointer is a no-op.
	void Destory(Node* root)
	{
		if (root)
		{
			Destory(root->_left);
			Destory(root->_right);
			delete root;
		}
	}
private:
	// Copying would make two trees delete the same nodes.
	HuffmanTree(const HuffmanTree&) = delete;
	HuffmanTree& operator=(const HuffmanTree&) = delete;

	HuffmanTreeNode<T>* _root;
};

文件压缩的实现 --->  FileCompress.h

#pragma once  
#include"HuffmanTree.h"  
#include<algorithm>  
#include<windows.h>  
#include<string.h>  
using namespace std;

// 64-bit counter type: an int is too small for the occurrence counts of large files.
typedef long long Longtype;

/// Bookkeeping for one byte value: the byte, how often it occurs, and its code.
/// Arithmetic and comparisons look at `_count` only, which is what the Huffman
/// tree uses as the node weight.
struct CharInfo
{
	unsigned char _ch;   // byte value; unsigned so it covers 0..255 instead of -128..127
	Longtype _count;     // number of occurrences
	std::string _code;   // code for this byte as a sequence of '0'/'1' characters

	/// Creates an entry for byte `ch` with a zero count and an empty code.
	CharInfo(unsigned char ch = 0)
		:_ch(ch)
		, _count(0)
	{}

	/// @return a new entry whose count is the sum of both counts; its byte value
	/// and code are left at their defaults.
	CharInfo operator+(const CharInfo& file) const
	{
		CharInfo tmp;
		tmp._count = _count + file._count;
		return tmp;
	}

	/// Orders entries by ascending count.
	bool operator < (const CharInfo& file) const
	{
		return _count < file._count;
	}

	/// Two entries differ when their counts differ; the byte value is ignored.
	bool operator != (const CharInfo& file) const
	{
		return _count != file._count;
	}
};


template<class T>
class FileCompress
{
public:
	FileCompress()
	{
		for (int i = 0; i < 256; ++i)
		{
			_arr[i]._ch = i;
		}
	}

public:

	bool Compress(const char* filename)
	{
		//1.打开文件,统计文件字符出现的次数    
		long long Charcount = 0;
		assert(filename);
		FILE* fOut = fopen(filename, "rb");//"rb"为以二进制方式读取文件,这里的b就是binary。"wb"为以二进制方式写入文件  
		assert(fOut);					//以二进制和文本打开方式区别在于:以文本打开方式会将\r\n
										//转换为\n,二进制这不会有这样的转换
		char ch = fgetc(fOut);

		while (ch != EOF)
		{
			_arr[(unsigned char)ch]._count++;
			ch = fgetc(fOut);
			Charcount++;
		}

		//2.生成对应的huffman编码    
		GenerateHuffmanCode();

		//3.文件压缩    
		string compressFile = filename;
		compressFile += ".compress";
		FILE* fwCompress = fopen(compressFile.c_str(), "wb");
		assert(fwCompress);

		fseek(fOut, 0, SEEK_SET);
		ch = fgetc(fOut);
		char inch = 0;
		int index = 0;
		while (!feof(fOut))
		{
			string& code = _arr[(unsigned char)ch]._code;
			for (size_t i = 0; i < code.size(); ++i)
			{
				inch = inch << 1;
				if (code[i] == '1')
				{
					inch |= 1;
				}
				if (++index == 8)//对于形成的长串字符编码的切割,每8个bit为一个字节,便于读取  
				{
					fputc(inch, fwCompress);
					inch = 0;
					index = 0;
				}
			}
			ch = fgetc(fOut);
		}

		if (index)//考虑到可能会有切割完,剩余的字符码不够填充8个bit位的情况  
		{
			inch = inch << (8 - index);
			fputc(inch, fwCompress);
		}

		//4.配置文件,方便后续的解压缩;  
		string configFile = filename;
		configFile += ".config";
		FILE *fconfig = fopen(configFile.c_str(), "wb");
		assert(fconfig);

		char CountStr[128];
		_itoa(Charcount >> 32, CountStr, 10);
		fputs(CountStr, fconfig);
		fputc('\n', fconfig);
		_itoa(Charcount & 0xffffffff, CountStr, 10);
		fputs(CountStr, fconfig);
		fputc('\n', fconfig);

		CharInfo invalid;
		for (int i = 0; i < 256; i++)
		{
			if (_arr[i] != invalid)
			{
				fputc(_arr[i]._ch, fconfig);
				fputc(',', fconfig);
				fputc(_arr[i]._count + '0', fconfig);
				fputc('\n', fconfig);
			}
		}

		fclose(fOut);
		fclose(fwCompress);
		fclose(fconfig);

		return true;
	}
	//文件的解压  
	bool UnCompresss(const char* filename)
	{
		string configfile = filename;
		configfile += ".config";
		FILE* outConfig = fopen(configfile.c_str(), "rb");
		assert(outConfig);
		char ch;
		long long Charcount = 0;
		string line = ReadLine(outConfig);
		Charcount = atoi(line.c_str());
		Charcount <<= 32;
		line.clear();
		line = ReadLine(outConfig);
		Charcount += atoi(line.c_str());
		line.clear();

		while (feof(outConfig))
//feof()遇到文件结束,函数值为非零值,否则为0。当把数据以二进制的形式进行存放时,可能会有-1值的出现,
//所以此时无法利用-1值(EOF)做为eof()函数判断二进制文件结束的标志。  
		{
			line = ReadLine(outConfig);
			if (!line.empty())
			{
				ch = line[0];
				_arr[(unsigned char)ch]._count += atoi(line.substr(2).c_str());
				line.clear();
			}
			else
			{
				line = '\n';
			}
		}

		HuffmanTree<CharInfo> ht;
		CharInfo invalid;
		ht.CreatTree(_arr, 256, invalid);//重新建树

		HuffmanTreeNode<CharInfo>* root = ht.GetRootNode();

		string  UnCompressFile = filename;
		UnCompressFile += ".uncompress";
		FILE* fOut = fopen(UnCompressFile.c_str(), "wb");

		string CompressFile = filename;
		CompressFile += ".compress";
		FILE* fIn = fopen(CompressFile.c_str(), "rb");

		int pos = 8;
		HuffmanTreeNode<CharInfo>* cur = root;
		ch = fgetc(fIn);

		while ((unsigned char)ch != EOF)
		{
			--pos;
			if ((unsigned char)ch &(1 << pos))
			{
				cur = cur->_right;
			}
			else
			{
				cur = cur->_left;
			}
			if (cur->_left == NULL && cur->_right == NULL)
			{
				fputc(cur->_weight._ch, fOut);
				cur = root;
				Charcount--;
			}
			if (pos == 0)
			{
				ch = fgetc(fIn);
				pos = 8;
			}
			if (Charcount == 0)
			{
				break;
			}
		}

		fclose(outConfig);
		fclose(fIn);
		fclose(fOut);
		return true;
	}

protected:
	string ReadLine(FILE* fConfig)
	{
		char ch = fgetc(fConfig);
		if (ch == EOF)
		{
			return "";
		}
		string line;
		while (ch != '\n' && ch != EOF)
		{
			line += ch;
			ch = fgetc(fConfig);
		}
		return line;
	}

	void GenerateHuffmanCode()
	{
		HuffmanTree<CharInfo> hft;
		CharInfo invalid;

		hft.CreatTree(_arr, 256, invalid);
		_GenerateHuffmanCode(hft.GetRootNode());
	}

	void _GenerateHuffmanCode(HuffmanTreeNode<CharInfo>* root)
	{
		if (root == NULL)
		{
			return;
		}

		_GenerateHuffmanCode(root->_left);
		_GenerateHuffmanCode(root->_right);

		if (root->_left == NULL && root->_right == NULL)
		{
			HuffmanTreeNode<CharInfo>* cur = root;
			HuffmanTreeNode<CharInfo>* parent = cur->_parent;
			string& code = _arr[cur->_weight._ch]._code;

			while (parent)
			{
				if (parent->_left == cur)
				{
					code += '0';
				}
				else if (parent->_right == cur)
				{
					code += '1';
				}
				cur = parent;
				parent = cur->_parent;
			}

			reverse(code.begin(), code.end());
		}
	}

private:
	CharInfo _arr[256];
};

void TestFileCompress()
{

	FileCompress<CharInfo> fc;
	cout << "Input文件压缩中...." << endl;
	cout << "压缩用时: ";
	int begin1 = GetTickCount();
	fc.Compress("Input");//  
	int end1 = GetTickCount();//  
	cout << end1 - begin1 << endl << endl;
	
	cout << "Input文件解压中...." << endl;;
	cout << "解压用时: ";
	int begin2 = GetTickCount();
	fc.UnCompresss("Input");
	int end2 = GetTickCount();//用以测试解压用时  
	cout << end2 - begin2 << endl << endl;

	FileCompress<CharInfo> fc1;
	
	cout << "Input.BIG文件压缩中...." << endl;
	cout << "压缩用时: ";
	int begin3 = GetTickCount();
	fc1.Compress("Input.BIG");//  
	int end3 = GetTickCount();//  
	cout << end3 - begin3 << endl << endl;

	cout << "Input.BIG文件解压中...." << endl;
	cout << "解压用时: ";
	int begin4 = GetTickCount();
	fc1.UnCompresss("Input.BIG");
	int end4 = GetTickCount();  
	cout << end4 - begin4 << endl;
}

main函数:

#define _CRT_SECURE_NO_WARNINGS    
#include <iostream>    
using namespace std;
#include "FileCompress.h"    


/// Program entry point: runs the compression/decompression timing demo.
int main()
{
	TestFileCompress();
	// Reaching the end of main() is equivalent to `return 0;`.
}

下边是项目运行结果截图:



ps:图中的比较软件为 BeyondCompare4



















  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值