项目:文件压缩及解压缩

项目描述:实现文件的压缩及解压缩。 

开发平台:VS2013 

开发技术:堆,Huffman树,文件输入输出函数 
项目特点:
1.统计文件中字符出现的次数,利用数据结构堆建造Huffman树,出现次数多的编码短,出现次数少的编码长。 
2.根据建造好的Huffman树形成编码,以对文件进行压缩。
3.将文件中出现的字符以及他们出现的次数写入配置文件,以便后续的解压缩。 
4.根据配置文件读取相关信息重建Huffman树,对压缩后的文件进行译码。

项目成果:该文件压缩可将8k文件大概压缩至6k,解压缩可无错误地还原原文件。

注:由于文件较大,不易用眼睛看出原文件和解压后文件是否完全相同,这里借助软件Beyond Compare来检查两文件是否完全相同


项目代码:

FileCompress.h

#pragma once

#include<iostream>
#include<stdio.h>
#include<string>
#include "Heap.h"
#include<assert.h>


using namespace std;
// One node of a binary Huffman tree.
// A freshly constructed node has no children; callers link `_left` / `_right`
// themselves after construction.
template<class T>
struct HaffManTreeNode
{
	HaffManTreeNode<T>* _left;   // left child, null until linked
	HaffManTreeNode<T>* _right;  // right child, null until linked
	T _weight;                   // copy of the weight passed to the constructor

	// Creates a childless node that stores a copy of `w`.
	HaffManTreeNode(const T& w)
		: _left(nullptr), _right(nullptr), _weight(w)
	{}
};
// Frequency record for one byte value: the byte, how often it occurs, and the
// Huffman code assigned to it. Arithmetic and comparisons look at `_count` only.
struct CharInfo
{
	typedef unsigned long LongType;

	unsigned char _ch;   // byte value this record describes
	LongType _count;     // number of occurrences
	std::string _code;   // code string; empty until a code has been assigned

	// Creates a record with the given count. `_ch` is zero-initialized so that
	// records created by operator+ never carry an indeterminate byte value.
	CharInfo(LongType count = 0)
		: _ch(0)
		, _count(count)
	{}

	// Returns a record whose count is the sum of both counts; `_ch` and `_code`
	// of the result are left at their defaults.
	CharInfo operator +(const CharInfo& right) const
	{
		return CharInfo(_count + right._count);
	}

	bool operator !=(const CharInfo& right) const
	{
		return _count != right._count;
	}
	bool operator >(const CharInfo& right) const
	{
		return _count > right._count;
	}
	bool operator <(const CharInfo& right) const
	{
		return _count < right._count;
	}

};
template<class T>
class HaffManTree
{

	typedef HaffManTreeNode<T> Node;
public:
	HaffManTree(const T* a, size_t n, const T& invaid)
	{
		struct NodeCompare
		{
			bool operator()(Node* l, Node* r)
			{
				return l->_weight < r->_weight;
			}
		};

		Heap<Node*, NodeCompare> minHeap;

		for (size_t i = 0; i < n; i++)
		{
			if (a[i] != invaid)
			{
				minHeap.Push(new Node(a[i]));
			}
		}
		while (minHeap.Size()>1)
		{
			Node* left = minHeap.Top();
			minHeap.Pop();
			Node* right = minHeap.Top();
			minHeap.Pop();
			Node* parent = new Node(left->_weight + right->_weight);
			parent->_left = left;
			parent->_right = right;
			minHeap.Push(parent);
		}
		_root = minHeap.Top();
	}

	Node *GetRoot()
	{
		return _root;
	}

protected:
	Node* _root;
};


class FileCompress
{
public:
	FileCompress()
	{
		for (int i = 0; i < 256; i++)
		{
			_infos[i]._ch = i;
			_infos[i]._count = 0;
		}
	}
	void Compress(string filename)
	{
		FILE *fout = fopen(filename.c_str(), "rb");
		assert(fout);
		char ch = fgetc(fout);//读取文件字符
		while (ch != EOF)
		{
			_infos[(unsigned char)ch]._count++;
			ch = fgetc(fout);
		}
		CharInfo invalid;
		string code;
		HaffManTree<CharInfo> tree(_infos, 256, invalid);//建立哈夫曼树
		GenerateHaffManCode(tree.GetRoot(), code);//创建哈夫曼编码

		//压缩
		string compressName = filename + ".haffman";
		FILE* fin = fopen(compressName.c_str(), "wb");
		fseek(fout, 0, SEEK_SET);
		ch = fgetc(fout);
		char value = 0;
		int size = 0;
		while (ch != EOF)
		{
			string code = _infos[(unsigned char)ch]._code;
			for (int i = 0; i < code.size(); i++)
			{
				if (code[i] == '1')
				{
					value |= 1;
				}
				++size;
				if (size == 8)
				{
					fputc(value, fin);
					value = 0;
					size = 0;
				}
				value <<= 1;
			}

			ch = fgetc(fout);
		}
		if (size > 0)
		{
			value <<= 7 - size;
			fputc(value, fin);
		}
		//写匹配文件

		string configFile = filename += ".config";
		FILE* fconfig = fopen(configFile.c_str(),"wb");
		string line;
		for (int i = 0; i < 256; i++)
		{
			if (_infos[i]._count>0)
			{
				line += _infos[i]._ch;
				line += ",";
				char buf[1024];
				_itoa(_infos[i]._count, buf, 10);
				line += buf;
				line += '\n';
				fputs(line.c_str(), fconfig);
			}
			line.clear();
		}

		fclose(fout);
		fclose(fin);
		fclose(fconfig);
	}
	void GenerateHaffManCode(HaffManTreeNode<CharInfo>* root, string code)
	{
		if (root == NULL)
			return;
		if (root->_left == NULL && root->_right == NULL)
		{
			_infos[root->_weight._ch]._code = code;
			return;
		}
		GenerateHaffManCode(root->_left, code + '0');
		GenerateHaffManCode(root->_right, code + '1');
		
	}
	bool ReadLine(FILE* fout,string& line)
	{
		char ch = fgetc(fout);
		if (ch==EOF)
		{
			return false;
		}
		while (ch!=EOF && ch !='\n')
		{
			line += ch;
			ch = fgetc(fout);
		}
		return true;
	}
	void UnCompress(string filename)
	{
		//读配置文件
		string configname = filename + ".config";
		FILE* fconfig = fopen(configname.c_str(),"rb");
		assert(fconfig);
		string line;
		
		while(ReadLine(fconfig, line))
		{
			if (line.empty())
			{
				line += '\n';
			}
			else
			{
			    unsigned char ch = line[0];
				_infos[ch]._count = atoi(line.substr(2).c_str());
				line.clear();
			}
		}
		CharInfo invalid;
		HaffManTree<CharInfo> tree(_infos, 256, invalid);
		string compressFile = filename + ".haffman";

		FILE *fout = fopen(compressFile.c_str(), "rb");
		assert(fout);
		string uncompress = filename += "com";
		FILE *fin = fopen(uncompress.c_str(), "wb");
		unsigned char ch = fgetc(fout);
		int pos = 7;
		HaffManTreeNode<CharInfo>* root = tree.GetRoot();
		HaffManTreeNode<CharInfo>* cur = root;
		int count = root->_weight._count;
		while (ch != EOF)
		{
			if (ch & (1 << pos))
			{
				cur = cur->_right;
			}
			else
				cur = cur->_left;
			if (cur->_left == NULL && cur->_right == NULL)
			{
				fputc(cur->_weight._ch, fin);
				count--;
				cur = root;
			}
			
			if (pos == 0)
			{
				ch = fgetc(fout);
				pos = 8;
			}
			pos--;
			if (count == 0)
				break;
		}

		fclose(fout);
		fclose(fin);
		fclose(fconfig);
	}
protected:
	CharInfo _infos[256];
};

// Manual smoke test: compresses the file named "input" in the working directory.
// Declared inline because it is defined in a header; without it, including the
// header from two translation units gives a multiple-definition link error.
inline void TestCompressFile()
{
	FileCompress file;
	file.Compress("input");
}
// Manual smoke test: decompresses the data previously produced for the file
// named "input" in the working directory.
// Declared inline because it is defined in a header; without it, including the
// header from two translation units gives a multiple-definition link error.
inline void TestUnCompressFile()
{
	FileCompress file;
	file.UnCompress("input");
}

堆文件:

#pragma once
#include <vector>
#include <assert.h>
#include<iostream>
using namespace std; 

// Comparison functors

// Returns true when `l` orders before `r` according to operator<.
// The call operator is const so that const functor objects can be invoked.
template <class T>
struct Less
{
	bool operator() (const T& l, const T& r) const
	{
		return l < r;
	}

};

// Returns true when `l` orders after `r` according to operator>.
// The call operator is const so that const functor objects can be invoked.
template <class T>
struct Greater
{
	bool operator() (const T& l, const T& r) const
	{
		return l > r;
	}
};


// Binary heap stored in a std::vector.
// `Compare(x, y)` returning true means x belongs above y. With the default
// Greater<T> the largest element is on top (max-heap); with Less<T> the smallest.
template<class T, class Compare = Greater<T>>
class Heap
{
public:
	Heap()
	{}

	// Builds a heap from the first `size` elements of `a`.
	// `a` may be null only when `size` is 0.
	Heap(const T* a, size_t size)
	{
		assert(a != nullptr || size == 0);
		_a.reserve(size);
		for (size_t i = 0; i < size; ++i)
		{
			_a.push_back(a[i]);
		}

		// Bottom-up heap construction, O(N): sift down every node that has a
		// child, from the last such node (index size/2 - 1) up to and including
		// the root. Counting `next` from size/2 down to 1 avoids unsigned
		// underflow for size 0 and 1.
		for (size_t next = _a.size() / 2; next > 0; --next)
		{
			_AdjustDown(next - 1);
		}
	}

	// Inserts a copy of `x`; O(log N).
	void Push(const T& x)
	{
		_a.push_back(x);
		_AdjustUp(_a.size() - 1);
	}

	// Removes the top element; O(log N). The heap must not be empty.
	void Pop()
	{
		assert(!_a.empty());

		using std::swap;
		swap(_a[0], _a[_a.size() - 1]);
		_a.pop_back();

		_AdjustDown(0);
	}

	// Returns the top element. The heap must not be empty.
	T& Top()
	{
		assert(!_a.empty());
		return _a[0];
	}
	const T& Top() const
	{
		assert(!_a.empty());
		return _a[0];
	}

	size_t Size() const
	{
		return _a.size();
	}

	bool Empty() const
	{
		return _a.empty();
	}

protected:
	// Moves the element at index `parent` down until neither child belongs
	// above it.
	void _AdjustDown(size_t parent)
	{
		using std::swap;
		Compare com;

		// Start at the left child.
		size_t child = parent * 2 + 1;
		while (child < _a.size())
		{
			// Pick whichever child belongs higher.
			if (child + 1 < _a.size()
				&& com(_a[child + 1], _a[child]))
			{
				++child;
			}

			// If that child belongs above the parent, swap and keep going down.
			if (com(_a[child], _a[parent]))
			{
				swap(_a[child], _a[parent]);
				parent = child;
				child = parent * 2 + 1;
			}
			else
			{
				break;
			}
		}
	}

	// Moves the element at index `child` up while it belongs above its parent.
	void _AdjustUp(size_t child)
	{
		using std::swap;
		Compare com;

		while (child > 0)
		{
			const size_t parent = (child - 1) / 2;
			if (com(_a[child], _a[parent]))
			{
				swap(_a[child], _a[parent]);
				child = parent;
			}
			else
			{
				break;
			}
		}
	}

protected:
	std::vector<T> _a;   // heap-ordered storage; index 0 is the top

};

// Manual smoke test: builds a Less<int>-ordered heap from a fixed array and
// pushes one more value. Nothing is checked or printed.
// Declared inline because it is defined in a header; without it, including the
// header from two translation units gives a multiple-definition link error.
inline void TestHeap()
{
	int a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 };

	Heap<int, Less<int>> hp1(a, sizeof(a) / sizeof(a[0]));

	hp1.Push(20);
}


// Priority queue adapter over Heap<T> with its default comparator.
// Push and Pop forward to the heap; Top, Size and Empty expose the heap's
// accessors so that queued elements can actually be read back.
template<class T>
class PriorityQueue
{
public:
	// O(lgN)
	void Push(const T& x)
	{
		_hp.Push(x);
	}

	// O(lgN). The queue must not be empty.
	void Pop()
	{
		_hp.Pop();
	}

	// Returns the element currently on top of the heap.
	// The queue must not be empty.
	T& Top()
	{
		return _hp.Top();
	}

	size_t Size()
	{
		return _hp.Size();
	}

	bool Empty()
	{
		return _hp.Empty();
	}

protected:
	Heap<T> _hp;
};

// Sift-down step of a max-heap stored in a[0..n): moves the value at index
// `parent` down until it is not smaller than either of its children.
//
// a      - array holding the heap
// n      - number of elements that belong to the heap
// parent - index of the element to sift down
//
// Does nothing when `a` is null or `parent` is negative or has no child.
// Declared inline because it is defined in a header; without it, including the
// header from two translation units gives a multiple-definition link error.
inline void _AdjustDown(int a[], int n, int parent)
{
	if (a == nullptr || parent < 0)
	{
		return;
	}

	// `parent < n / 2` is equivalent to `2 * parent + 1 < n` (parent has a left
	// child) but cannot overflow int.
	while (parent < n / 2)
	{
		int child = parent * 2 + 1;

		// Pick the larger of the two children.
		if (child + 1 < n && a[child + 1] > a[child])
		{
			++child;
		}

		if (a[parent] < a[child])
		{
			std::swap(a[parent], a[child]);
			parent = child;
		}
		else
		{
			break;
		}
	}
}


// Sorts a[0..n) in ascending order in place with heap sort.
//
// Ranges with fewer than two elements are already sorted and are returned
// untouched; this also avoids the unsigned underflow of `n - 2` and allows a
// null pointer for an empty range. `n` is passed on to _AdjustDown as int, so
// it must fit in an int.
// Declared inline because it is defined in a header; without it, including the
// header from two translation units gives a multiple-definition link error.
inline void HeapSort(int a[], size_t n)
{
	if (n < 2)
	{
		return;
	}
	assert(a);

	const int count = static_cast<int>(n);

	// Build a max-heap: sift down every node that has a child, last one first.
	for (int i = count / 2 - 1; i >= 0; --i)
	{
		_AdjustDown(a, count, i);
	}

	// Repeatedly move the current maximum to the end of the unsorted part and
	// restore the heap on what is left.
	for (int end = count - 1; end > 0; --end)
	{
		std::swap(a[0], a[end]);

		_AdjustDown(a, end, 0);
	}
}

// Manual smoke test: sorts a fixed array with HeapSort and prints the elements
// back to back, without separators, on standard output.
// Declared inline because it is defined in a header; without it, including the
// header from two translation units gives a multiple-definition link error.
inline void TestHeapSort()
{
	int a[] = { 2, 1, 4, 5, 0, 6, 3, 7, 8, 9 };
	// Derive the length from the array so the loop cannot drift out of sync
	// with the initializer.
	const size_t count = sizeof(a) / sizeof(a[0]);

	HeapSort(a, count);
	for (size_t i = 0; i < count; i++)
	{
		std::cout << a[i];
	}
}



测试代码:

#include"FileCompress.h"
#include<iostream>
#include<windows.h>
using namespace std;

int main()
{
	int begin = GetTickCount();
	TestCompressFile();
	TestUnCompressFile();
	int end = GetTickCount();

	cout << "begin:" << begin << endl;
	cout << "end:" << end << endl;

	cout << end-begin<< endl;
	cout << getchar() << endl;
	return 0;
}



  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值