文件压缩

最新推荐文章于 2021-08-23 23:45:07 发布

csdnldsg

最新推荐文章于 2021-08-23 23:45:07 发布

阅读量317

点赞数

本文链接：https://blog.csdn.net/csdnldsg/article/details/52626233

版权

文件压缩：

简介：实现对文本文件的压缩和解压

开发环境：Windows,Vs2013

主要技术：文件读写，堆，哈夫曼树，仿函数，位操作

项目描述：文件压缩过程：打开文本并逐个字节统计字符出现的次数，然后建立Huffman树，根据Huffman树得到每个字符对应的Huffman编码，再将编码按位写入压缩文件，最后写入配置文件（记录每个字符、字符出现的次数及对应的Huffman编码，方便解压）。文件解压过程：根据配置文件重建Huffman树，逐位读取压缩文件并沿Huffman树查找，每到达一个叶子结点就将对应的字符写入输出文件，直到还原出全部字符。

HaffmanTree.h头文件

#pragma once
#include<iostream>
#include"Heap.h"
#include"FileComparess.h"

using namespace std;


// One node of a Huffman tree: a weight plus links to the two children.
// A freshly constructed node has no children (both links are NULL).
template<class T>
struct HaffmanNode
{
	HaffmanNode* _left;    // left child, NULL when absent
	HaffmanNode* _right;   // right child, NULL when absent
	T _wight;              // weight carried by this node

	// Creates a childless node holding a copy of `wight`.
	HaffmanNode(const T& wight)
		: _left(NULL), _right(NULL), _wight(wight)
	{
	}
};


template<class T>
class HaffmanTree
{
public:
	typedef HaffmanNode<T> Node;
	HaffmanTree(const T* a, size_t size, const T& invalid)    //构造函数
	{
		_root = _CreatHaffmanTree(a, size, invalid);
	}

	Node* GetRoot()
	{
	    return _root;
	}

protected:
	Node* _CreatHaffmanTree(const T* a,size_t size, const T& invalid)  //创建huffmanTree
	{
		Heap<Node*, Less<Node*>> minHeap;   //构建小堆
		for (size_t i = 0; i < size; ++i)
		{
			if (a[i] != invalid)
			{
				Node* tmp = new Node(a[i]);
				minHeap.Push(tmp);
			}
		}
		while (!minHeap.Empty())
		{
			Node* left = minHeap.GetTop();
			minHeap.Pop();
			Node* right = NULL;
			if (!minHeap.Empty())
			{
				right = minHeap.GetTop();
				minHeap.Pop();
			}
			Node* parent = NULL;
			if (right)
			{
				parent = new Node(left->_wight + right->_wight);
			}
			else
			{
				parent = new Node(left->_wight);
			}
			parent->_left = left;
			parent->_right = right;
			if (minHeap.Empty())
			{
				return parent;
			}
			minHeap.Push(parent);
		}
		return NULL;
	}
protected:
	Node* _root;
};

Heap.h //建堆的过程

#pragma once
#define _CRT_SECURE_NO_WARNINGS 1
#include<iostream>
#include<vector>
#include<assert.h>
//#include"HaffmanTree.h"
using namespace std;

// Comparison functor: yields true when the first argument compares
// less than the second one using T's operator<.
template<class T>
struct Less
{
	bool operator()(const T& lhs, const T& rhs)
	{
		const bool lhsIsSmaller = lhs < rhs;
		return lhsIsSmaller;
	}
};


// Comparison functor: yields true when the first argument compares
// greater than the second one using T's operator>.
template<class T>
struct Greater
{
	bool operator()(const T& lhs, const T& rhs)
	{
		const bool lhsIsLarger = lhs > rhs;
		return lhsIsLarger;
	}
};

// Specialization for pointers: orders two pointed-to objects by their
// `_wight` member instead of comparing the pointer values themselves.
// Both pointers are dereferenced, so neither may be null.
template<class T>
struct Less<T*>
{
	bool operator()(const T* lhs, const T* rhs)
	{
		const bool lhsIsLighter = lhs->_wight < rhs->_wight;
		return lhsIsLighter;
	}
};

// Binary heap stored in a vector. `Continer` is the comparison functor that
// decides which element sits on top; the default Less<T> gives a min-heap.
//
// NOTE(review): Heap(vector<T>) and Heap(const vector<T>&) are both viable
// for a single vector argument, so such a call looks ambiguous - confirm
// before relying on either overload.
// NOTE(review): HeapSort is declared here; no definition is visible in this
// file.
template<class T, class Continer = Less<T>>
class Heap
{
public:
	// Construction
	Heap() {}
	Heap(const T* a, size_t size, const T& invalid);
	Heap(std::vector<T> a);
	Heap(const std::vector<T>& v);

	// Modification and access to the top element
	void Push(const T& x);
	void Pop();
	T& GetTop();

	// Size queries
	bool Empty();
	size_t Size();

	void HeapSort(T* a, size_t size);

protected:
	// Sift helpers used to restore the heap order
	void _AdjustDown(size_t parent);
	void _AdjustUp(int child);

	std::vector<T> _a;   // heap elements, root at index 0
};

// Builds a heap from the elements of a[0..size) that differ from `invalid`.
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
Heap<T, Continer>::Heap(const T* a, size_t size, const T& invalid)
{
	_a.reserve(size);

	for (size_t i = 0; i < size; ++i)
	{
		if (a[i] != invalid)
		{
			_a.push_back(a[i]);
		}
	}

	// Heapify bottom-up, starting at the last node that has a child (leaves
	// are already trivial heaps). Counting down from size/2 with an unsigned
	// index avoids the wrap-around of `size - 2` when fewer than two
	// elements were kept.
	for (size_t i = _a.size() / 2; i > 0; --i)
	{
		_AdjustDown(i - 1);
	}
}
// Takes over the contents of `a` and arranges them into heap order.
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
Heap<T, Continer>::Heap(vector<T> a)
{
	_a.swap(a);

	// Heapify bottom-up from the last node that has a child. The unsigned
	// countdown from size/2 is safe for empty and single-element input,
	// where `size - 2` would wrap around.
	for (size_t i = _a.size() / 2; i > 0; --i)
	{
		_AdjustDown(i - 1);
	}
}
// Copies the elements of `v` and arranges them into heap order.
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
Heap<T, Continer>::Heap(const vector<T>& v)
	:_a(v)
{
	// A plain copy of `v` is not a heap yet; sift every parent node down,
	// starting with the last one, so that GetTop()/Pop() see a valid heap.
	for (size_t i = _a.size() / 2; i > 0; --i)
	{
		_AdjustDown(i - 1);
	}
}
// Appends `x` and sifts it up until the heap order holds again.
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
void Heap<T, Continer>::Push(const T& x)
{
	_a.push_back(x);
	// _AdjustUp takes an int index; make the narrowing explicit.
	_AdjustUp(static_cast<int>(_a.size() - 1));
}
// Removes the top element. The heap must not be empty (checked by assert).
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
void Heap<T, Continer>::Pop()
{
	assert(!_a.empty());

	// Move the top to the back, drop it, then sift the element that took
	// its place down from the root.
	using std::swap;
	swap(_a.front(), _a.back());
	_a.pop_back();
	_AdjustDown(0);
}
// Returns a reference to the top element. The heap must not be empty
// (checked by assert, matching Pop()).
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
T& Heap<T, Continer>::GetTop()
{
	assert(!_a.empty());
	return _a[0];
}
// Returns true when the heap holds no elements.
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
bool Heap<T, Continer>::Empty()
{
	return _a.empty();
}
// Returns the number of elements currently stored in the heap.
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
size_t Heap<T, Continer>::Size()
{
	return _a.size();
}

// Sifts the element at index `parent` down until neither child should be
// above it according to the comparison functor.
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
void Heap<T, Continer>::_AdjustDown(size_t parent)
{
	Continer _con;
	const size_t size = _a.size();
	size_t child = parent * 2 + 1;   // left child
	while (child < size)
	{
		// Pick whichever child should be closer to the top. The bounds
		// check must come first so the right child is only read when it
		// exists.
		if (child + 1 < size && _con(_a[child + 1], _a[child]))
		{
			++child;
		}

		// The parent is already in place relative to the chosen child.
		if (!_con(_a[child], _a[parent]))
		{
			break;
		}

		using std::swap;
		swap(_a[parent], _a[child]);
		parent = child;
		child = parent * 2 + 1;
	}
}


// Sifts the element at index `child` up while it should be above its parent
// according to the comparison functor.
//
// The default template argument is intentionally not repeated here: it may
// only appear on the class template itself, not on an out-of-class member
// definition.
template<class T, class Continer>
void Heap<T, Continer>::_AdjustUp(int child)
{
	Continer _con;
	while (child > 0)
	{
		const int parent = (child - 1) / 2;

		// Heap order already holds between this element and its parent.
		if (!_con(_a[child], _a[parent]))
		{
			break;
		}

		using std::swap;
		swap(_a[child], _a[parent]);
		child = parent;
	}
}

FileComparess.h //文件压缩的过程

#pragma once
#define _CRT_SECURE_NO_WARNINGS 1
#include<iostream>
#include"HaffmanTree.h"
using namespace std;
typedef long LongType;

// Per-byte bookkeeping used by the compressor: the byte value, how often it
// occurs, and the Huffman code assigned to it. Comparison and addition look
// at `_count` only, which is what the Huffman tree uses as the weight.
struct CharInfo
{
	unsigned char _ch;   // the byte value this record describes
	LongType _count;     // number of occurrences of the byte
	std::string _code;   // Huffman code assigned to the byte

	// Record with the given count; the byte value starts out as 0 so that
	// no member is left uninitialized.
	CharInfo(const LongType count = 0)
		: _ch(0)
		, _count(count)
	{}

	// Record for byte `ch` with a count of zero.
	CharInfo(const char ch)
		: _ch(static_cast<unsigned char>(ch))
		, _count(0)
	{}

	// Two records differ when their counts differ.
	bool operator!=(const CharInfo& c)const
	{
		return _count != c._count;
	}

	// Sum of two records: only the counts are added.
	CharInfo operator+(const CharInfo& c)const
	{
		return CharInfo(_count + c._count);
	}

	// Orders records by count.
	bool operator<(const CharInfo& c)const
	{
		return _count < c._count;
	}
};

class FileComparess
{
public:
	//文件压缩
	void Comparess(const char* filename)
	{
		FILE* fread = fopen(filename, "rb");
		if (fread == NULL)
		{
			cout << "打开待压缩文件失败" << endl;
			return;
		}
		for (int i = 0; i < 256; i++)
		{
			_info[i]._ch = i;
		}
		unsigned char ch = fgetc(fread); //不能使用char，压缩汉字时的字符出现范围是0~255
		while (!feof(fread)) //判断是否到文件结尾
		{
			//在windows下回车是'\r\n'的组合，遇到‘\r\n’时屏幕上打印换行
			if (ch == '\r')
			{
				ch = fgetc(fread); //跳过‘\r’
				if (ch != '\n')
				{
					fseek(fread, -1, SEEK_CUR);
				}
			}
			_info[ch]._count++;
			ch = fgetc(fread);
		}
		HaffmanTree<CharInfo> h(_info, 256, CharInfo());
		HaffmanNode<CharInfo>* root = h.GetRoot();
		string str;
		GenerateHaffmanCode(root, str);
		//重新打开待压缩文件读
		fseek(fread, 0, SEEK_SET);
		ch = fgetc(fread);
		unsigned char data = 0;   //要写入压缩文件的数据
		int bitcount = 7;  //标记移位信息
		//打开文件写压缩后的编码
		string write(filename);      //???
		write = write + ".comparess";  //???
		FILE* fwrite = fopen(write.c_str(), "wb");   //???
		while (!feof(fread))
		{
			if (ch == '\r')
			{
				ch = fgetc(fread);
				if (ch != '\n')
				{
					fseek(fread, -1, SEEK_CUR);
				}
			}
			const char* cur = _info[ch]._code.c_str();
			while (*cur)
			{
				if (bitcount >= 0)
				{
					data = data | ((*cur - '0') << bitcount);
					bitcount--;
				}
				if (bitcount < 0)
				{
					fputc(data, fwrite);
					bitcount = 7;
					data = 0;
				}
				cur++;
			}
			ch = fgetc(fread);
		}
		fputc(data, fwrite);//最后一个字节没写满8位也要把data写入文件（困扰好久）
		//写配置文件
		WriteConfig(filename);
		fclose(fread);
		fclose(fwrite);
	}


	//文件解压缩
	void UnComparess(const char* filename)
	{
		CharInfo HNarry[256];
		//读配置文件
		ReadConfig(filename, HNarry);
		//重建Haffman树
		HaffmanTree<CharInfo> h(HNarry, 256, CharInfo());
		//遍历树，找叶子结点，写输出文件
		HaffmanNode<CharInfo>* root = h.GetRoot();
		HaffmanNode<CharInfo>* cur = root;
		//打开压缩文件读
		string comf(filename);
		comf = comf + ".comparess";
		FILE* fread = fopen(comf.c_str(), "rb");
		unsigned char ch = fgetc(fread);
		FILE* fwrite = fopen("output", "wb");
		int readcount = root->_wight._count;//根节点的_count值就是整棵树字符出现的次数
		while (readcount)
		{
			int tmp = 1;
			int bit = 7;   //左移的位数
			while (bit >= 0)
			{
				if (ch & (tmp << bit))  //从最高位开始判断Huffman编码，如果是1则访问右孩子
				{
					cur = cur->_right;
					bit--;
				}
				else         //否则访问左孩子
				{
					cur = cur->_left;
					bit--;
				}
				//找到叶子结点
				if (cur->_left == NULL&&cur->_right == NULL)
				{
					fputc(cur->_wight._ch, fwrite);
					cur = root;
					readcount--;
					//最后一个字符的编码在最后两个字节当中的情况
					if (!readcount)
					{
						break;
					}
				}
				
			}
			ch = fgetc(fread);
		}
		fclose(fread);
		fclose(fwrite);
	}
protected:
	//得到Haffman编码（后序遍历HaffmanTree）
	void GenerateHaffmanCode(HaffmanNode<CharInfo>* root, string& code)
	{
		if (root == NULL)
			return;
		GenerateHaffmanCode(root->_left, code + '0');
		GenerateHaffmanCode(root->_right, code + '1');
		root->_wight._code = code;
		if (root->_left == NULL&&root->_right == NULL)
		{
			_info[root->_wight._ch]._code = code;
		}

	}
	void WriteConfig(const char* filename)
	{
		string conf(filename);
		conf = conf + "config";
		FILE* fcon = fopen(conf.c_str(), "wb");
		for (int i = 0; i < 256; ++i)
		{

			if (_info[i]._count)
			{
				fputc(_info[i]._ch, fcon);
				fputc(',', fcon);
				char count[100];
				_itoa(_info[i]._count, count, 10);
				fputs(count, fcon);
				fputc(',', fcon);
				fputs(_info[i]._code.c_str(), fcon);
				fputc(',', fcon);
				fputc('\n', fcon);
			}
		}
		fclose(fcon);
	}
	void ReadConfig(const char* filename, CharInfo* HNarry)
	{
		string conf(filename);
		conf = conf + "config";
		FILE* fread = fopen(conf.c_str(), "rb");
		if (fread == NULL)
		{
			cout << "打开待压缩文件失败" << endl;
			return;
		}
		char str[100];
		while (fgets(str, 100, fread)) //得到配置文件的一行  ???
		{
			char* ptr = str;
			unsigned char index = (unsigned char)*ptr;
			if (index == '\n')
			{
				HNarry[index]._ch = index;
				fgets(str, 100, fread);
				char* ptr = str;
				
				
				ptr++;
				LongType count = 0;//字符出现的次数
				while (*ptr != ',' && *ptr)//字符转换成数据
				{
					count *= 10;
					count += (*ptr - '0');
					ptr++;
				}
				HNarry[index]._count = count;
				ptr++;
				string code(ptr);
				HNarry[index]._code = code;
			}
			else
			{
				HNarry[index]._ch = index;
				ptr += 2;
				LongType count = 0;
				while (*ptr != ',' && *ptr)
				{
					count *= 10;
					count += (*ptr - '0');
					ptr++;
				}
				HNarry[index]._count = count;
				ptr++;
				string code(ptr);
				HNarry[index]._code = code;
			}
		}
	}
protected:
	CharInfo _info[256];
};

csdnldsg

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
文件压缩

文件压缩：简介：实现对文本文件的压缩和解压开发环境：Windows,Vs2013 主要技术：文件读写，堆，哈夫曼树，仿函数，位操作项目描述：文件压缩过程：打开文本并逐个字节统计字符出现的次数，然后建立Huffman树，根据Huffman树可得到每个字符对应的Huff
复制链接

扫一扫