文件压缩:
简介:实现对文本文件的压缩和解压
开发环境:Windows,Vs2013
主要技术:文件读写,堆,哈夫曼树,仿函数,位操作
项目描述:文件压缩过程:以二进制方式打开文本文件并逐个字节统计字符出现的次数,然后建立Huffman树,根据Huffman树得到每个字符对应的Huffman编码,再按编码逐位写入压缩文件,最后写入配置文件(记录每个字符、字符出现的次数和对应的Huffman编码,这样方便解压)。文件解压过程:根据配置文件中各字符出现的次数重建Huffman树,然后逐位读取压缩文件,沿Huffman树找到叶子结点后将对应的字符写入输出文件,直到还原出全部字符。
HaffmanTree.h头文件
#pragma once
#include<iostream>
#include"Heap.h"
#include"FileComparess.h"
using namespace std;
// One node of a Huffman tree: a weight of type T and links to two subtrees.
// A node is created childless; the tree builder wires up the links afterwards.
template<class T>
struct HaffmanNode
{
    HaffmanNode* _left;   // left subtree, null until linked
    HaffmanNode* _right;  // right subtree, null until linked
    T _wight;             // weight carried by this node (copied from the argument)

    // Creates a node with no children that stores a copy of `wight`.
    HaffmanNode(const T& wight)
        : _left(nullptr)
        , _right(nullptr)
        , _wight(wight)
    {
    }
};
template<class T>
class HaffmanTree
{
public:
typedef HaffmanNode<T> Node;
HaffmanTree(const T* a, size_t size, const T& invalid) //构造函数
{
_root = _CreatHaffmanTree(a, size, invalid);
}
Node* GetRoot()
{
return _root;
}
protected:
Node* _CreatHaffmanTree(const T* a,size_t size, const T& invalid) //创建huffmanTree
{
Heap<Node*, Less<Node*>> minHeap; //构建小堆
for (size_t i = 0; i < size; ++i)
{
if (a[i] != invalid)
{
Node* tmp = new Node(a[i]);
minHeap.Push(tmp);
}
}
while (!minHeap.Empty())
{
Node* left = minHeap.GetTop();
minHeap.Pop();
Node* right = NULL;
if (!minHeap.Empty())
{
right = minHeap.GetTop();
minHeap.Pop();
}
Node* parent = NULL;
if (right)
{
parent = new Node(left->_wight + right->_wight);
}
else
{
parent = new Node(left->_wight);
}
parent->_left = left;
parent->_right = right;
if (minHeap.Empty())
{
return parent;
}
minHeap.Push(parent);
}
return NULL;
}
protected:
Node* _root;
};
Heap.h //建堆的过程
#pragma once
#define _CRT_SECURE_NO_WARNINGS 1
#include<iostream>
#include<vector>
#include<assert.h>
//#include"HaffmanTree.h"
using namespace std;
// Comparator: true when `l` should be closer to the heap top than `r`,
// decided by operator<. Used with Heap this gives a min-heap.
template<class T>
struct Less
{
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};

// Comparator: true when `l` should be closer to the heap top than `r`,
// decided by operator>. Used with Heap this gives a max-heap.
template<class T>
struct Greater
{
    bool operator()(const T& l, const T& r) const
    {
        return l > r;
    }
};

// Specialisation for pointers: orders by the pointees' `_wight` members
// instead of by address. Both pointers must be non-null.
template<class T>
struct Less<T*>
{
    bool operator()(const T* Nodel, const T* Noder) const
    {
        return Nodel->_wight < Noder->_wight;
    }
};

// Binary heap stored in a vector. `Compare(a, b)` returning true means `a`
// belongs above `b`; the default Less<T> therefore produces a min-heap.
//
// Members are defined inside the class: repeating the default template
// argument on out-of-class member definitions is rejected by standard C++.
template<class T, class Compare = Less<T> >
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {
    }

    // Builds a heap from a[0..size), skipping elements equal to `invalid`.
    Heap(const T* a, size_t size, const T& invalid)
    {
        _a.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            if (a[i] != invalid)
            {
                _a.push_back(a[i]);
            }
        }
        _BuildHeap();
    }

    // Builds a heap from the elements of `a`. Taking the vector by value
    // accepts lvalues and temporaries alike; a separate const-reference
    // overload would make every call ambiguous, so there is only this one.
    Heap(std::vector<T> a)
    {
        _a.swap(a);
        _BuildHeap();
    }

    // Inserts a copy of `x`.
    void Push(const T& x)
    {
        _a.push_back(x);
        _AdjustUp(static_cast<int>(_a.size() - 1));
    }

    // Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(!_a.empty());
        // Move the top to the back, drop it, then repair from the root.
        std::swap(_a.front(), _a.back());
        _a.pop_back();
        _AdjustDown(0);
    }

    // Returns the top element. The heap must not be empty.
    T& GetTop()
    {
        assert(!_a.empty());
        return _a[0];
    }

    const T& GetTop() const
    {
        assert(!_a.empty());
        return _a[0];
    }

    // True when the heap holds no elements.
    bool Empty() const
    {
        return _a.empty();
    }

    // Number of elements currently stored.
    size_t Size() const
    {
        return _a.size();
    }

    // NOTE(review): declared only. No definition is visible in this header,
    // so any call fails at link time; the intended ordering is not shown.
    void HeapSort(T* a, size_t size);

protected:
    // Establishes the heap property over the whole of `_a`.
    // The last non-leaf node is at index size/2 - 1; counting down with an
    // unsigned index offset by one avoids underflow for 0 or 1 elements.
    void _BuildHeap()
    {
        for (size_t i = _a.size() / 2; i > 0; --i)
        {
            _AdjustDown(i - 1);
        }
    }

    // Sinks the element at `parent` until neither child belongs above it.
    void _AdjustDown(size_t parent)
    {
        Compare cmp;
        const size_t size = _a.size();
        size_t child = parent * 2 + 1;
        while (child < size)
        {
            // Choose whichever child belongs closer to the top.
            if (child + 1 < size && cmp(_a[child + 1], _a[child]))
            {
                ++child;
            }
            if (!cmp(_a[child], _a[parent]))
            {
                break;
            }
            std::swap(_a[parent], _a[child]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    // Floats the element at `child` up until its parent belongs above it.
    void _AdjustUp(int child)
    {
        Compare cmp;
        while (child > 0)
        {
            const int parent = (child - 1) / 2;
            if (!cmp(_a[child], _a[parent]))
            {
                break;
            }
            std::swap(_a[child], _a[parent]);
            child = parent;
        }
    }

protected:
    std::vector<T> _a;  // heap storage; index 0 is the top
};
#pragma once
#define _CRT_SECURE_NO_WARNINGS 1
#include<iostream>
#include"HaffmanTree.h"
using namespace std;
typedef long LongType;

// Per-byte record used both as the frequency table entry and as the weight
// type of the Huffman tree. All comparisons and the sum look at `_count`
// only; `_ch` and `_code` do not take part.
struct CharInfo
{
    unsigned char _ch;   // the byte value this record describes
    LongType _count;     // number of occurrences of the byte
    std::string _code;   // Huffman code of the byte as a string of '0'/'1'

    // Record with the given occurrence count. `_ch` is zero-initialised so
    // the object never carries an indeterminate value.
    CharInfo(const LongType count = 0)
        : _ch(0)
        , _count(count)
    {}

    // Record for byte `ch` with an occurrence count of zero.
    CharInfo(const char ch)
        : _ch(static_cast<unsigned char>(ch))
        , _count(0)
    {}

    // Two records differ when their counts differ.
    bool operator!=(const CharInfo& c) const
    {
        return _count != c._count;
    }

    // Sum of the two counts; used for the weight of an internal tree node.
    CharInfo operator+(const CharInfo& c) const
    {
        return CharInfo(_count + c._count);
    }

    // Orders records by count.
    bool operator<(const CharInfo& c) const
    {
        return _count < c._count;
    }
};
class FileComparess
{
public:
//文件压缩
void Comparess(const char* filename)
{
FILE* fread = fopen(filename, "rb");
if (fread == NULL)
{
cout << "打开待压缩文件失败" << endl;
return;
}
for (int i = 0; i < 256; i++)
{
_info[i]._ch = i;
}
unsigned char ch = fgetc(fread); //不能使用char,压缩汉字时的字符出现范围是0~255
while (!feof(fread)) //判断是否到文件结尾
{
//在windows下回车是'\r\n'的组合,遇到‘\r\n’时屏幕上打印换行
if (ch == '\r')
{
ch = fgetc(fread); //跳过‘\r’
if (ch != '\n')
{
fseek(fread, -1, SEEK_CUR);
}
}
_info[ch]._count++;
ch = fgetc(fread);
}
HaffmanTree<CharInfo> h(_info, 256, CharInfo());
HaffmanNode<CharInfo>* root = h.GetRoot();
string str;
GenerateHaffmanCode(root, str);
//重新打开待压缩文件读
fseek(fread, 0, SEEK_SET);
ch = fgetc(fread);
unsigned char data = 0; //要写入压缩文件的数据
int bitcount = 7; //标记移位信息
//打开文件写压缩后的编码
string write(filename); //???
write = write + ".comparess"; //???
FILE* fwrite = fopen(write.c_str(), "wb"); //???
while (!feof(fread))
{
if (ch == '\r')
{
ch = fgetc(fread);
if (ch != '\n')
{
fseek(fread, -1, SEEK_CUR);
}
}
const char* cur = _info[ch]._code.c_str();
while (*cur)
{
if (bitcount >= 0)
{
data = data | ((*cur - '0') << bitcount);
bitcount--;
}
if (bitcount < 0)
{
fputc(data, fwrite);
bitcount = 7;
data = 0;
}
cur++;
}
ch = fgetc(fread);
}
fputc(data, fwrite);//最后一个字节没写满8位也要把data写入文件(困扰好久)
//写配置文件
WriteConfig(filename);
fclose(fread);
fclose(fwrite);
}
//文件解压缩
void UnComparess(const char* filename)
{
CharInfo HNarry[256];
//读配置文件
ReadConfig(filename, HNarry);
//重建Haffman树
HaffmanTree<CharInfo> h(HNarry, 256, CharInfo());
//遍历树,找叶子结点,写输出文件
HaffmanNode<CharInfo>* root = h.GetRoot();
HaffmanNode<CharInfo>* cur = root;
//打开压缩文件读
string comf(filename);
comf = comf + ".comparess";
FILE* fread = fopen(comf.c_str(), "rb");
unsigned char ch = fgetc(fread);
FILE* fwrite = fopen("output", "wb");
int readcount = root->_wight._count;//根节点的_count值就是整棵树字符出现的次数
while (readcount)
{
int tmp = 1;
int bit = 7; //左移的位数
while (bit >= 0)
{
if (ch & (tmp << bit)) //从最高位开始判断Huffman编码,如果是1则访问右孩子
{
cur = cur->_right;
bit--;
}
else //否则访问左孩子
{
cur = cur->_left;
bit--;
}
//找到叶子结点
if (cur->_left == NULL&&cur->_right == NULL)
{
fputc(cur->_wight._ch, fwrite);
cur = root;
readcount--;
//最后一个字符的编码在最后两个字节当中的情况
if (!readcount)
{
break;
}
}
}
ch = fgetc(fread);
}
fclose(fread);
fclose(fwrite);
}
protected:
//得到Haffman编码(后序遍历HaffmanTree)
void GenerateHaffmanCode(HaffmanNode<CharInfo>* root, string& code)
{
if (root == NULL)
return;
GenerateHaffmanCode(root->_left, code + '0');
GenerateHaffmanCode(root->_right, code + '1');
root->_wight._code = code;
if (root->_left == NULL&&root->_right == NULL)
{
_info[root->_wight._ch]._code = code;
}
}
void WriteConfig(const char* filename)
{
string conf(filename);
conf = conf + "config";
FILE* fcon = fopen(conf.c_str(), "wb");
for (int i = 0; i < 256; ++i)
{
if (_info[i]._count)
{
fputc(_info[i]._ch, fcon);
fputc(',', fcon);
char count[100];
_itoa(_info[i]._count, count, 10);
fputs(count, fcon);
fputc(',', fcon);
fputs(_info[i]._code.c_str(), fcon);
fputc(',', fcon);
fputc('\n', fcon);
}
}
fclose(fcon);
}
void ReadConfig(const char* filename, CharInfo* HNarry)
{
string conf(filename);
conf = conf + "config";
FILE* fread = fopen(conf.c_str(), "rb");
if (fread == NULL)
{
cout << "打开待压缩文件失败" << endl;
return;
}
char str[100];
while (fgets(str, 100, fread)) //得到配置文件的一行 ???
{
char* ptr = str;
unsigned char index = (unsigned char)*ptr;
if (index == '\n')
{
HNarry[index]._ch = index;
fgets(str, 100, fread);
char* ptr = str;
ptr++;
LongType count = 0;//字符出现的次数
while (*ptr != ',' && *ptr)//字符转换成数据
{
count *= 10;
count += (*ptr - '0');
ptr++;
}
HNarry[index]._count = count;
ptr++;
string code(ptr);
HNarry[index]._code = code;
}
else
{
HNarry[index]._ch = index;
ptr += 2;
LongType count = 0;
while (*ptr != ',' && *ptr)
{
count *= 10;
count += (*ptr - '0');
ptr++;
}
HNarry[index]._count = count;
ptr++;
string code(ptr);
HNarry[index]._code = code;
}
}
}
protected:
CharInfo _info[256];
};