基于哈夫曼编码实现文本文件的压缩与解压缩。
输入文件为 Inputfile.txt,压缩后的文件为 HuffmanPress.dat,
解压后的文件为 DecodeFile.txt。
以下代码在 VS Code 中编译运行。
可以压缩包含英文小写字母、大写字母、空格、换行符等 ASCII 字符(编码 0~127)的文本。
minHeap.h
#ifndef MIN_HEAP_H
#define MIN_HEAP_H
//华南师范大学 关竣佑
#include<iostream>
using namespace std;
// utf8
/**
 * Array-backed binary min-heap (0-based: children of slot i live at 2i+1 and 2i+2).
 *
 * Elements are ordered by `element->weight`, so T must be a pointer-like type
 * whose pointee exposes a `weight` member comparable with `<`.
 * The heap owns its storage; the pointed-to objects are NOT owned.
 */
template<class T>
class minHeap
{
private:
    T* heap;       // owned array holding the heap slots
    int size;      // number of live elements
    int Capacity;  // number of allocated slots

    // Doubles the storage when it is full.
    void expand()
    {
        const int newCapacity = Capacity > 0 ? Capacity * 2 : 1;
        T* newArray = new T[newCapacity]();
        for(int i = 0; i < size; i++)
        {
            newArray[i] = heap[i];
        }
        delete[] heap; // array form: the storage was allocated with new[]
        heap = newArray;
        Capacity = newCapacity;
    }

    // True when `a` has a strictly smaller weight than `b`.
    bool cmp(T a, T b) const
    {
        return a->weight < b->weight;
    }

    // Sifts the element at currentIndex down until both children are not smaller.
    void AdjustDown(int currentIndex)
    {
        T value = heap[currentIndex];
        while(currentIndex * 2 + 1 < size)
        {
            int nextIndex = 2 * currentIndex + 1;
            // Pick the smaller of the two children.
            if(nextIndex + 1 < size && cmp(heap[nextIndex + 1], heap[nextIndex]))
            {
                nextIndex = nextIndex + 1;
            }
            if(cmp(value, heap[nextIndex]))
                break; // already strictly smaller than the smallest child
            heap[currentIndex] = heap[nextIndex];
            currentIndex = nextIndex;
        }
        heap[currentIndex] = value;
    }

    // Sifts the element at currentIndex up until its parent is strictly smaller.
    void AdjustUp(int currentIndex)
    {
        T value = heap[currentIndex];
        while(currentIndex > 0)
        {
            const int parentIndex = (currentIndex - 1) / 2;
            if(cmp(heap[parentIndex], value))
                break;
            heap[currentIndex] = heap[parentIndex];
            currentIndex = parentIndex;
        }
        heap[currentIndex] = value;
    }

public:
    // Creates an empty heap with room for 10 elements.
    minHeap()
    {
        heap = new T[10]();
        size = 0;
        Capacity = 10;
    }

    /**
     * Builds a heap from the first n elements of arr.
     * The elements are copied into storage owned by the heap (arr itself is
     * left untouched), so later Push calls can never write past the caller's array.
     */
    minHeap(T arr[], int n)
    {
        size = n > 0 ? n : 0;
        Capacity = size > 0 ? 2 * size : 10;
        heap = new T[Capacity]();
        for(int i = 0; i < size; i++)
        {
            heap[i] = arr[i];
        }
        // Heapify bottom-up starting at the last node that has a child.
        for(int i = size / 2 - 1; i >= 0; i--)
        {
            AdjustDown(i);
        }
    }

    // Deep copy: the new heap gets its own storage.
    minHeap(const minHeap& other)
    {
        size = other.size;
        Capacity = other.Capacity;
        heap = new T[Capacity > 0 ? Capacity : 1]();
        for(int i = 0; i < size; i++)
        {
            heap[i] = other.heap[i];
        }
    }

    minHeap& operator=(const minHeap& other)
    {
        if(this != &other)
        {
            T* fresh = new T[other.Capacity > 0 ? other.Capacity : 1]();
            for(int i = 0; i < other.size; i++)
            {
                fresh[i] = other.heap[i];
            }
            delete[] heap;
            heap = fresh;
            size = other.size;
            Capacity = other.Capacity;
        }
        return *this;
    }

    ~minHeap()
    {
        delete[] heap;
    }

    // Number of elements currently stored.
    int Size()
    {
        return size;
    }

    // Returns the minimum element; on an empty heap prints a message and
    // returns a value-initialized T (a null pointer for pointer types).
    T Top()
    {
        if(size == 0)
        {
            std::cout<<"没有元素"<<std::endl;
            return T();
        }
        return heap[0];
    }

    // Inserts an element, growing the storage when necessary.
    void Push(T element)
    {
        if(size == Capacity)
            expand();
        size++;
        heap[size-1] = element;
        AdjustUp(size-1);
    }

    // Removes the minimum element; on an empty heap prints a message and does nothing.
    void Pop()
    {
        if(size == 0)
        {
            std::cout<<"操作非法"<<std::endl;
            return; // nothing to remove; keep size from going negative
        }
        heap[0] = heap[size-1];
        size--;
        AdjustDown(0);
    }

    // Writes the raw slots (in array order) to stdout, separated by spaces.
    void Print()
    {
        for(int i = 0; i < size; i++)
        {
            std::cout<<heap[i]<<" ";
        }
        std::cout<<std::endl;
    }

    bool isEmpty()
    {
        return size == 0;
    }
};
#endif
HuffmanTreeNode.h
#ifndef HUFFMAN_TREE_NODE_H
#define HUFFMAN_TREE_NODE_H
#include<iostream>
using namespace std;
// 哈夫曼树的结点
/**
 * Node of a Huffman tree.
 *
 * `data` is the symbol carried by the node and `weight` its frequency.
 * Child and parent links start out null; the node does not own or free them.
 */
template<class T>
class HuffmanTreeNode
{
public:
    int weight;
    T data; // symbol represented by this node
    HuffmanTreeNode<T>* left;
    HuffmanTreeNode<T>* right;
    HuffmanTreeNode<T>* parent;

    HuffmanTreeNode(T data,int weight)
        : weight(weight), data(data), left(nullptr), right(nullptr), parent(nullptr)
    {
    }

    // The comparisons below look only at `weight`. They take the other node by
    // pointer, which must not be null. They no longer write debug output.
    bool operator<(HuffmanTreeNode<T>* node1) const
    {
        return this->weight < node1->weight;
    }
    bool operator>(HuffmanTreeNode<T>* node1) const
    {
        return this->weight > node1->weight;
    }
    bool operator==(HuffmanTreeNode<T>* node1) const
    {
        return this->weight == node1->weight;
    }
};
#endif
HuffmanTree.h
#ifndef HUFFMAN_TREE_H
#define HUFFMAN_TREE_H
#include"HuffmanTreeNode.h"
#include"minHeap.h"
#include<map>
#include<unordered_map>
#include<utility>
using namespace std;
template<class T>
class HuffmanTree
{
public:
HuffmanTreeNode<T>* root;
int size;
map<T,HuffmanTreeNode<T>*> LeafMap; //字符对应叶子的哈希表
HuffmanTree(T value[], int w[],int n) //建树
{
//LeafMap = new map<T,HuffmanTreeNode<T>*>();
minHeap<HuffmanTreeNode<T>*> heap;
for(int i=0;i<n;i++)
{
//cout<<w[i]<<endl;
HuffmanTreeNode<T>* node = new HuffmanTreeNode<T>(value[i],w[i]);
//HuffmanTreeNode<T> node(value[i],w[i]);
LeafMap.insert(pair<T,HuffmanTreeNode<T>*>(value[i], node));
heap.Push(node);
}
//cout<<"000"<<endl;
size = n;
//开始建树
for(int i=0;i<size-1;i++)
{
//cout<<i<<endl;
HuffmanTreeNode<T>* leftNode = heap.Top();
heap.Pop();
HuffmanTreeNode<T>* rightNode = heap.Top();
heap.Pop();
//cout<<leftNode->weight<<" "<<rightNode->weight<<endl;
HuffmanTreeNode<T>* newNode = new HuffmanTreeNode<T>(leftNode->data,leftNode->weight+rightNode->weight);
newNode->left = leftNode;
newNode->right = rightNode;
leftNode->parent = newNode;
rightNode->parent = newNode;
heap.Push(newNode);
// HuffmanTreeNode<T> leftNode = heap.Top();
// heap.Pop();
// HuffmanTreeNode<T> rightNode = heap.Top();
// heap.Pop();
// cout<<leftNode.weight<<" "<<rightNode.weight<<endl;
// HuffmanTreeNode<T> newNode(leftNode.data,leftNode.weight+rightNode.weight);
// newNode.left = &leftNode;
// newNode.right = &rightNode;
// leftNode.parent = &newNode;
// rightNode.parent = &newNode;
// heap.Push(newNode);
}
root = heap.Top();
}
map<T,HuffmanTreeNode<T>*> GetLeaf()
{
return LeafMap;
}
HuffmanTreeNode<T>* GetRoot()
{
return root;
}
};
#endif
HuffmanTreeEncode.h
#ifndef HUFFMAN_TREE_ENCODE_H
#define HUFFMAN_TREE_ENCODE_H
#include"HuffmanTree.h"
#include<bitset>
#include<fstream>
#include<ostream>
#include<string>
#include<algorithm>
#include<stdio.h>
#include <sys/stat.h>
#include<cstring>
using namespace std;
// Debug helper: prints the weights of the subtree rooted at `node` in
// in-order (left, node, right), separated by spaces. A null node prints nothing.
template<class T>
void InOrder(HuffmanTreeNode<T>* node)
{
    if(node == nullptr)
        return;
    InOrder(node->left);
    std::cout<<node->weight<<" ";
    InOrder(node->right);
}
// Debug helper: prints the weights of the subtree rooted at `node` in
// pre-order (node, left, right), separated by spaces. A null node prints nothing.
template<class T>
void PreOrder(HuffmanTreeNode<T>* node)
{
    if(node == nullptr)
        return;
    std::cout<<node->weight<<" ";
    PreOrder(node->left);
    PreOrder(node->right);
}
// Returns the size in bytes of the file named fileName, or -1 when the file
// cannot be stat'ed (for example because it does not exist).
// The name is handed to stat() directly; copying it into a fixed 10-byte
// buffer overflowed for names such as "Inputfile.txt".
// Declared inline because this definition lives in a header.
inline int getFileSize1(std::string fileName) {
    struct stat statbuf;
    if(::stat(fileName.c_str(), &statbuf) != 0)
        return -1;
    return static_cast<int>(statbuf.st_size);
}
class HuffmanTreeEncode
{
public:
int size;
HuffmanTree<char>* tree;
char* value;
int* w;
int index;
int BeforePressSize;
string code;
map<char,HuffmanTreeNode<char>*> LeafMap;
HuffmanTreeEncode()
{
size = 0;
tree = NULL;
char c = 0;
value = new char[128];
for(int i=0;i<128;i++)
value[i] = c+i;
w = new int[128];
for(int i=0;i<128;i++)
w[i] = 0;
code = "";
}
void encode(string path) //规则 往左子树为1 右子树为0
{
//string text = "";
ifstream ifs;
ifs.open(path);
// string tmp;
// while(ifs>>tmp)
// {
// text+=tmp;
// }
BeforePressSize = getFileSize1(path);
istreambuf_iterator<char> begin(ifs);
istreambuf_iterator<char> end;
string text(begin, end);
ifs.close();
for(int i=0;i<text.size();i++)
{
w[text[i]-char(0)]++;
}
// for(int i=0;i<26;i++)
// cout<<w[i]<<" ";
tree = new HuffmanTree<char>(value,w,128);
// InOrder(tree->root);
// cout<<endl;
// PreOrder(tree->root);
LeafMap = tree->GetLeaf();
for(int i=0;i<text.size();i++)
{
//cout<<i<<endl;
code+=FindToRoot(text[i]);
}
int n = code.size();
// 编码输出到文�?
//cout<<code<<endl;
//reverse(code.begin(),code.end());
bitset<1000000> bits(code); //开大点,能最多放�?100�?
//cout<<bits<<endl;
ofstream ofs;
ofs.open("HuffmanPress.dat");
cout<<sizeof(bits)<<endl;
// for(int i=0;i<10;i++)
// cout<<bits[i]<<endl;
int wsize = code.size()/8+128; //保证输出的字节数够用但是尽量少,因为一开始bitset数组里面的多余位�?0
//int wsize = sizeof(bits);
//输出权重信息等到文件以便解码时建树
ofs<<code.size()<<' ';
for(int i=0;i<127;i++)
ofs<<w[i]<<' ';
ofs<<w[127];
//cout<<bits<<endl;
ofs.write((char*)&bits,wsize);
ofs.close();
int AfterPressSize = getFileSize1("HuffmanPress.dat");
float ratio = 0;//压缩率
ratio = ( (float) AfterPressSize) / ((float)BeforePressSize) ;
//cout << "压缩前大小为:" << BeforePressSize << "字节 压缩后大小为:" << AfterPressSize << "字节" << endl;
printf("压缩前大小为:%d 字节 压缩后大小为:%d 字节 ", BeforePressSize,AfterPressSize);
printf("---------- 压缩成功!---------\n");
printf("---------压缩率为: %.2f %%--------",ratio*100);
cout<<endl;
}
string FindToRoot(char key)
{
string ans = "";
HuffmanTreeNode<char>* node = LeafMap[key];
while(node->parent)
{
HuffmanTreeNode<char>* parent = node->parent;
if(parent->left == node)
ans+='1';
else
ans+='0';
node = node->parent;
}
reverse(ans.begin(),ans.end());
return ans;
}
};
#endif
HuffmanTreeDecode.h
#ifndef HUFFMAN_TREE_DECODE_H
#define HUFFMAN_TREE_DECODE_H
#include"HuffmanTree.h"
#include<bitset>
#include<fstream>
#include<ostream>
#include<string>
#include<algorithm>
#include<stdio.h>
#include <sys/stat.h>
#include<cstring>
using namespace std;
// template<class T>
// void InOrder(HuffmanTreeNode<T>* node)
// {
// if(node->left)
// InOrder(node->left);
// cout<<node->weight<<" ";
// if(node->right)
// InOrder(node->right);
// }
// template<class T>
// void PreOrder(HuffmanTreeNode<T>* node)
// {
// cout<<node->weight<<" ";
// if(node->left)
// PreOrder(node->left);
// if(node->right)
// PreOrder(node->right);
// }
class HuffmanTreeDecode
{
public:
int size; //应该读取的字节数
HuffmanTree<char>* tree;
char* value;
int* w;
int index;
string code;
HuffmanTreeDecode()
{
size = 0;
tree = NULL;
value = new char[128];
w = new int[128];
char c = 0;
for(int i=0;i<128;i++)
value[i] = c+i;
}
void decode(string path)
{
int codesize = 0;
ifstream ifs;
ifs.open("HuffmanPress.dat");
bitset<1000000> ans;
ifs>>codesize;
for(int i=0;i<128;i++)
ifs>>w[i];
int rsize = codesize/8 + 1;
ifs.read((char*)&ans,sizeof(ans));
ifs.close();
//cout<<ans<<endl;
int index = codesize-1;
//重新建树
tree = new HuffmanTree<char>(value,w,128);
string s = "";
while(index>=0)
{
char c = FindLeafChar(tree->GetRoot(),index,ans);
s+=c;
}
//cout<<s<<endl;
cout<<"-------解压成功!文件输出到 DecodeFile.txt--------"<<endl;
ofstream ofs;
ofs.open("DecodeFile.txt");
ofs<<s;
ofs.close();
}
char FindLeafChar(HuffmanTreeNode<char>* root,int& index,bitset<1000000> code)
{
//找到叶子就可以知道原文
while(root)
{
if(root->left == NULL && root->right == NULL)
{
return root->data;
}
if(code[index] == 1)
{
root = root->left;
}
else{
root = root->right;
}
index--;
}
return ' ';
}
};
#endif
HuffmanEncode.cpp
#include"HuffmanTree.h"
#include"HuffmanTreeEncode.h"
#include"HuffmanTreeDecode.h"
using namespace std;
// 华南师范大学 关竣佑
//哈夫曼编码压缩
int main()
{
HuffmanTreeEncode Encode;
Encode.encode("Inputfile.txt");
//Encode.decode();
return 0;
}
HuffmanDecode.cpp
#include"HuffmanTree.h"
#include"HuffmanTreeEncode.h"
#include"HuffmanTreeDecode.h"
using namespace std;
// 华南师范大学 关竣佑
//哈夫曼编码解压器
// Decompressor entry point: decodes the archive produced by the compressor.
int main()
{
    HuffmanTreeDecode Decode;
    // The archive is named HuffmanPress.dat; the previous literal "HuffmanPree.dat" was a typo.
    Decode.decode("HuffmanPress.dat");
    return 0;
}
目前的代码仍可能偶然出现解码乱码,字符不匹配的情况,正在改进中。