Huffman 编码的原理大家应该都很清楚。今天尝试用它来压缩和解压 txt 文件。txt 文件中应只含有英文(ASCII)字符;中文的编码我不清楚,所以程序遇到非 ASCII 字符时会直接跳过(丢弃)。
#define _CRT_SECURE_NO_WARNINGS
#include<iostream>
#include<string>
#include<cstring>
#include<fstream>
#include<vector>
#include<algorithm>
#include<queue>
#include<time.h>
#include<map>
using namespace std;
/*********************************** Hufman **********************************************************/
// Number of slots in the per-character tables (frequency table and code table).
const int BIGSIZE = 260;
// Occurrence count of every character, indexed by character code.
// Filled while the input file is scanned and read when the tree is built.
long long cpcount[BIGSIZE] = {};
// One node of the Huffman tree.
// Leaves carry an input character; internal nodes only link two subtrees.
// Every member has a default so a freshly allocated node never holds
// indeterminate values.
struct Node
{
    char charcter = '\0';      // character stored in a leaf (not meaningful for internal nodes)
    long long power = 0;       // weight: occurrence count, or the sum of both children's weights
    Node * left = NULL;        // child reached by a '0' bit
    Node * right = NULL;       // child reached by a '1' bit
    Node * parent = NULL;      // NULL for the root
    char code[20] = { '\0' };  // NUL-terminated string of '0'/'1'; holds at most 19 bits
    bool is_left = false;      // true when this node is its parent's left child
};
// Ordering predicate for the heap of tree nodes: true when it1 is strictly
// heavier than it2. Used with make_heap, this "greater than" ordering keeps
// the lightest node at the front of the heap (a min-heap).
bool cmp(const Node* it1, const Node* it2)
{
    return it1->power > it2->power;
}
// Huffman compressor/decompressor for a single text file.
// The constructor asks for the file name and counts character frequencies;
// Hufman_action() then runs the whole build / encode / compress / decompress
// pipeline and prints the timings and the compression ratio.
class Hufman
{
private:
    Node * root = NULL;          // root of the Huffman tree; NULL until the tree is built
    long compress_time = 0;      // seconds spent in compress(); 0 if it returned early
    long decompress_time = 0;    // seconds spent in decompress(); 0 if it returned early
    double B_bit = 0;            // size of the encoded input in bits (8 per character)
    double C_bit = 0;            // number of meaningful bits in the compressed stream
    string compress_filename;    // name of the file to compress, read from stdin
public:
    Hufman();
    void Build_hufmanTree();     // build the tree from the character frequencies
    void code_hufmanTree();      // assign a bit string to every node and write the leaf codes to the code table
    void Hufman_code(char ch, char code[20]);   // append one "character code" line to the code table file
    void compress();             // encode the input file into Huf.bin
    void decompress();           // decode Huf.bin back into text
    void Hufman_action();        // driver: runs every step and prints the statistics
};
// Number of bits Cache::have_tail() found pending when it flushed the final,
// partially filled 7-bit group. Hufman::decompress() reads it to work out how
// many of the decoded bits are real data.
int flag = 0;
// Bit buffer used while compressing.
// Code strings made of '0'/'1' characters are appended with add(); every
// seven buffered bits are packed into one character by getCH() and that
// character is what gets written to the output file. When decompressing,
// each character of the file is expanded back into seven bits.
class Cache
{
public:
    Cache();
    void add(string s);          // append the bits of one code string
    char getCH();                // pack the oldest seven bits into one character and drop them
    bool have_tail();            // zero-pad a final partial group; returns whether one existed
    bool cache_is_ok{ false };   // true while the caller should keep draining with getCH()
private:
    int binary[100];             // buffered bits, oldest first, one bit per element
    int position{ 0 };           // number of bits currently buffered
};
// Start with every slot of the bit buffer cleared.
Cache::Cache()
{
    std::fill(binary, binary + 100, 0);
}
// Append the bits of one code string to the buffer.
// s is expected to consist of the characters '0' and '1'; each character is
// stored as its numeric value. The ready flag is raised only once MORE than
// seven bits are buffered; exactly seven bits stay pending until a later
// add() or have_tail() call.
void Cache::add(string s)
{
    for (char bit : s)
    {
        binary[position++] = bit - '0';
    }
    if (position > 7)
        cache_is_ok = true;
}
// Pack the seven oldest buffered bits into one character (first bit is the
// most significant), remove them from the buffer and return the character.
// The ready flag is cleared as soon as fewer than seven bits remain.
char Cache::getCH()
{
    // Horner accumulation: the same value as summing binary[i] * 2^(6 - i).
    int value = 0;
    for (int i = 0; i < 7; ++i)
        value = value * 2 + binary[i];

    // Slide the remaining bits down to the start of the buffer.
    int dst = 0;
    for (int src = 7; src < position; ++src, ++dst)
        binary[dst] = binary[src];
    position -= 7;

    cache_is_ok = cache_is_ok && position >= 7;
    return static_cast<char>(value);
}
// Prepare the final, possibly partial, 7-bit group for flushing.
//
// Returns true when bits are still buffered: the group is zero-padded up to
// seven bits, the ready flag is raised so the caller can fetch it with
// getCH(), and the global `flag` records how many of those bits are real.
//
// Returns false when nothing is buffered. In that case the last character
// already written was completely filled, so `flag` is set to 7. Leaving it
// at its old value would make the decoder, which keeps
// "total bits - 7 + flag" bits, throw away the final seven data bits.
bool Cache::have_tail()
{
    if (position == 0)
    {
        flag = 7;
        return false;
    }
    if (position < 0)
    {
        // The tail was already flushed by an earlier call; never index the
        // buffer with a negative position.
        return false;
    }
    cache_is_ok = true;
    for (int i = position; i < 7; i++)
        binary[i] = 0;
    flag = position;
    return true;
}
// Ask for the name of the file to compress and count how often each
// character occurs in it (results go into the global cpcount table).
//
// Characters whose code is negative or above 250 are not counted. This is
// the same filter compress() applies, and it stops a negative `char` value
// (any non-ASCII byte on platforms where char is signed) from being used as
// an array index.
Hufman::Hufman()
{
    cout << "Hufman压缩" << endl;
    cout << "请输入要压缩的文件名:";
    getline(cin, compress_filename);
    // Read-only stream: a read/write fstream fails on files that are not writable.
    ifstream fin(compress_filename);
    if (!fin.is_open())
    {
        cerr << "cannot open " << compress_filename << endl;
        return;
    }
    char c;
    while (fin.get(c))
    {
        const int index = static_cast<int>(c);
        if (index < 0 || index > 250)
            continue;   // skipped here and in compress(), so it needs no code
        cpcount[index]++;
    }
    fin.close();
}
void Hufman::Build_hufmanTree()
{
vector<Node *>hufman_heap;
for (int i = 0; i < BIGSIZE; i++)
{
if (cpcount[i] != 0)
{
Node * p = new Node;
p->power = cpcount[i];
p->charcter = (char)i;
p->code[0] = '\0';
hufman_heap.push_back(p);
}
}
vector<Node *>::iterator it;
make_heap(hufman_heap.begin(), hufman_heap.end(), cmp);//建立最小堆
while (hufman_heap.size() != 1)
{
Node * p = new Node;
p->code[0] = '\0';
it = hufman_heap.begin();
p->left = *it;
(*it)->parent = p;
(*it)->is_left = true;
hufman_heap.erase(hufman_heap.begin());
make_heap(hufman_heap.begin(), hufman_heap.end(), cmp);
it = hufman_heap.begin();
p->right = *it;
(*it)->parent = p;
(*it)->is_left = false;
hufman_heap.erase(hufman_heap.begin());
make_heap(hufman_heap.begin(), hufman_heap.end(), cmp);
p->power = p->left->power + p->right->power;
hufman_heap.push_back(p);
make_heap(hufman_heap.begin(), hufman_heap.end(), cmp);
}
root = *hufman_heap.begin(); //建立哈夫曼树
}
void Hufman::code_hufmanTree()//给每个哈夫曼树上的节点编码
{
queue<Node*>hufman_queue;
hufman_queue.push(root);
Node * p;
while (hufman_queue.empty() == false)
{
p = hufman_queue.front();
hufman_queue.pop();
if (p->parent != NULL)
{
strcpy(p->code, p->parent->code);
for (int i = 0; i < 20; i++)
{
if (p->code[i] == '\0')
{
if (p->is_left)
p->code[i] = '0';
else
p->code[i] = '1';
p->code[++i] = '\0';
break;
}
}
}
else
{
p->code[0] = 0;
p->code[1] = '\0';
}
if (p->left == NULL&&p->right == NULL)
{
Hufman_code(p->charcter, p->code);//将字符的编码写入哈夫曼的编码表中
continue;
}
hufman_queue.push(p->left);
hufman_queue.push(p->right);
}
}
// Append one entry "<character> <code>" followed by a newline to the code
// table file. The file is opened in append mode on every call; an error is
// reported on stderr when it cannot be opened.
void Hufman::Hufman_code(char ch, char code[20])
{
    ofstream table("Hufman code form.txt", ios::app);
    if (table.is_open())
    {
        table << ch << " " << code << endl;
        table.close();
    }
    else
    {
        cerr << "cannot open the Hufman code form.txt" << endl;
    }
}
// Compress the input file into "Huf.bin".
//
// Steps:
//   1. Load the code table ("Hufman code form.txt") into an array indexed by
//      character code. Each entry is one character, whitespace, its bit
//      string, and a line break.
//   2. Read the input character by character, push each character's bit
//      string into a Cache and write out every packed character it yields.
//   3. Flush the final partial group, if any.
// B_bit ends up as 8 times the number of encoded characters, and
// compress_time as the elapsed wall-clock seconds.
//
// Characters whose code is negative or above 250 are skipped. Table entries
// whose character falls outside the array are ignored rather than written
// out of bounds.
void Hufman::compress()
{
    time_t start, end;
    time(&start);

    string form[BIGSIZE];   // code table: index is the character code
    ifstream table("Hufman code form.txt");
    if (!table.is_open())
    {
        cerr << "cannot open the hufman code form";
        return;
    }
    char ch;
    string code;
    while (table.get(ch))
    {
        if (!(table >> code))
            break;   // truncated entry: no code follows the character
        const int index = static_cast<int>(ch);
        if (index >= 0 && index < BIGSIZE)
            form[index] = code;
        table.get();   // consume the line break that ends the entry
    }
    table.close();

    // Separate read-only stream, so no state is carried over from the table.
    ifstream fin(compress_filename);
    if (!fin.is_open())
    {
        cerr << "cannot open the " << compress_filename;
        return;
    }
    ofstream fout("Huf.bin", ios::binary);
    if (!fout.is_open())
    {
        cerr << "cannot open the Huf.bin" << endl;
        return;
    }

    Cache cache;
    char c;
    while (fin.get(c))
    {
        if ((int)c < 0 || (int)c > 250)
            continue;
        cache.add(form[(int)c]);
        while (cache.cache_is_ok)
        {
            const char packed = cache.getCH();
            fout.write(&packed, sizeof(char));
        }
        B_bit++;
    }
    if (cache.have_tail())
    {
        const char packed = cache.getCH();
        fout.write(&packed, sizeof(char));
    }
    fin.close();
    fout.close();

    B_bit *= 8;
    time(&end);
    compress_time = end - start;
}
// Decode "Huf.bin" into "Hufman decompress.txt" using the tree in `root`.
//
// Every character of the compressed file is expanded into its seven low
// bits, most significant first. Only `flag` bits of the last character count
// as data, so "total bits - 7 + flag" bits are decoded. That count is
// computed without unsigned wrap-around, so an empty file decodes to
// nothing.
//
// The tree is walked from the root ('0' goes left, anything else goes right)
// and a character is emitted each time a leaf is reached. The output file is
// truncated first, so it holds only the result of this run. C_bit receives
// the number of decoded bits and decompress_time the elapsed seconds.
void Hufman::decompress()
{
    time_t start, end;
    time(&start);

    ifstream fin("Huf.bin", ios::binary);
    if (!fin.is_open())
    {
        cerr << "cannot open the Huf.bin";
        return;
    }
    string target;   // the whole compressed stream as '0'/'1' characters
    char c;
    while (fin.read(&c, sizeof(char)))
    {
        const int value = static_cast<int>(c);
        for (int shift = 6; shift >= 0; --shift)
            target += ((value >> shift) & 1) ? '1' : '0';
    }
    fin.close();

    // Number of bits that carry data: the last character contributes `flag`.
    const size_t tail_bits = flag > 0 ? static_cast<size_t>(flag) : 0;
    size_t valid_bits = 0;
    if (target.size() + tail_bits >= 7)
        valid_bits = target.size() + tail_bits - 7;
    if (valid_bits > target.size())
        valid_bits = target.size();
    C_bit = static_cast<double>(valid_bits);

    ofstream fout("Hufman decompress.txt");
    if (!fout.is_open())
    {
        cerr << "cannot open the decompress.txt" << endl;
        return;
    }

    Node * p = root;
    for (size_t i = 0; i < valid_bits && p != NULL; i++)
    {
        p = (target[i] == '0') ? p->left : p->right;
        if (p == NULL)
        {
            // The bit stream left the tree; stop instead of dereferencing NULL.
            cerr << "compressed data does not match the Huffman tree" << endl;
            break;
        }
        if (p->left == NULL && p->right == NULL)
        {
            fout << p->charcter;
            p = root;
        }
    }

    time(&end);
    decompress_time = end - start;
}
// Run the whole pipeline: build the tree, generate the codes, compress,
// decompress, then print both timings and the compression ratio
// (compressed bits / original bits, as a percentage).
// When nothing was compressed (B_bit == 0) the ratio is reported as 0
// instead of dividing by zero.
void Hufman::Hufman_action()
{
    Build_hufmanTree();
    code_hufmanTree();
    compress();
    decompress();
    double answer = 0;
    if (B_bit > 0)
    {
        answer = C_bit / B_bit;
        answer /= 0.01;   // convert the fraction to a percentage
    }
    cout << "hufman压缩时间:" << compress_time << "s" << endl << "hufman解压时间:" << decompress_time << "s" << endl << "压缩比:" << answer << "%" << endl;
}
int main()
{
Hufman h;
h.Hufman_action();
system("pause");
return 0;
}