// 思路已在代码块注释里
#define _CRT_SECURE_NO_WARNINGS
#include<cstdlib>
#include <cstring>
#include <fstream>
#include<iostream>
using namespace std;
/*哈夫曼
* 1.读入文件并计算权重
* 2.编码建树
* 注:这就是我们的基础树 压缩解压都是根据这个树来的
* 注1:为了确保解压和压缩文件相同,请记住您建树的文本文档,这将是您的密钥 压缩文件相当于公钥
*/
/*哈夫曼压缩算法:
* 1.回到文件头,开始以位运算的形式压缩
* 2.开始存储 根据树 右移存储
* 这里有一个问题 就是如何保证存入的是位字符串
* 1.位满8位自动存入 如何判断位满八位
* 开始 0000_0000 1111_1111 存入8位数字,将被替换
* 1111_0011 0000_0000 1111_1111 即如果第二个数字成为了0意味着前面那个存满了八位,就可以进行存储了
* 所以采用int的形式
*/
/*哈夫曼解压算法:
* 1.读入文件 开始读取位
* 2.根据树开始索引解压
*/
// Occurrence count per character code in [0, 128); WeightCul fills it in.
int c_weight[128] = {};

// One slot of the array-backed Huffman tree. Slots are addressed by index,
// and index 0 is used throughout the file as the "no node" value.
struct HTNode {
    unsigned int weight;
    // Index of the parent slot; 0 means the node has not been merged yet.
    unsigned int parent;
    // Left child index (0 for a leaf). The spelling is kept as-is because
    // every user of the struct refers to the field by this name.
    unsigned int lchid;
    // Right child index (0 for a leaf).
    unsigned int rchild;
};

// A Huffman tree is handled as a pointer to its node array.
using HuffmanTree = HTNode*;

// Table of NUL-terminated code strings, one per symbol.
using HuffmanCode = char**;
/*
* name:权重计算
* author:wyh
* func:完成权重计算
* elem:txtw是文档路径
* 将在函数内完成文档的打开和关闭
*/
int WeightCul(char* txtW)
{
ifstream infile(txtW, ios::in);
//进行文件打开判断
if (!infile.is_open())
{
cout << "文件打开失败" << endl;
return 0;
}
char ch;//读入字符
while (!infile.eof())
{
ch = infile.get();
c_weight[ch]++;
}
infile.close();
}
// Picks the two lightest nodes among HT[1..n] that have no parent yet.
//   s1 receives the index of the lightest such node,
//   s2 the index of the lightest one other than s1.
// Ties are resolved in favour of the lower index. If fewer than two
// candidates exist the corresponding output is set to 0, the index that is
// never used for a real node.
void Select(HuffmanTree HT, int n, int &s1, int &s2)
{
    // Track the best index found so far instead of comparing against a fixed
    // upper bound: a numeric sentinel silently excludes every node whose
    // weight reaches it, which leaves the outputs unset for large inputs.
    s1 = 0;
    s2 = 0;

    for (int i = 1; i <= n; i++)
    {
        if (HT[i].parent != 0)
            continue; // already merged into a subtree
        if (s1 == 0 || HT[i].weight < HT[s1].weight)
            s1 = i;
    }

    for (int i = 1; i <= n; i++)
    {
        if (HT[i].parent != 0 || i == s1)
            continue;
        if (s2 == 0 || HT[i].weight < HT[s2].weight)
            s2 = i;
    }
}
/*
 * name:   HuffmanCoding
 * author: wyh
 * func:   Build the Huffman tree for n symbols.
 * elem:   HT receives a newly allocated array of 2*n slots (slot 0 unused)
 *         w  points to n weights; w[k] belongs to leaf k + 1
 *         n  is the number of leaves; nothing is done when n <= 1
 */
void HuffmanCoding(HuffmanTree& HT,const int* w,const int n)
{
    if (n <= 1)
        return;

    // n leaves produce n - 1 internal nodes, hence 2n - 1 nodes in total.
    const int node_total = 2 * n - 1;
    HT = new HTNode[node_total + 1];

    // Leaves take their weight from the table, internal slots start at 0;
    // no slot is linked to anything yet.
    for (int idx = 1; idx <= node_total; ++idx)
    {
        HTNode& slot = HT[idx];
        if (idx <= n)
            slot.weight = w[idx - 1];
        else
            slot.weight = 0;
        slot.parent = 0;
        slot.lchid = 0;
        slot.rchild = 0;
    }

    // Repeatedly join the two lightest parentless nodes under the next free
    // internal slot. The last slot filled is the root.
    int low_a, low_b;
    for (int merged = n + 1; merged <= node_total; ++merged)
    {
        Select(HT, merged - 1, low_a, low_b);
        HT[low_a].parent = merged;
        HT[low_b].parent = merged;
        HT[merged].lchid = low_a;
        HT[merged].rchild = low_b;
        HT[merged].weight = HT[low_a].weight + HT[low_b].weight;
    }
}
/*
 * name:   HuffmanTranCoding
 * author: wyh
 * func:   Walk the Huffman tree along a string of '0'/'1' characters.
 * elem:   HT is the tree (not modified)
 *         n  is the number of leaves; the root is slot 2*n - 1
 *         ch is a NUL-terminated bit string; '0' goes left, any other
 *            character goes right
 * NOTE(review): the decoded symbols are not emitted anywhere yet — the
 * output statement was left unfinished — so the function currently has no
 * observable result.
 */
void HuffmanTranCoding(const HuffmanTree HT,const int n,char *ch)
{
    // Without at least two leaves the root has no children to descend into.
    if (HT == NULL || ch == NULL || n <= 1)
        return;

    const int root = 2 * n - 1;
    int node = root;

    // One step per input character. Checking the terminator on every step
    // keeps a string that ends in the middle of a code from being read past
    // its end.
    for (int j = 0; ch[j] != '\0'; ++j)
    {
        node = (ch[j] == '0') ? HT[node].lchid : HT[node].rchild;

        if (HT[node].lchid == 0 || HT[node].rchild == 0)
        {
            // Leaf reached: the symbol is char(node - 1). Start over at the
            // root for the next code.
            node = root;
        }
    }
}
/*
* name:哈夫曼压缩
* author:wyh
* func:完成哈夫曼压缩工作 将在函数内打开文件并进行读取压缩
* elem:无需对树更改 const 变量
*return:为0表示存储失败,为1表示存储成功
*/
int HuffmanZip(const HuffmanTree HT,int n,bool is_need_cin=true,char* txtw=NULL,char* txtd=NULL)
{
int count_char = 0;//记录读入的字符总数
int count_bit = 0;//记录写入的bit总数
int start;
int f,c;
int i;
char *txtW = {};
char *txtD = {};
if (is_need_cin) {
cout << "请输入要压缩的文档的绝对路径(请确保绝对路径不要过长)" << endl;
txtW = new char[256];
cin >> txtW;
cout << "请输入想要的生成压缩文件的名称" << endl;
txtD = new char[256];
cin >> txtD;
}
else
{
txtW = txtw;
txtD = txtd;
}
ifstream infile(txtW, ios::in);
//进行文件打开判断
if (!infile.is_open())
{
cout << "文件打开失败" << endl;
return 0;
}
ofstream outfile(txtD, ios::out);
//cout << txtD << endl;
if (!outfile.is_open())
{
cout << "文件打开失败" << endl;
infile.close();
return 0;
}
//infile.seekg(ios::end);
//cout<< infile.tellg();
//infile.seekg(ios::beg);
unsigned int cd;
cd = 0x0fffffff;
char ch;
char put_ch;
//这是一种后端给值的方式,从后往前给了一个串,我们要做的 同样是从后往前,完美
/*满一个char就送进去 送一个char*/
while (!infile.eof())
{
ch = infile.get();
c = ch+1;
for (f = HT[ch+(ch != -1)].parent; f != 0; c = f, f = HT[f].parent)
{
if (HT[f].lchid == c)
{
//高位置0
cd = cd >> 1;
cd = cd &0x7fffffff;
}
else
{
//高位置1
cd = cd >> 1;
cd = cd | 0x80000000;
}
count_bit++;
if ((cd & 0xffffff) == 0xfffff)
{
//这一步可能出错
//ch = cd >> 24;
put_ch = cd >> 24;
cout <<ch<< hex << cd;
outfile.put(put_ch);
cd = 0x0fffffff;
count_char++;
}
}
}
if (count_bit % 8 != 0)
{
//将最后几个bit直接输入
put_ch = cd >> (24 + 8 - count_bit % 8);
cout << ch << hex << cd;
outfile.put(put_ch);
}
int count_left = count_bit % 8;
outfile .put( count_left);
infile.close();
outfile.close();
}
// Builds the code string of every leaf and prints one line per leaf:
// "<character code> <character> <code>".
//   HC receives a new array of n + 1 strings; HC[i] is the code of leaf i
//      (slot 0 is unused and set to NULL). Each string is allocated with
//      new[] and left for the caller.
//   n  is the number of leaves.
//   HT is the tree (not modified).
void PrintHuffman(HuffmanCode &HC,int n,const HuffmanTree HT)
{
    if (n <= 0)
    {
        HC = NULL;
        return;
    }

    HC = new char* [n + 1];
    HC[0] = NULL;

    // A code has at most n - 1 bits, so n bytes hold any code plus its
    // terminator. The buffer is filled from the back because the walk goes
    // from leaf to root while the code reads from root to leaf.
    char* scratch = new char[n];
    scratch[n - 1] = '\0';

    for (int i = 1; i <= n; i++)
    {
        int start = n - 1;
        for (unsigned int c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent)
            scratch[--start] = (HT[f].lchid == c) ? '0' : '1';

        // Copy once, after the whole code is known. Doing this inside the
        // walk allocated a fresh buffer per bit and dropped all but the last.
        HC[i] = new char[n - start];
        strcpy(HC[i], &scratch[start]);

        cout <<i-1<<" "<<char(i-1)<<" "<< HC[i] << endl;
    }

    delete[] scratch;
}
int RevHuffmanUnZip(const HuffmanTree HT, int n,char *txtW,char * txtD)
{
ifstream infile(txtW, ios::in);
//进行文件打开判断
if (!infile.is_open())
{
cout << "文件打开失败" << endl;
return 0;
}
ofstream outfile(txtD, ios::out);
//cout << txtD << endl;
if (!outfile.is_open())
{
cout << "文件打开失败" << endl;
infile.close();
return 0;
}
infile.seekg(-1,ios::end);
int count = 0;
int left = infile.get();
count++;
//开始漫游树
//假设上面最后剩下了5个bit 那么 8个里面有3个0和5个有效数字
//找到最大根结点位置
int weight_max = 0;
int pos = 0;
for (int i = 1; i <= 2 * n - 1; i++)
{
if (HT[i].weight > weight_max)
{
weight_max = HT[i].weight;
pos = i;
}
}
//文本最后一个字节存着有效字节数
//读编码
unsigned char ch;
int now_value;
//先特例化处理第一个字符
infile.seekg(-1-count, ios::end);
ch = infile.get();
count++;
int i = 0;
int count_weishu = 1;//记录char中8个字节读了多少位
while (1) {
i = pos;//从根开始找
if (count_weishu > left)
break;
if (left == 0)
break;
while (HT[i].lchid != 0 && HT[i].rchild != 0)
{
if ((now_value = (ch >> (left - count_weishu)) & 1) == 0)//若为1 向右走
{
i = HT[i].lchid;
}
else//否则向左走
i = HT[i].rchild;
count_weishu++;
//如果多余的字符走完了还没找到结果 开始正常的读字符吧
if (count_weishu > left)
break;
}
//找到了 在HT[i]
if (!(HT[i].lchid != 0 && HT[i].rchild != 0))
{
outfile << char(i - 1);
continue;//直到left用完了为止
}
else
{
break;
}
}
//开始正常处理:
count_weishu = 1;
bool is_find = false;
while (1)
{
infile.seekg(-1 - count, ios::end);
if (infile.tellg() == -1)
return 1;//读完了,结束了!
ch = infile.get();
count++;
//i = pos;不使用它的原因是要用上一个结点
while ((HT[i].lchid != 0 && HT[i].rchild != 0)||(count_weishu<=8))
{
is_find = false;
if ((now_value = (ch >> (8 - count_weishu)) & 1) == 0)//若为1 向右走
{
i = HT[i].lchid;
}
else//否则向左走
i = HT[i].rchild;
count_weishu++;
//找到了 在HT[i]
if (!(HT[i].lchid != 0 && HT[i].rchild != 0))
{
outfile << char(i - 1);
i = pos;
is_find = true;
if (count_weishu > 8)
break;
continue;//直到left用完了为止
}
//如果8个字符走完了还没找到结果 读下一个字符
if (count_weishu > 8)
break;
}
//出来是因为 找完了并且8个字符用完了
if (is_find)
i = pos;
count_weishu = 1;
}
infile.close();
outfile.close();
}
// Interactive decompression: asks for the compressed file and the output
// file, then restores the original text.
//   HT is the tree (not modified)
//   n  is the number of leaves in HT
// Returns 0 if a decode step fails, 1 otherwise.
//
// RevHuffmanUnZip yields the text back to front, so the work is done in
// three steps: decode (reversed text), compress that reversed text again,
// decode once more (text in its original order).
// NOTE: the second step writes to the compressed file's path, so the file
// the user supplied is overwritten.
int HuffmanUnZip(const HuffmanTree HT, int n)
{
    const int kPathCapacity = 256;
    char txtW[kPathCapacity] = {};
    char txtD[kPathCapacity] = {};

    // width() caps each extraction at the buffer size (terminator included).
    cout << "请输入要解压的文档的绝对路径(请确保绝对路径不要过长)" << endl;
    cin.width(kPathCapacity);
    cin >> txtW;
    cout << "请输入想要的生成解压文件的名称" << endl;
    cin.width(kPathCapacity);
    cin >> txtD;

    // Stop before the compressed file gets overwritten if it could not even
    // be decoded.
    if (!RevHuffmanUnZip(HT, n, txtW, txtD))
        return 0;

    HuffmanZip(HT, n, false, txtD, txtW);

    if (!RevHuffmanUnZip(HT, n, txtW, txtD))
        return 0;
    return 1;
}
// Entry point: builds the Huffman tree from a reference text file, prints
// the code table, then compresses or decompresses depending on user input.
int main()
{
    // One leaf per slot of the weight table.
    const int kSymbolCount = static_cast<int>(sizeof(c_weight) / sizeof(c_weight[0]));
    const int kPathCapacity = 256;

    cout << "请输入作为存树的文档的绝对路径(请确保绝对路径不要过长)" << endl;
    char txtW[kPathCapacity] = {};
    // width() caps the extraction at the buffer size (terminator included).
    cin.width(kPathCapacity);
    cin >> txtW;
    WeightCul(txtW);

    // Build the tree.
    HuffmanTree HT = NULL;
    HuffmanCoding(HT,c_weight,kSymbolCount);

    HuffmanCode HC = NULL;
    PrintHuffman(HC, kSymbolCount, HT);

    // 1 compresses, 0 decompresses.
    cout << "请选择压缩还是解压 1表示压缩,0表示解压" << endl;
    bool judge = false;
    cin >> judge;
    if (judge)
        HuffmanZip(HT,kSymbolCount);
    else
        HuffmanUnZip(HT,kSymbolCount);

    return 0;
}