数据结构的作业,压缩软件用的,具体写的过程中有哪些问题在程序里说吧。
头文件与常量部分:
利用char的8位,来存储文件里的元素。每次从文件中取出8位(一个字节),并记录每种取值出现的次数,用来建立哈夫曼树。
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<iostream>
#include<iterator>
#include<list>
#include<stack>
#include<string>
using namespace std;
// Data is processed one 8-bit char at a time, so text, CJK characters or any other file format can all be compressed.
const unsigned int N = 256;// a char has 8 bits, so there are at most 256 distinct symbols; this is the maximum leaf count
const unsigned int M = 2 * N;// a Huffman tree with n leaves has 2*n-1 nodes; the last array slot holds no node and is reserved for select
// Weight stored in nodes[M-1] by enSieve (2^48 - 1); select starts its minimum search from that slot.
const unsigned long long MAX = 0xffffffffffff;
三个类的声明部分:
Buffer类用来从文件中读取8位字符或者写入8位字符。因为每次都只能从文件中读取或者写入一个字符,所以利用Buffer类进行缓冲。
treeNode类是哈夫曼树的结点类,其中保存了树中每个结点出现的频率与其左右子结点与父节点。
HuffmanTree类是哈夫曼树类,可以进行压缩或解压。
class Buffer {// bit buffer: file reads/writes go through it so that whole bytes are transferred while callers work bit by bit
public:
unsigned int bits;// when writing: bits accumulated in ch so far; when reading: bits of ch not yet consumed
char ch;// the buffered byte
};
class HuffmanTree;// forward declaration so treeNode can befriend it
class treeNode {// one node of the Huffman tree; all fields are accessed by HuffmanTree only
friend HuffmanTree;
private:
unsigned long long weight;// occurrence count: byte frequency for a leaf, sum of both children for an internal node
unsigned int right, left;// indices of the children in HuffmanTree::nodes
unsigned int parents;// index of the parent in HuffmanTree::nodes (the encoder uses M-1 and the decoder N-1 as the "none" marker)
};
class HuffmanTree {// Huffman coder: compresses a file (Encode) or restores one (Decode)
private:
treeNode nodes[M];// every tree node, at most M; leaves come first, internal nodes follow
unsigned int leaf[N];// indexed by byte value: enSieve stores the leaf node index of each byte here
unsigned int index[N];// indexed by leaf node index: the byte value that leaf represents
char* leafCode[N];// indexed by leaf node index: the code as a '0'/'1' C string allocated with new[] in enSetTree
// NOTE(review): no destructor is declared in this class, so the leafCode buffers are not released by it.
FILE* input, *output;
unsigned int num;// actual number of leaves (distinct byte values)
unsigned long long size;// total number of bytes in the uncompressed data
Buffer buf;// bit buffer shared by the read and write helpers
// generic helpers
void write(unsigned int i);// write a single bit to output
void write(unsigned int num, unsigned int bits);// write the low `bits` bits of num to output, most significant first
void writerest();// flush a partially filled buffer byte, padding it with zero bits
void read(unsigned int &i);// read a single bit from input into i
void read(unsigned int &num, unsigned int bits);// read `bits` bits from input into num, most significant first
// Encode helpers
void enSieve();// open the files and count how often each byte occurs in the input
void select(unsigned int pos,unsigned int &t1,unsigned int &t2);// pick the two lightest parentless nodes among indices [0, pos) and return them in t1, t2
void enSetTree();// build the Huffman tree for the file being compressed and derive the code of every leaf
// Decode helpers
void deSieve();// open the files and rebuild the Huffman tree from the compressed file's header
public:
void Encode();// compress a file
void Decode();// decompress a file
};
HuffmanTree类的实现:
读取/写入辅助函数:
主要是利用Buffer类中的bits来记录ch中实际有效的位数。在write的时候,如果ch中实际位数已经足够8位,那么将ch写入文件,然后再将bits置0,ch置0;在read的时候,如果ch的实际位数已经等于0,那么从文件中用fgetc()读取一个8位数给ch,同时bits置8。另外,有可能在写入到最后的时候,bits实际上并不等于0,那么用writerest在低位补0凑满8位,把剩下的ch写入文件。
//辅助函数块
// Appends one bit to the output bit buffer.
// i is the bit to append; callers pass 0 or 1.
// Once eight bits have been collected the byte is written to `output`
// and the buffer starts over empty.
void HuffmanTree::write(unsigned int i) {
    // Make room at the low end and drop the new bit in.
    buf.ch = (buf.ch << 1) + i;
    if (++buf.bits != 8)
        return; // byte not complete yet

    fputc(buf.ch, output);
    buf.ch = 0;
    buf.bits = 0;
}
// Writes the low `bits` bits of `target` to the output, most significant
// of those bits first, using the single-bit write().
//   target - value to emit
//   bits   - how many bits to emit; positions at or above the width of
//            unsigned int are emitted as 0
// The bits are emitted directly from high to low, so no temporary
// container is needed to reverse them.
void HuffmanTree::write(unsigned int target, unsigned int bits) {
    const unsigned int width = sizeof(target) * 8;
    for (unsigned int remaining = bits; remaining > 0; --remaining) {
        const unsigned int shift = remaining - 1;
        // Shifting by >= width would be undefined, and those positions are 0 anyway.
        const unsigned int bit = (shift < width) ? ((target >> shift) & 1u) : 0u;
        write(bit);
    }
}
// Flushes a partially filled buffer byte: pads it with zero bits until the
// single-bit write() emits it. Does nothing when the buffer is empty.
void HuffmanTree::writerest() {
    if (buf.bits == 0)
        return;

    unsigned int padding = 8 - buf.bits;
    while (padding > 0) {
        write(0);
        --padding;
    }
}
// Reads one bit from the input bit buffer.
//   i - receives the bit (0 or 1)
// When the buffer is empty the next byte is fetched from `input` with
// fgetc; bits are handed out starting with the most significant one.
// At end of file fgetc's EOF value is stored into the byte as-is, so
// callers have to consult feof(input) themselves.
void HuffmanTree::read(unsigned int &i) {
    if (buf.bits == 0) {
        buf.bits = 8;
        buf.ch = fgetc(input);
    }
    // Work on an unsigned copy: left-shifting a negative (sign-extended)
    // char is undefined behaviour before C++20.
    const unsigned char pending = static_cast<unsigned char>(buf.ch);
    i = (pending >> 7) & 1u;
    buf.ch = static_cast<char>(static_cast<unsigned char>(pending << 1));
    buf.bits--;
}
// Reads `bits` bits from the input and assembles them into `target`,
// first bit read ending up as the most significant.
//   target - receives the assembled value (reset to 0 first)
//   bits   - number of bits to read
void HuffmanTree::read(unsigned int &target, unsigned int bits) {
    target = 0;
    unsigned int bit = 0;
    for (unsigned int remaining = bits; remaining != 0; --remaining) {
        read(bit);
        // read() yields 0 or 1 and the shifted value has a free low bit.
        target = (target << 1) | bit;
    }
}
Encode()函数及其辅助函数的实现部分:
昨晚在这部分出现了一个错误点,在enSetTree函数的实现部分,如下代码:
for (int i = 0; i < N; ++i)
leafCode[i] = NULL;
被我写成了:
for (int i = 0; i <= N; ++i)
leafCode[i] = NULL;
然后就导致了FILE* input莫名其妙被置成了NULL:leafCode只有N个元素,leafCode[N]已经越界,而类中紧跟在leafCode后面声明的成员正是input,越界写入很可能正好把它覆盖了。之后找到错误之后感慨了一下不要乱置NULL。。下标一定要看清。。。
//Encode辅助函数块
void HuffmanTree::enSieve() {//读取文件,并且统计文件中字符数
char inName[1000], outName[1000];
cout << "Input file name that you want to code:";
cin >> inName;
cout << "Input target file name:";
cin >> outName;
if ((input = fopen(inName, "rb")) == NULL) {
cout << "Can not open file." << endl;
system("pause");
exit(1);
}
if (feof(input)) {
cout << "Empty source file" << endl;
system("pause");
exit(1);
}
if ((output = fopen(outName, "wb")) == NULL) {
cout << "Can not open file." << endl;;
system("pause");
exit(1);
}
//从文件中读取字符,并统计字符出现频率
rewind(input);
unsigned int ch;
size = 0;
for (unsigned int i = 0; i < N; ++i) {
leaf[i] = 0;
index[i] = 0;
}
for (unsigned int i = 0; i < M; ++i) {
nodes[i].weight = 0;
nodes[i].left = nodes[i].right = nodes[i].parents = M-1;
}
ch = fgetc(input);
while (!feof(input)) {
leaf[ch]++;
size++;
ch = fgetc(input);
}
//nodes[N-1].weight置为最大
nodes[M-1].weight = MAX;
//筛掉出现频率为0的字符,并写入nodes,index数组,并修改num值
num = 0;
for (unsigned int i = 0; i < N; ++i)
if (leaf[i]) {
nodes[num].weight = leaf[i];
leaf[i] = num;
index[num] = i;
num++;
}
if (!num) {
cout << "doesn't have a word" << endl;
system("pause");
exit(1);
}
}
// Finds the two lightest nodes without a parent among indices [0, pos).
//   pos - exclusive upper bound of the node indices to inspect
//   t1  - receives the index of the lightest such node
//   t2  - receives the index of the lightest such node other than t1
// Among equal weights the lower index wins. An output that finds no
// candidate is left at M-1.
void HuffmanTree::select(unsigned int pos, unsigned int &t1,unsigned int &t2) {
    unsigned int best = M - 1;
    unsigned int runnerUp = M - 1;
    for (unsigned int cand = 0; cand < pos; ++cand) {
        if (nodes[cand].parents != M - 1)
            continue; // already merged into a subtree

        if (nodes[cand].weight < nodes[best].weight) {
            // New minimum: the previous minimum becomes the second lightest.
            runnerUp = best;
            best = cand;
        } else if (nodes[cand].weight < nodes[runnerUp].weight) {
            runnerUp = cand;
        }
    }
    t1 = best;
    t2 = runnerUp;
}
// Builds the Huffman tree over the num leaves prepared by enSieve and
// derives the code of every leaf.
// Internal nodes are created at indices num .. 2*num-2; the last one is
// the root. leafCode[k] receives the code of leaf k as a NUL-terminated
// string of '0' (left branch) and '1' (right branch) characters, allocated
// with new[]; all other leafCode entries are set to NULL.
void HuffmanTree::enSetTree() {
    for (unsigned int i = 0; i < N; ++i)
        leafCode[i] = NULL;
    if (num == 0)
        return; // nothing to build; also keeps num * 2 - 1 from wrapping

    // Repeatedly merge the two lightest parentless nodes.
    for (unsigned int i = num; i < num * 2 - 1; ++i) {
        unsigned int t1, t2;
        select(i, t1, t2);
        nodes[i].weight = nodes[t1].weight + nodes[t2].weight;
        nodes[i].left = t1;
        nodes[i].right = t2;
        nodes[t1].parents = nodes[t2].parents = i;
    }

    // A code has at most num-1 symbols; the last slot stays '\0' as terminator.
    std::string scratch(num, '\0');
    for (unsigned int leafIdx = 0; leafIdx < num; ++leafIdx) {
        unsigned int start = num - 1; // position of the terminator
        // Walk from the leaf up to the root, filling the code back to front.
        for (unsigned int child = leafIdx, parent = nodes[leafIdx].parents;
             parent != M - 1;
             child = parent, parent = nodes[child].parents) {
            scratch[--start] = (nodes[parent].left == child) ? '0' : '1';
        }
        leafCode[leafIdx] = new char[num - start]; // code length + terminator
        strcpy(leafCode[leafIdx], scratch.c_str() + start);
    }
    // scratch releases its storage automatically, so there is no
    // new[]/delete mismatch to get wrong.
}
// Compresses a file chosen interactively (see enSieve).
// Layout of the output file:
//   1. size, written as the raw bytes of an unsigned long long
//   2. one byte: low 8 bits of num (256 leaves are therefore stored as 0)
//   3. num bytes: the byte value of each leaf, in leaf order
//   4. a bit stream: left and right child index of every internal node,
//      maxbit bits each, followed directly by the code of every input
//      byte; the final byte is padded with zero bits
void HuffmanTree::Encode() {
    enSieve();   // opens input/output and gathers byte statistics
    enSetTree(); // builds the tree and the per-leaf codes
    rewind(output);
    rewind(input); // enSieve consumed the input while counting

    buf.bits = 0;
    buf.ch = 0;
    fwrite(&size, sizeof(unsigned long long), 1, output);
    write(num, 8); // the buffer is empty, so this emits exactly one byte
    for (unsigned int i = 0; i < num; ++i) {
        // Write the value's low byte explicitly rather than whichever byte
        // of the unsigned int happens to come first in memory.
        fputc(static_cast<int>(index[i] & 0xFFu), output);
    }

    // Field width for node indices: one more than the bit length of 2*num-1.
    unsigned maxbit = 1;
    unsigned int tmp = num * 2 - 1;
    while (tmp) {
        maxbit++;
        tmp >>= 1;
    }
    for (unsigned int i = num; i < num * 2 - 1; ++i) {
        write(nodes[i].left, maxbit);
        write(nodes[i].right, maxbit);
    }

    // Emit the code of every input byte. Comparing against EOF also stops
    // on a read error instead of using the error value as an array index.
    int c;
    while ((c = fgetc(input)) != EOF) {
        for (const char *code = leafCode[leaf[c]]; *code != '\0'; ++code)
            write(*code == '1' ? 1u : 0u);
    }
    writerest(); // flush the last, partially filled byte

    cout << "Done!\n\n";
    fclose(input);
    fclose(output);
}
Decode()函数及其辅助函数的实现部分:
//Decode辅助函数块
// Prompts for the compressed file and the target file, opens both and
// rebuilds the Huffman tree from the header of the compressed file.
// On return:
//   size     - number of bytes the decoded output must contain
//   num      - number of leaves (a stored 0 means 256)
//   index[k] - byte value of leaf k
//   nodes[]  - left/right/parents of every node; N-1 marks "none", so a
//              leaf is a node whose left and right are both N-1
// Terminates the program (after system("pause")) when a file cannot be
// opened or the header is incomplete or inconsistent.
void HuffmanTree::deSieve() {
    // std::string grows as needed, so over-long names cannot overflow a buffer.
    std::string inName, outName;
    cout << "Input file name that you want to decode:";
    cin >> inName;
    cout << "Input target file name:";
    cin >> outName;
    if ((input = fopen(inName.c_str(), "rb")) == NULL) {
        cout << "Can not open file." << endl;
        system("pause");
        exit(1);
    }
    // No feof() test here: the end-of-file flag is never set on a freshly
    // opened stream. A too-short file is rejected by the header checks below.
    if ((output = fopen(outName.c_str(), "wb")) == NULL) {
        cout << "Can not open file." << endl;;
        system("pause");
        exit(1);
    }

    rewind(input);
    for (unsigned int i = 0; i < M; ++i) {
        nodes[i].parents = nodes[i].right = nodes[i].left = N - 1;
    }
    buf.bits = 0;
    buf.ch = 0;

    bool valid = fread(&size, sizeof(unsigned long long), 1, input) == 1;
    if (valid) {
        read(num, 8);
        if (num == 0) num = 256; // 256 does not fit in 8 bits and is stored as 0
        for (unsigned int i = 0; i < num; ++i) {
            // Assign the whole unsigned int; reading a single byte into its
            // storage would leave the remaining bytes uninitialised.
            const int c = fgetc(input);
            if (c == EOF) {
                valid = false;
                break;
            }
            index[i] = static_cast<unsigned int>(c);
        }
    }
    if (valid) {
        // Field width for node indices: one more than the bit length of 2*num-1.
        unsigned maxbit = 1;
        unsigned int tmp = num * 2 - 1;
        while (tmp) {
            maxbit++;
            tmp >>= 1;
        }
        for (unsigned int i = num; i < num * 2 - 1; ++i) {
            read(nodes[i].left, maxbit);
            read(nodes[i].right, maxbit);
            // The encoder only ever merges nodes with a smaller index; any
            // other value would index outside nodes[] below.
            if (nodes[i].left >= i || nodes[i].right >= i) {
                valid = false;
                break;
            }
            nodes[nodes[i].left].parents = nodes[nodes[i].right].parents = i;
        }
    }
    if (!valid) {
        cout << "Corrupted compressed file." << endl;
        system("pause");
        exit(1);
    }
}
void HuffmanTree::Decode() {
deSieve();
//开始译码
rewind(output);
unsigned int tmp;
read(tmp);
for (int i = 0; i < size; ++i) {
unsigned int loc = 2 * num - 2;
while ((nodes[loc].left != N-1 || nodes[loc].right != N-1) && !feof(input)) {
if (tmp == 0)loc = nodes[loc].left;
else loc = nodes[loc].right;
read(tmp);
}
fputc(index[loc], output);
}
cout << "Done!\n\n";
fclose(input);
fclose(output);
}
写下来大概感受就是注意二进制的长短,以及不要写的头晕了。。。长度各种乱。。。奇葩。。
测试部分:
#pragma warning(disable:4996)
#include<iostream>
#include<cstdio>
#include<cmath>
#include<stack>
#include<queue>
#include<cstring>
#include<sstream>
#include<set>
#include<string>
#include<iterator>
#include<vector>
#include<map>
#include<algorithm>
#include"HuffmanTree.h"
using namespace std;
// Interactive driver: shows a menu until the user chooses '3'.
// '1' compresses a file, '2' decompresses one; any other input simply
// shows the menu again. A fresh HuffmanTree is used for every round.
int main(void) {
    cout << sizeof(char) << endl; // prints the size of char
    char choose = '1';
    while (choose != '3') {
        HuffmanTree tree;
        cout << "1.Huffman Encode" << endl;
        cout << "2.Huffman Decode" << endl;
        cout << "3.exit" << endl;
        // Stop when standard input is closed or unreadable; otherwise
        // `choose` would keep its old value and the menu would loop forever.
        if (!(cin >> choose))
            break;
        switch (choose) {
        case '1': tree.Encode(); break;
        case '2': tree.Decode(); break;
        default: break;
        }
    }
    // system("pause");
    return 0;
}
测试效果:
第一波(纯文字):
源:
压缩后:
解压后:
文件大小对比:
第二波(图片):
源:
压缩后:
解压后:
大小对比:
其实压缩效果并不是很好,有待改进。