#define _CRT_SECURE_NO_DEPRECATE
#include
#include
#include
#include
#include
#include
#include
using namespace std;
int bytes_count = 0;//哈夫曼树的叶子节点
long long file_length = 0;//保持文件的字节数,初始为0
const int FILENAMEMAX = 128;//文件名(包括路径),长度不能超过127
const int SUFFIXNAMEMAX = 10;//文件后缀名长度不超过9个字符
char sourse_filename[FILENAMEMAX] = "test.txt";//源文件名字
char compress_filename[FILENAMEMAX] = "test.buf";//压缩文件名字
char decompress_filename[FILENAMEMAX] = "detest";//解压文件名字
char prefix_filename[128];//文件前缀名
char suffix_filename[128];//文件后缀名
const int BUFF_MAXSIZE = 1024 * 1024;//文件哈夫曼编码缓冲区的大小
char buff[BUFF_MAXSIZE] = "";/*文件哈夫曼编码缓冲区(生成压缩文件时,各字节
字节的编码先存在buff中,然后8个码压缩成一个字节存到buff_string中缓冲区)*/
char buff_string[BUFF_MAXSIZE / 8] = "";//压缩文件写入缓冲区
const int BLOCK_MAXSIZE = 1024 * 1024;//解写入解压文件的缓冲区的大小
char block[BLOCK_MAXSIZE] = "";//写入解压文件的缓冲区
int block_current = 0;//写解压文件缓冲区的当前读写位置
struct HaffNode
{
unsigned char byte;//结点数据
long long weight;//权值
int parent;//双亲结点下标
int leftChild;//左孩子下标
int rightChild;//右孩子下标
char code[256];//哈夫曼编码
int code_len;//编码长度
};
HaffNode HaffTree[511];
HaffNode HaffTree2[511];
HaffNode HaffTree3[511];
void initHaffTree();
void Statistics();
bool compare(HaffNode a, HaffNode b);
void paixu();
void createTree();
void creatHaffCode();
void writeCompressFile();
void uncompress();
void ptf();
#include"T.h"
int main()
{
writeCompressFile();
uncompress();
}
void initHaffTree()
{
for (int i = 0; i < 511; i++)
{
HaffTree[i].parent = HaffTree[i].leftChild = HaffTree[i].rightChild = -1;
HaffTree[i].weight=0;
HaffTree[i].byte=0;
}
}
void Statistics()
{
FILE *ifp;
//printf("\t请你输入需要压缩的文件名: ");
cin >> sourse_filename;//获取文件路径
ifp = fopen(sourse_filename, "rb");
if (ifp == NULL)
{
printf("\n\t文件打开失败!\n\n");
system("pause");
}
//
unsigned char c;//接收读取的一个字节
while (!feof(ifp))
{
fread(&c, 1, 1, ifp);//从文件中读取一个字节到c
HaffTree[c].weight++;
file_length++;//统计源文件长度,每读一字节长度+1
}
file_length--;//原文件的长度多计算了一次
HaffTree[c].weight--;
/*cout << "文件总长度:";
cout << file_length<
system("pause");*/
for (int i = 0; i < 511; i++)
{
if (HaffTree[i].weight != 0)
{
HaffTree[i].byte = (unsigned char)i;
}
}
fclose(ifp);
}
void paixu()
{
sort(HaffTree, HaffTree + 511, compare);
}
bool compare(HaffNode a, HaffNode b)
{
return a.weight > b.weight;
}
void createTree()
{
int i, n, m;
for (i = 0; i < 511; i++)
if (HaffTree[i].weight == 0)
break;//一旦发现权值为0,后面都为0
bytes_count = i;
n = i;
m = 2 * n - 1;
/*printf("叶子结点n=%d, 总结点m=%d\n", n, m);
system("pause");*/
int j;
long min1, pt1;
for (i = n; i < m; i++)//构建哈夫曼树的后n-1个结点
{
min1 = 999999;//预设的最大权值,即结点出现的最大次数
for (j = 0; j < i; j++)
{
//parent!=-1 说明该结点已在哈夫曼树中,跳出循环重新选择新节点
if (HaffTree[j].parent != -1)
continue;
if (min1>HaffTree[j].weight)
{
pt1 = j;//min最小时的下标
min1 = HaffTree[j].weight;//min1为最小
continue;
}
}
//上面已经取出最小的,把它作为哈夫曼树的左结点,设置好相关信息
HaffTree[i].weight = HaffTree[pt1].weight;
HaffTree[pt1].parent = i;//找到第i个结点的左孩子
HaffTree[i].leftChild = pt1;//计算左分支权值大小
min1 = 999999;
for (j = 0; j < i; j++)
{
if (HaffTree[j].parent != -1)
continue;
if (min1>HaffTree[j].weight)
{
pt1 = j;//min最小时的下标
min1 = HaffTree[j].weight;//min1为最小
continue;
}
}
HaffTree[i].weight += HaffTree[pt1].weight;
HaffTree[i].rightChild = pt1;//设置第i个结点的右孩子
HaffTree[pt1].parent = i;//设置第i个结点右孩子的父结点信息
}
/*printf("下标\t字节值\t权值\t左孩子\t右孩子\t\t双亲结点\n");
for (int i = 0; i < 511; i++)
{
printf("%d\t%x\t%d\t", i, HaffTree[i].byte, HaffTree[i].weight);
printf("%d\t%d\t\t%d\n", HaffTree[i].leftChild, HaffTree[i].rightChild, HaffTree[i].parent);
}
system("pause");*/
/*ptf();*/
}
void creatHaffCode()
{
int i,m,n;
n = bytes_count;
m = 2 * n - 1;
int f;
for (i = 0; i < m; i++)//为每一个结点编码
{
f = i;
HaffTree[i].code[0] = 0;//AscII码为0的字符,即为\0结束符
while (HaffTree[f].parent != -1)//若没找到根节点
{
int j = f;//也就是i
f = HaffTree[f].parent;
if (HaffTree[f].leftChild == j)//置左分支编码0
{
j = strlen(HaffTree[i].code);
//拷贝,留出位置放当前的编码
//j+1意味拷贝时把\0复制,memmove不出现重叠部分被覆盖
memmove(HaffTree[i].code + 1, HaffTree[i].code, j + 1);
//依次存储连接"0" "1"编码
HaffTree[i].code[0] = '0';//左分支记为0
}
else//置右分支编码1
{
j = strlen(HaffTree[i].code);
//拷贝,留出位置放当前的编码
//j+1意味拷贝时把\0复制,memmove不出现重叠部分被覆盖
memmove(HaffTree[i].code + 1, HaffTree[i].code, j + 1);
//依次存储连接"0" "1"编码
HaffTree[i].code[0] = '1';//右分支记为0
}
}
}
}
void ptf()
{
printf("下标\t字节值\t权值\t左孩子\t右孩子\t\t双亲结点\n");
for (int i = 0; i < 110; i++)
{
printf("%d\t%x\t%d\t", i, HaffTree[i].byte, HaffTree[i].weight);
printf("%d\t%d\t\t%d\n", HaffTree[i].leftChild, HaffTree[i].rightChild, HaffTree[i].parent);
}
/*system("pause");*/
}
//void writeCompressFile()
//{//生成压缩文件
//从源文件的名中获取文件的类型
//以读的方式打开二进制源文件ifp;以写的方式打开二进制压缩文件ofp;
//向ofp文件中写入头部信息(注意,一定要先确保头部信息无误才往下编写程序)
//清空buff缓冲区;
//从源文件ifp中读取一个字节到 c
//while (ifp文件没有结束)
//{
//if (缓冲区剩余的空间至少还可以多放一个字节的哈夫曼编码) {
//(注意:哈夫曼编码的长度小于256)
//查出c字节对应的哈夫曼编码字符串,接入到缓冲区中。
//从源文件ifp中再读取一个字节到 c
//}
//else {//缓冲区快要满了
//把缓冲区中尽可能多的内容写入文件中。
//(注意:缓冲区中的一个元素,对应一位;即需要把缓冲区中的每8个元素先压成一个字节,才能写入文件压缩文件中,建议在切分字节之前,先把哈夫曼编码数出来看一看,确保无误之后在切分。)
//}
//}
//把缓冲区中剩余的字符写入文件
//(注意:若缓冲区中剩余的字符数量不是8的倍数时,即最后一个字节是不完整的,需要在低位进行补0,再写入)
//}
//关闭ofp文件
//关闭ifp文件
//}
void writeCompressFile()
{
initHaffTree();
Statistics();
paixu();
createTree();
creatHaffCode();
char *ptr,str[9];
ptr = strchr(sourse_filename, '.');
strncpy(suffix_filename, ptr+1, 6);
/*printf("%s\n", suffix_filename);*/
int s = strlen(suffix_filename);
/*cout << strlen(suffix_filename) << endl;*/
FILE *ifp, *ofp;
ifp = fopen(sourse_filename, "rb");//rb读取2进制
if (ifp == NULL)
{
printf("\n\t 文件打开失败!\n\n");
return;
}
ofp = fopen(compress_filename, "wb");
if (ofp == NULL)
{
printf("\n\t 文件压缩失败!\n\n");
return;
}
fprintf(ofp,"%d,",s);
fprintf(ofp, "%s,",suffix_filename);
fprintf(ofp, "%lld,", file_length);
fprintf(ofp, "%d,", bytes_count);
for (int i = 0; i
{
fprintf(ofp, "%c,", HaffTree[i].byte);
fprintf(ofp, "%lld,", HaffTree[i].weight);
}
/*memcpy(HaffTree2, HaffTree, 5000);*/
HaffTree2[511] = HaffTree[511];
for (int i = 0; i < 511; i++)
{
HaffTree2[i].parent = HaffTree2[i].leftChild = HaffTree2[i].rightChild = -1;
}
for (int i=0;i<511;i++)
{
unsigned char b;
b = HaffTree[i].byte;
HaffTree2[b].byte = HaffTree[i].byte;
HaffTree2[b].weight = HaffTree[i].weight;
HaffTree2[b].parent = HaffTree[i].parent;
HaffTree2[b].leftChild = HaffTree[i].leftChild;
HaffTree2[b].rightChild = HaffTree[i].rightChild;
strcpy(HaffTree2[b].code, HaffTree[i].code);
HaffTree2[b].code_len = HaffTree[i].code_len;
}
unsigned char c;//接收读取的一个字节
while (!feof(ifp))
{
c = fgetc(ifp);
if (strlen(buff)
{
strcat(buff, HaffTree2[c].code);
}
/*else
{
}*/
}
/*printf("%s\n", buff);*/
int strlength = strlen(buff);
int i,j;
for (i = 0; (i*8)
{
int a = 0;
unsigned char ch = 0;
for (j=(i*8);j
{
if (buff[j] == '1')
{
ch = ch + pow(2, 7 - a);
}
a++;
}
/*printf("%u\n", ch);*/
buff_string[i] = ch;
}
fprintf(ofp, "%s", buff_string);
/*printf("%s\n", buff_string);
system("pause");*/
fclose(ifp);
fclose(ofp);
/*printf("%d\n", i);
printf("%d\n", j);
*/
}
void uncompress()
{
FILE *ifp, *ofp;
int suffix_filename_len;
ifp = fopen(compress_filename, "r");
if(ifp==NULL){
cout<
exit(0);
}
fscanf(ifp,"%d,",&suffix_filename_len);
fread(suffix_filename,1,suffix_filename_len,ifp);
fscanf(ifp,",%lld,",&file_length);
fscanf(ifp,"%d,",&bytes_count);
strcat(decompress_filename,".");
strcat(decompress_filename, suffix_filename);
ofp = fopen(decompress_filename, "wb");
if (ofp == NULL)
{
printf("\n\t 生成文件压缩失败!\n\n");
exit(0);
}
initHaffTree();
for(int i=0;i
{
fscanf(ifp,"%c,",&HaffTree[i].byte);
fscanf(ifp,"%lld,",&HaffTree[i].weight);
}
createTree();
/*ptf();*/
strcpy(buff,"");
/*printf("%s\n",buff);*/
strcpy(block,"");
int refile_lenght = 0;
int buff_current=0;
int byte_current=0;
int tree_current=bytes_count*2-2;
fread(buff,10000,10000,ifp);
printf("%s\n",buff);
while (refile_lenght!=file_length)
{
if (byte_current == 8)
{
buff_current++;
byte_current = 0;
}
int a= pow(2, 7 - byte_current);
if(HaffTree[tree_current].leftChild==-1 && HaffTree[tree_current].rightChild==-1)
{
fprintf(ofp, "%c", HaffTree[tree_current].byte);
tree_current = bytes_count * 2 - 2;
refile_lenght++;
}
else
{
if ((buff[buff_current] & a) > 0)
{
tree_current = HaffTree[tree_current].rightChild;
byte_current++;
}
else
{
tree_current = HaffTree[tree_current].leftChild;
byte_current++;
}
}
}
fclose(ifp);
fclose(ofp);
}
//void deCompressFile() {//解压文件件
//打开压缩文件
//开始读文件头部(包括源文件后缀名, 字节总数,被编码字节数,各字节的字节频度)
//initHaffTree();//初始化哈夫曼树存储结构
//creatHaffTree();//构造哈夫曼树
//初始化读出数据缓冲区buff为空(压缩文件批量读出,有利于提高解压速度)
//初始化写入数据缓冲区block空(解压出的字节,批量写入有利于提高解压速度)
//初始化解压出来的字节数refile_lenght = 0;
//初始化读缓冲区字节游标buff_current = 0;
//初始化读缓冲区位游标byte_current = 0;
//初始化当前哈夫曼结点tree_current = bytes_count - 2;// 根结点位置
//读压缩文件,装满读缓冲区 ;
//while (refile_lenght != 源文件的字节总数) { //源文件未被解压完成
//if (tree_current位置的结点是叶子结点) {
//把该结点的的原字节,接入block缓冲区中,
//如果block缓冲区满,则把block缓冲区中的内容写入解压文件中。
//tree_current回到根结点位置。
//}
//else {
//If(buff缓冲区中的buff_current位置的从左数第byte_current位是1)
//tree_current = tree_current结点的右孩子结点;
// else
// tree_current = tree_current结点的左孩子结点;
//
//}
//if (byte_current<7) //一个字节未完
//byte_current++’;// 字节游标不变,位游标加1
//else {
//byte_current = 0;
//buff_current++;
//If(buff_current == BUFF_MAXSIZE) { //缓冲区中的数据已经用完了
//buff装入新的数据;
//buff_current = 0;
//}//if
//}//else
//}//while
//把block缓冲区中剩余的字节,写入解压文件中。
//关闭ifp文件;
//关闭ofp文件;
//}