哈夫曼树压缩文件
这里我创建了三个头文件:pch.h、compress.h 和 huffman.h。其中 compress.h 和 huffman.h 包含函数声明和结构体定义;pch.h 是 Visual Studio 的预编译头文件(不可缺少,否则使用 #include 时会出错,其实里面什么都没有)。以下是这三个文件的实现代码。
pch.h文件
// pch.h: header file. It only carries an include guard and declares nothing.
#ifndef _pch_h_
#define _pch_h_
#endif
huffman.h文件
// huffman.h: Huffman tree node type, code-table type and the tree/code routines.
#ifndef _HUFFMAN_H_
#define _HUFFMAN_H_
// One node of the Huffman tree. Nodes live in an array and refer to each
// other by array index; index 0 is used as the "none" marker.
struct HTNode
{
int nWeight;// weight of the node
int nParent;// index of the parent node (0 = no parent yet)
int nLchild;// index of the left child (0 = none)
int nRchild;// index of the right child (0 = none)
};
// Huffman tree
typedef HTNode *HTree;// dynamically allocated array holding the Huffman tree
typedef char **HCode;// dynamically allocated array holding the Huffman code table
// Builds the Huffman tree in pHT from the weights in aWeight.
// Returns 1 on success, 0 when the allocation fails.
int HuffmanTree(HTree &pHT, int *aWeight);
// Among elements 1..nSize of the tree, picks the one whose parent is 0 and
// whose weight is smallest, and returns its index (0 when none qualifies).
int Select(HTree pHT, int nSize);
// Test helper: prints the fields of every Huffman tree node to the console.
void TestHufTree(HTree &pHT);
// Test helper: pre-order traversal of the Huffman tree starting at root, printing weights.
void TestHufTreeN(int root,HTree &pHT);
// Walks the Huffman tree and generates the Huffman code table in pHC.
int HuffmanCoding(HCode &pHC, HTree &pHT);
// Test helper: prints the code of every leaf reachable from root.
void TestHufCode(int root, HTree pHT, HCode pHC);
#endif
compress.h文件
// compress.h: compressed-file header layout and the compression routines.
#ifndef _COMPRESS_H_
#define _COMPRESS_H_
// NOTE(review): this header includes itself; the include guard above turns the
// line into a no-op, so it looks removable — confirm and drop it.
#include "compress.h"
#include "huffman.h"
// Header stored at the start of the compressed file.
struct HEAD
{
char aType[4]; // file type tag
int nLength; // length of the original file
int aWeight[256]; // weight (occurrence count) per byte value
};
// Buffer that holds the encoded data
typedef char *BUFFER;
// Compresses the file named pFilename.
int Compress(const char *pFilename);
// Initialises the file header from the contents of pFilename.
int InitHead(const char *pFilename, HEAD &sHead);
// Encodes the file with the code table pHC into pBuffer (nSize bytes).
int Encode(const char *pFilename, const HCode pHC, BUFFER &pBuffer, const int nSize);
// Converts a "010101..." character string into one byte.
char Str2byte(const char *pBinStr);
// Writes the header and the encoded buffer to the output file.
int WriteFile(const char *pFilename, const HEAD sHead, const BUFFER pBuffer, const int nSize);
// Test helper: reads pFilename and prints the weight of every byte value.
void Testread(const char *pFilename);
#endif
接下来是三个源文件:compress.cpp、huffman.cpp,以及包含主函数的 main.cpp。以下是这三个文件的实现代码。
compress.cpp代码
#include "pch.h"
#include "compress.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
using namespace std;
int Compress(const char *pFilename)
{
HEAD sHead;
if (InitHead(pFilename, sHead) == 0)
{
return 0;
}
cout << sHead.nLength << "字节" << endl;
//生成Huffman树
HTree pHT = NULL;
HuffmanTree(pHT, sHead.aWeight);
if (!pHT)
{
return 0;
}
//TestHufTree(pHT); //测试Huffman树的生成,各节点的权重,孩子和父节点
//TestHufTreeN(511,pHT); //测试Huffman树各个节点的权重
// 生成Huffman编码表
HCode pHC = NULL;
HuffmanCoding(pHC, pHT);
//TestHufCode(511, pHT, pHC); //查看Huffman编码
free(pHT);//释放Huffman空间
if (!pHC)
{
return 0;
}
//计算编码缓冲区的大小
int nSize = 0;
for (int i = 0; i < 256; i++)
{
nSize += sHead.aWeight[i] * strlen(pHC[i+1]);
}
nSize = (nSize % 8) ? nSize / 8 + 1 : nSize / 8;
//对原文件进行压缩编码
BUFFER pBuffer = NULL;
Encode(pFilename, pHC, pBuffer, nSize);
if (!pBuffer)
{
return 0;
}
//输出压缩文件
int len = WriteFile(pFilename, sHead, pBuffer, nSize);
free(pBuffer);
if (len < 0)
{
return 0;
}
cout << len << "字节" << endl;
cout << "压缩比率:" << (double)len / (double)sHead.nLength * 100 << "%" << endl;
return 1;
}
// Fills sHead for the file named pFilename.
//
// The header is first reset (type tag "HUF", length 0, all weights 0); the file
// is then read byte by byte, counting each byte value in aWeight and the total
// number of bytes in nLength.
//
// Returns 1 on success, 0 when the file cannot be opened (the header is left
// in its reset state in that case).
int InitHead(const char *pFilename, HEAD &sHead)
{
    // Reset the header.
    strcpy(sHead.aType, "HUF");
    sHead.nLength = 0;
    int idx = 0;
    while (idx < 256)
    {
        sHead.aWeight[idx] = 0;
        ++idx;
    }

    // Open the file as a binary stream.
    FILE *fp = fopen(pFilename, "rb");
    if (fp == NULL)
    {
        cerr << "初始化文件头失败!" << endl;
        return 0;
    }

    // Scan the file and collect the weights.
    for (int byte = fgetc(fp); byte != EOF; byte = fgetc(fp))
    {
        sHead.aWeight[byte] += 1;
        sHead.nLength += 1;
    }

    fclose(fp);
    return 1;
}
int Encode(const char *pFilename, const HCode pHC, BUFFER &pBuffer, const int nSize)
{
//以二进制流形式打开文件
FILE *in = fopen(pFilename, "rb");
if (!in)
{
cerr << "打开文件失败!" << endl;
return 0;
}
//开辟缓冲区
pBuffer = (char*)malloc(nSize * sizeof(char));
if (!pBuffer)
{
cerr << "开辟缓冲区失败 " << endl;
return 0;
}
char cd[256] = { 0 };//工作区 SIZE变成256
int pos = 0;//缓冲区指针
int ch;
//扫描文件,根据Huffman编码表对其进行压缩,压缩结果暂存到缓冲区中
while ((ch = fgetc(in)) != EOF)
{
strcpy(cd, pHC[ch+1]);//从 HC复制编码串到 cd
//压缩编码
while (strlen(cd) >= 8)
{
//截取字符串左边的8个字符,编码成字节
pBuffer[pos++] = Str2byte(cd);
//字符串整体左移8字节
for (int i = 0; i < 256 - 8; i++)//SIZE变成256
{
cd[i] = cd[i + 8];
}
}
}
if (strlen(cd) > 0)
{
pBuffer[pos++] = Str2byte(cd);
}
fclose(in);
return 1;
}
// Converts the first 8 characters of a '0'/'1' string into one byte.
//
// The first character becomes the most significant bit. Only '1' sets a bit;
// any other character counts as 0. If the string ends before 8 characters,
// the missing low-order bits are 0 and nothing past the terminating NUL is
// read. A NULL pointer yields 0.
char Str2byte(const char *pBinStr)
{
    unsigned char value = 0;
    int ended = (pBinStr == 0); /* set once the terminating NUL has been seen */
    for (int i = 0; i < 8; i++)
    {
        value = (unsigned char)(value << 1); /* make room for the next bit */
        if (!ended)
        {
            if (pBinStr[i] == '\0')
            {
                ended = 1;
            }
            else if (pBinStr[i] == '1')
            {
                value = (unsigned char)(value | 0x01);
            }
        }
    }
    return (char)value;
}
int WriteFile(const char *pFilename, const HEAD sHead, const BUFFER pBuffer, const int nSize)
{
//生成文件名
char filename[256] = { 0 };
strcpy(filename, pFilename);
strcat(filename, ".huf");
//以二进制流形式打开文件
FILE *out = fopen(filename, "wb");
//....
//写文件头
fwrite(&sHead, sizeof(HEAD), 1, out);
//写压缩后的编码
fwrite(pBuffer, sizeof(char), nSize, out);
//关闭文件,释放文件指针
fclose(out);
out = NULL;
cout << "生成压缩文件:" << filename << endl;
int len = sizeof(HEAD) + strlen(pFilename) + 1 +nSize;
return len;
}
void Testread(const char *pFilename)
{
int ch;
int Weight[256] = { 0 };
//以二进制流形式打开文件
FILE *in = fopen(pFilename, "rb");
//扫描文件,获得权重
while ((ch = getc(in)) != EOF)
{
Weight[ch]++;
}
fclose(in);
cout<<"原文件每个字符的权值为:"<<endl;
cout << "Byte" << " Weight" << endl;
for (int i = 0; i < 256; i++)
{
printf("0x%02X %d\n", i,Weight[i]);
}
}
huffman.cpp代码
#include "huffman.h"
#include "pch.h"
#include <stdlib.h>
#include <iostream>
using namespace std;
// Builds a Huffman tree over 256 leaves from the weights in aWeight.
//
// The tree is a malloc'ed array of 512 nodes returned through pHT: slot 0 is
// unused (zeroed), slots 1..256 are the leaves (leaf i carries aWeight[i - 1]),
// slots 257..511 are the internal nodes created by merging, slot 511 being the
// last one created. The caller releases the array with free().
//
// Returns 1 on success, 0 when the allocation fails.
int HuffmanTree(HTree &pHT, int *aWeight)
{
    const int leafCount = 256;
    const int nodeCount = 2 * leafCount - 1;

    // Allocate the node array (one extra slot because index 0 is unused).
    pHT = (HTree)malloc((nodeCount + 1) * sizeof(HTNode));
    if (pHT == NULL)
    {
        cerr << "内存分配失败!" << endl;
        return 0;
    }

    // Initialise nodes 1..nodeCount: leaves take their weight, the rest 0.
    for (int idx = 1; idx <= nodeCount; idx++)
    {
        HTNode &node = pHT[idx];
        if (idx <= leafCount)
        {
            node.nWeight = aWeight[idx - 1];
        }
        else
        {
            node.nWeight = 0;
        }
        node.nParent = 0;
        node.nLchild = 0;
        node.nRchild = 0;
    }

    // Repeatedly merge the two lightest parentless nodes into a new node.
    for (int parent = leafCount + 1; parent <= nodeCount; parent++)
    {
        // Lightest node; marking its parent removes it from the next selection.
        const int first = Select(pHT, parent - 1);
        pHT[first].nParent = parent;
        // Second lightest node.
        const int second = Select(pHT, parent - 1);
        pHT[second].nParent = parent;

        HTNode &merged = pHT[parent];
        merged.nWeight = pHT[first].nWeight + pHT[second].nWeight;
        merged.nLchild = first;
        merged.nRchild = second;
        merged.nParent = 0;
    }

    // Zero the unused slot 0.
    HTNode &unused = pHT[0];
    unused.nLchild = 0;
    unused.nParent = 0;
    unused.nRchild = 0;
    unused.nWeight = 0;
    return 1;
}
// Returns the index, within 1..nSize, of the node that has no parent
// (nParent == 0) and the smallest weight; on ties the lowest index wins.
// Returns 0 when no node qualifies.
int Select(HTree pHT, int nSize)
{
    int bestIndex = 0;           // index of the lightest candidate so far
    int bestWeight = 0x7FFFFFFF; // its weight
    int idx = 1;
    while (idx <= nSize)
    {
        const HTNode &node = pHT[idx];
        if (node.nParent == 0 && node.nWeight < bestWeight)
        {
            bestWeight = node.nWeight;
            bestIndex = idx;
        }
        ++idx;
    }
    return bestIndex;
}
// Test helper: prints a header line, then weight, parent, left child and right
// child of tree nodes 1..511, one node per line.
void TestHufTree(HTree &pHT)
{
    cout << "Byte " << "Weignt " << "Parent " << "Lchild " << "Rchild" << endl;
    int idx = 1;
    while (idx < 512)
    {
        const HTNode &node = pHT[idx];
        printf("pHT[%d]\t%d\t%d\t%d\t%d\n", idx, node.nWeight, node.nParent, node.nLchild, node.nRchild);
        ++idx;
    }
}
// Test helper: pre-order traversal of the subtree rooted at index root.
// Prints the node's weight followed by a space, then visits the left child and
// the right child (a child index of 0 means "no child").
void TestHufTreeN(int root, HTree &pHT)
{
    cout << pHT[root].nWeight << " ";

    const int left = pHT[root].nLchild;
    if (left != 0)
    {
        TestHufTreeN(left, pHT);
    }

    const int right = pHT[root].nRchild;
    if (right != 0)
    {
        TestHufTreeN(right, pHT);
    }
}
// Test helper: walks the subtree rooted at index root and, for every leaf
// (node without children), prints root - 1 in hexadecimal and its code pHC[root].
void TestHufCode(int root, HTree pHT, HCode pHC)
{
    const int left = pHT[root].nLchild;
    const int right = pHT[root].nRchild;

    const bool isLeaf = (left == 0 && right == 0);
    if (isLeaf)
    {
        printf("0x%02X %s\n", root-1, pHC[root]);
    }
    if (left != 0)
    {
        TestHufCode(left, pHT, pHC);
    }
    if (right != 0)
    {
        TestHufCode(right, pHT, pHC);
    }
}
// Derives the Huffman code table from the tree pHT.
//
// The tree (root at index 511) is walked iteratively without a stack: the
// nWeight field of every node is overwritten and reused as a visit state
// (0 = not visited, 1 = left side done, 2 = right side done), so the weights
// stored in pHT are destroyed by this call. Going left appends '0' to the
// current path, going right appends '1'.
//
// On success pHC points to a malloc'ed table of 257 entries; pHC[i] is the
// malloc'ed, NUL-terminated code of leaf i, entries that are not leaves are
// NULL. The caller frees every entry and then the table.
//
// Returns 1 on success. On allocation failure everything allocated here is
// released, pHC is set to NULL and 0 is returned.
int HuffmanCoding(HCode &pHC, HTree &pHT)
{
    char cd[256] = { '\0' }; // path from the root to the current node
    int cdlen = 0;           // length of that path

    // One pointer per leaf index 1..256, plus the unused slot 0. The element
    // size must be sizeof(char*): the table stores pointers, not ints.
    const int nEntries = 256 + 1;
    pHC = (char**)malloc(nEntries * sizeof(char*));
    if (!pHC)
    {
        cerr << "内存分配失败!" << endl;
        return 0;
    }
    for (int i = 0; i < nEntries; i++)
    {
        pHC[i] = NULL;
    }

    for (int i = 1; i < 512; i++)
    {
        pHT[i].nWeight = 0; // reused as the visit state during the traversal
    }

    int p = 511; // root node
    while (p != 0)
    {
        if (pHT[p].nWeight == 0) // first visit: try to go left
        {
            pHT[p].nWeight = 1;
            if (pHT[p].nLchild != 0)
            {
                p = pHT[p].nLchild;
                cd[cdlen++] = '0';
            }
            else if (pHT[p].nRchild == 0) // leaf: record its code
            {
                char *code = NULL;
                if (p < nEntries)
                {
                    code = (char*)malloc((cdlen + 1) * sizeof(char));
                }
                if (!code)
                {
                    cerr << "内存分配失败!" << endl;
                    for (int i = 0; i < nEntries; i++)
                    {
                        free(pHC[i]);
                    }
                    free(pHC);
                    pHC = NULL;
                    return 0;
                }
                cd[cdlen] = '\0';
                strcpy(code, cd); // copy the current path as the leaf's code
                pHC[p] = code;
            }
        }
        else if (pHT[p].nWeight == 1) // left side done: try to go right
        {
            pHT[p].nWeight = 2;
            if (pHT[p].nRchild != 0)
            {
                p = pHT[p].nRchild;
                cd[cdlen++] = '1';
            }
        }
        else
        {
            // Both sides done: go back to the parent, path gets one shorter.
            pHT[p].nWeight = 0;
            p = pHT[p].nParent;
            --cdlen;
        }
    }
    return 1;
}
main.cpp文件
#include "pch.h"
#include "compress.h"
#include <iostream>
using namespace std;
int main()
{
char filename[256];
cout << "==========Huffman文件压缩 ==========" << endl;
cout << "请输入文件名 :";
cin >> filename;
Compress(filename);
getchar();
return 0;
}
运行结果如下: