哈夫曼编码作用:
1、压缩文件(文件越大、其中重复字符出现的概率越高,压缩效果越明显;但如果文件很小,可能不会被压缩,编码后反而比原文件更大);
2、文件简单加密;
编码步骤
1、读取文件中的每种字符出现的次数写入CodeNum[256]中。
2、根据CodeNum来构建哈夫曼树。
2.1、拷贝一份CodeNum,命名为_CodeNum;
2.2、每次在_CodeNum中找到两个出现次数最少的数据,把它们组合成一个新结点,新结点的使用次数为两者之和;把原来两个数据的使用次数清空,再把新结点放进_CodeNum中使用次数为0的位置。
2.3、不断重复2.2直至_CodeNum中有使用次数的数据只剩一个;
3、根据哈夫曼树来对文件中的字符进行编码,并且记录一共有多少个字符,以及哈夫曼编码的字节数。
3.1、从文件的第一个字符开始,从哈夫曼树的根结点往下遍历,直到找到对应的字符结点,把该字符的哈夫曼编码记录下来(按二进制方式记录,以bit为单位,满8位进一个字节);如此重复,直到把整个文件读取完;
4、把CodeNum、原文件的字符个数、哈夫曼编码的大小、哈夫曼编码依次写入一个新文件中,这个新文件就是哈夫曼编码文件。
代码实现
// One node of the Huffman tree (leaf or merged internal node).
struct HufNode
{
// Byte value represented by a leaf; set to 0 on merged (internal) nodes.
unsigned char c;
// Branch bit assigned when the node is attached under a parent:
// 1 for a left child, 0 for a right child.
char code;
// Occurrence count of the byte, or the sum of both children's counts
// for a merged node.
int usetime;
// Tree links; nullptr when the corresponding node does not exist.
HufNode *pParent;
HufNode *pLc;
HufNode *pRc;
};
// Entry of the working copy of the frequency table used while building the tree.
struct HufArrayNode
{
// Weight of this entry; 0 marks the slot as empty / already consumed.
int usetimes;
// Subtree already built for this entry, or nullptr when the entry still
// stands for a plain byte value that has no node yet.
HufNode *pRoot;
};
// Bridge between the working array and the tree: describes the entry that
// FindMinHufDataInHufArray removed from the array.
struct HufData
{
// Index of the slot the entry was taken from (equals the byte value for
// entries that were never merged).
unsigned char c;
// Weight the entry had in the array.
int usetime;
// Subtree attached to the entry, or nullptr for a plain byte entry.
HufNode *pRoot;
};
// Huffman code of one byte value, stored as one bit (0 or 1) per array element.
struct HufCodeData
{
// Bits of the code in root-to-leaf order; only the first `length` entries are valid.
char code[256];
// Number of valid bits in `code`.
int length;
};
// Removes the entry with the smallest positive weight from the 256-slot
// working array and reports it through `hufdata`.
//
// pHufArray : working array of 256 entries; the chosen slot is cleared
//             (usetimes = 0, pRoot = nullptr) so it can be reused.
// hufdata   : receives the slot index (in `c`), the weight and the attached
//             subtree of the chosen entry. On failure it is reset to
//             c = 0, usetime = 0, pRoot = nullptr.
// Returns true when an entry was found, false when every slot is empty.
//
// Ties are resolved in favour of the lowest slot index; the shape of the
// resulting tree depends on this order, so it must not change.
bool FindMinHufDataInHufArray(HufArrayNode *pHufArray, HufData &hufdata)
{
    hufdata.c = '\0';
    hufdata.usetime = 0;
    // Always reset the subtree pointer so a failed search never hands a stale
    // (or uninitialised) pointer back to the caller.
    hufdata.pRoot = nullptr;

    int minIndex = -1;
    for (int i = 0; i < 256; ++i)
    {
        const int weight = pHufArray[i].usetimes;
        if (weight <= 0)
        {
            continue;
        }
        // Strict '<' keeps the first of several equal minima.
        if (minIndex < 0 || weight < pHufArray[minIndex].usetimes)
        {
            minIndex = i;
        }
    }

    if (minIndex < 0)
    {
        return false;
    }

    hufdata.c = (unsigned char)minIndex;
    hufdata.usetime = pHufArray[minIndex].usetimes;
    hufdata.pRoot = pHufArray[minIndex].pRoot;

    // Consume the slot.
    pHufArray[minIndex].usetimes = 0;
    pHufArray[minIndex].pRoot = nullptr;
    return true;
}
// Tells whether merging is finished, i.e. whether at most one entry with a
// positive weight is left in the 256-slot working array.
//
// An array with no active entry at all (empty input) also counts as finished:
// there is nothing left to merge, and reporting "not finished" would make the
// caller try to combine entries that do not exist.
bool FindMinOver(HufArrayNode const * const pHufArray)
{
    int active = 0;
    for (int i = 0; i < 256; ++i)
    {
        if (pHufArray[i].usetimes > 0)
        {
            ++active;
            if (active > 1)
            {
                // Two active entries are enough to know another merge is needed.
                return false;
            }
        }
    }
    return true;
}
// Depth-first search for the leaf holding byte `c`, recording the path taken.
//
// pRoot       : subtree to search; nullptr is treated as an empty tree.
// c           : byte value to look for.
// hufcodedata : path accumulator. The caller sets `length` to 0 before the
//               top-level call. On success `code[0..length)` holds the bits
//               from the root to the leaf (1 = left branch, 0 = right branch);
//               on failure `length` is restored to its value on entry.
// Returns true when the leaf was found.
bool FindHufCodeInHufTree(HufNode *pRoot, unsigned char c, HufCodeData &hufcodedata)
{
    if (pRoot == nullptr)
    {
        return false;
    }

    // Only leaves carry a byte value; internal nodes always have c == 0 and
    // must not be matched against `c`.
    if (pRoot->pLc == nullptr && pRoot->pRc == nullptr)
    {
        return pRoot->c == c;
    }

    // Refuse to descend when the path would not fit into the code buffer.
    const int depth = hufcodedata.length;
    if (depth < 0 || depth >= (int)sizeof(hufcodedata.code))
    {
        return false;
    }

    if (pRoot->pLc)
    {
        hufcodedata.code[depth] = 1;
        hufcodedata.length = depth + 1;
        if (FindHufCodeInHufTree(pRoot->pLc, c, hufcodedata))
        {
            return true;
        }
        hufcodedata.length = depth; // backtrack
    }

    if (pRoot->pRc)
    {
        hufcodedata.code[depth] = 0;
        hufcodedata.length = depth + 1;
        if (FindHufCodeInHufTree(pRoot->pRc, c, hufcodedata))
        {
            return true;
        }
        hufcodedata.length = depth; // backtrack
    }

    return false;
}
// Recursively deletes the subtree rooted at `pRoot` (children first, then the
// node itself). Passing nullptr is allowed and does nothing.
//
// The pointer is received by value, so the caller's copy is left dangling
// and must be reset by the caller if it stays in scope.
void DestroyTree(HufNode *pRoot)
{
    // Check before touching any member: the tree is empty for empty input.
    if (pRoot == nullptr)
    {
        return;
    }
    DestroyTree(pRoot->pLc);
    DestroyTree(pRoot->pRc);
    delete pRoot;
}
// Root of the Huffman tree; assigned by _tmain once the tree has been built
// and released there with DestroyTree after encoding.
HufNode *pRoot = nullptr;
int _tmain(int argc, _TCHAR* argv[])
{
if (argc < 2)
{
printf("没有待编码文件\n");
getchar();
return 0;
}
FILE *orgfp = nullptr;
fopen_s(&orgfp, argv[1], "rb");
if (orgfp)
{
printf("开始哈夫曼编码\n");
int CodeNum[256] = { 0 };
HufArrayNode _CodeNum[256];
unsigned char _SingleChar = '\0';
//fread返回值为0代表文件结束或发生错误,不然表示成功读取多少个元素
while (fread(&_SingleChar,sizeof(unsigned char),1, orgfp))
{
++CodeNum[_SingleChar];
}
for (int i = 0; i < 256;++i)
{
_CodeNum[i].usetimes = CodeNum[i];
_CodeNum[i].pRoot = nullptr;
}
HufData hufdata;
printf("开始生成树\n");
while (!FindMinOver(_CodeNum))
{
HufNode *pTempRc;
HufNode *pTempLc;
HufNode *pTempRoot;
//右节点
if (FindMinHufDataInHufArray(_CodeNum, hufdata))
{
if (hufdata.pRoot)
{
pTempRc = hufdata.pRoot;
pTempRc->c = 0;
pTempRc->code = 0;
}
else
{
pTempRc = new HufNode;
pTempRc->c = hufdata.c;
pTempRc->usetime = hufdata.usetime;
pTempRc->code = 0;
pTempRc->pParent = nullptr;
pTempRc->pLc = nullptr;
pTempRc->pRc = nullptr;
}
}
//左节点
if (FindMinHufDataInHufArray(_CodeNum, hufdata))
{
if (hufdata.pRoot)
{
pTempLc = hufdata.pRoot;
pTempLc->c = 0;
pTempLc->code = 1;
}
else
{
pTempLc = new HufNode;
pTempLc->c = hufdata.c;
pTempLc->usetime = hufdata.usetime;
pTempLc->code = 1;
pTempLc->pParent = nullptr;
pTempLc->pLc = nullptr;
pTempLc->pRc = nullptr;
}
}
pTempRoot = new HufNode;
pTempRoot->c = 0;
pTempRoot->usetime = pTempLc->usetime + pTempRc->usetime;
pTempRoot->code = 0;
pTempRoot->pParent = nullptr;
pTempRoot->pLc = pTempLc;
pTempRoot->pRc = pTempRc;
pTempLc->pParent = pTempRoot;
pTempRc->pParent = pTempRoot;
for (int i = 0; i < 256;++i)
{
if (_CodeNum[i].usetimes == 0)
{
_CodeNum[i].usetimes = pTempRoot->usetime;
_CodeNum[i].pRoot = pTempRoot;
break;
}
}
}
if (FindMinHufDataInHufArray(_CodeNum, hufdata))
{
pRoot = hufdata.pRoot;
}
printf("成功生成树\n");
printf("开始编码\n");
rewind(orgfp);
fseek(orgfp, 0, SEEK_END);
int orgfilesize = ftell(orgfp);
unsigned char *pOutBuffer = new unsigned char[256 / 8 * orgfilesize];
memset(pOutBuffer, 0, sizeof(unsigned char)* (256 / 8 * orgfilesize));
int ByteSit = 0;
int BitSit = 0;
HufCodeData hufcodearray[256];
for (int i = 0; i < 256; ++i)
{
hufcodearray[i].length = -1;
}
rewind(orgfp);
while (fread(&_SingleChar, sizeof(unsigned char), 1, orgfp))
{
HufCodeData hufcodedata;
hufcodedata.length = 0;
if (hufcodearray[_SingleChar].length<0)
{
FindHufCodeInHufTree(pRoot, _SingleChar, hufcodedata);
hufcodearray[_SingleChar] = hufcodedata;
}
else
{
hufcodedata = hufcodearray[_SingleChar];
}
for (int l = 0; l < hufcodedata.length; ++l)
{
if (hufcodedata.code[l])
{
switch (BitSit)
{
case 0:
{
*(pOutBuffer + ByteSit) |= 0x80;
}
break;
case 1:
{
*(pOutBuffer + ByteSit) |= 0x40;
}
break;
case 2:
{
*(pOutBuffer + ByteSit) |= 0x20;
}
break;
case 3:
{
*(pOutBuffer + ByteSit) |= 0x10;
}
break;
case 4:
{
*(pOutBuffer + ByteSit) |= 0x08;
}
break;
case 5:
{
*(pOutBuffer + ByteSit) |= 0x04;
}
break;
case 6:
{
*(pOutBuffer + ByteSit) |= 0x02;
}
break;
case 7:
{
*(pOutBuffer + ByteSit) |= 0x01;
}
break;
default:
break;
}
}
++BitSit;
if (BitSit >= 8)
{
BitSit = 0;
++ByteSit;
}
}
}
if (BitSit > 0)
{
++ByteSit;
}
printf("编码完成\n");
DestroyTree(pRoot);
fclose(orgfp);
orgfp = nullptr;
FILE *outfp = nullptr;
string OutFileName = argv[1];
OutFileName.pop_back();
OutFileName.pop_back();
OutFileName.pop_back();
OutFileName.pop_back();
OutFileName += ".hfc";
fopen_s(&outfp, OutFileName.c_str(), "wb");
if (outfp)
{
printf("写入文件\n");
fwrite(CodeNum, sizeof(int), 256, outfp);
fwrite(&orgfilesize, sizeof(int), 1, outfp);
fwrite(&ByteSit, sizeof(int), 1, outfp);
fwrite(pOutBuffer, sizeof(unsigned char), ByteSit, outfp);
printf("编码完成!");
fclose(outfp);
outfp = nullptr;
}
else
{
printf("目标编码文件打开失败,无法完成编码!\n");
}
if (pOutBuffer)
{
delete[] pOutBuffer;
pOutBuffer = nullptr;
}
}
else
{
printf("你想要编码的文件不存在或者暂时无法打开!");
}
getchar();
return 0;
}