哈夫曼编码

哈夫曼编码作用:

1、压缩文件(文件越大,且其中字符重复出现的概率越高,压缩效果越明显;但如果文件很小,可能达不到压缩效果,编码后的文件反而比原文件更大);

2、文件简单加密;

编码步骤

1、读取文件中的每种字符出现的次数写入CodeNum[256]中。

2、根据CodeNum来构建哈夫曼树。

        2.1、拷贝一份CodeNum,命名为_CodeNum;

        2.2、每次在_CodeNum中找到两个出现次数最少的项,把它们组合成一个新结点,新结点的使用次数为两者之和;把原来两项的使用次数清零,再把新结点放进_CodeNum中使用次数为0的位置。

        2.3、不断重复2.2直至_CodeNum中有使用次数的数据只剩一个;

3、根据哈夫曼树来对文件中的字符进行编码,并且记录一共有多少个字符,以及哈夫曼编码的字节数。

        3.1、从文件的第一个字符开始,对每个字符都从哈夫曼树的根结点往下遍历,直到找到对应的字符结点,把该字符的哈夫曼编码记录下来(按二进制方式记录,以bit位为单位,满8位进一个字节);如此重复,直到把整个文件读取完;

4、把CodeNum、原文件的字符个数、哈夫曼编码的大小(字节数)、哈夫曼编码依次写入一个新文件中,这个新文件就是哈夫曼编码文件。

代码实现

 // One node of the Huffman tree: a leaf that stands for a byte value, or an
 // internal node created by merging two lighter nodes.
 struct HufNode 
 {
 	unsigned char c;	// byte value for a leaf; internal nodes are created with 0
 	char code;	// branch label: 1 when attached as a left child, 0 as a right child or root
 	int usetime;	// occurrence count of a leaf, or the sum of both children for an internal node
 	// Tree links (parent, left child, right child); nullptr when absent.
 	HufNode *pParent;
 	HufNode *pLc;
 	HufNode *pRc;
 };

 // One slot of the working copy of the frequency table used while the tree is
 // being built.
 struct HufArrayNode
 {
	 int usetimes;	// weight held by this slot; 0 marks the slot as empty
	 HufNode *pRoot;	// subtree stored in this slot, or nullptr for a plain byte count
 };

// Carries one entry from the working table over to the tree-building code.
struct HufData
{
	unsigned char c;	// index of the table slot the entry was taken from
	int usetime;	// weight of the entry
	HufNode *pRoot;	// subtree that was stored in the slot, if any
};

// Huffman code of one byte value, stored as one bit per array element.
struct HufCodeData
{
	char code[256];	// code[i] is the i-th bit (0 or 1) on the path from the root to the leaf
	int length;	// number of valid entries in code; _tmain uses a negative value for "not computed yet"
};

// Removes the entry with the smallest positive weight from the 256-slot table.
//
// pHufArray: table of 256 slots. The chosen slot is cleared (weight 0, no
//            subtree) before returning.
// hufdata:   receives the slot index, weight and subtree pointer of the removed
//            entry. Every field is reset on entry, so after a failed call it
//            holds c == 0, usetime == 0 and pRoot == nullptr rather than
//            leftovers from an earlier call.
// Returns true when an entry was removed, false when every slot was empty.
// When several slots share the smallest weight, the lowest index wins.
bool FindMinHufDataInHufArray(HufArrayNode *pHufArray, HufData &hufdata)
{
	hufdata.c = '\0';
	hufdata.usetime = 0;
	hufdata.pRoot = nullptr;

	int best = -1;
	for (int i = 0; i < 256; ++i)
	{
		const int weight = pHufArray[i].usetimes;
		if (weight <= 0)
		{
			continue;	// empty slot
		}
		// Strict comparison keeps the first (lowest-index) slot on ties.
		if (best < 0 || weight < pHufArray[best].usetimes)
		{
			best = i;
		}
	}

	if (best < 0)
	{
		return false;
	}

	hufdata.c = (unsigned char)best;
	hufdata.usetime = pHufArray[best].usetimes;
	hufdata.pRoot = pHufArray[best].pRoot;

	// Take the entry out of the table.
	pHufArray[best].usetimes = 0;
	pHufArray[best].pRoot = nullptr;
	return true;
}
// Reports whether merging is finished, i.e. the 256-slot table holds at most
// one entry with a positive weight.
//
// An empty table counts as finished: with nothing left to merge, a caller that
// loops "while (!FindMinOver(table))" must not enter its merge step.
bool FindMinOver(HufArrayNode const * const pHufArray)
{
	int num = 0;
	for (int i = 0; i < 256; ++i)
	{
		if (pHufArray[i].usetimes > 0 && ++num > 1)
		{
			return false;	// at least two entries remain
		}
	}
	return true;
}
// Depth-first search for the leaf that carries byte c, recording the path.
//
// pRoot:       subtree to search; a null pointer is treated as an empty tree.
// c:           byte value to look for.
// hufcodedata: path accumulator. The caller sets length to 0 before the
//              top-level call. Each step down appends 1 for a left branch or
//              0 for a right branch; steps on dead ends are rolled back.
// Returns true when the leaf was found (code[0..length) is then the path from
// pRoot to it), false otherwise (length is back at its value on entry).
bool FindHufCodeInHufTree(HufNode *pRoot, unsigned char c, HufCodeData &hufcodedata)
{
	if (pRoot == nullptr)
	{
		return false;
	}

	// Leaf: the search ends here one way or the other.
	if (pRoot->pLc == nullptr && pRoot->pRc == nullptr)
	{
		return pRoot->c == c;
	}

	// Never write past the fixed-size code buffer.
	if (hufcodedata.length >= (int)sizeof(hufcodedata.code))
	{
		return false;
	}

	if (pRoot->pLc)
	{
		hufcodedata.code[hufcodedata.length++] = 1;
		if (FindHufCodeInHufTree(pRoot->pLc, c, hufcodedata))
		{
			return true;
		}
		--hufcodedata.length;	// not under the left child: undo the step
	}

	if (pRoot->pRc)
	{
		hufcodedata.code[hufcodedata.length++] = 0;
		if (FindHufCodeInHufTree(pRoot->pRc, c, hufcodedata))
		{
			return true;
		}
		--hufcodedata.length;	// not under the right child either
	}

	return false;
}
// Frees every node of the tree rooted at pRoot (children first, then the node).
//
// A null pRoot is accepted and ignored. The pointer is passed by value, so the
// caller's own copy is left unchanged and must be cleared by the caller.
void DestroyTree(HufNode *pRoot)
{
	if (pRoot == nullptr)
	{
		return;
	}

	DestroyTree(pRoot->pLc);
	DestroyTree(pRoot->pRc);
	delete pRoot;
}

// Root of the Huffman tree; assigned by _tmain once the tree has been built.
HufNode *pRoot = nullptr;
int _tmain(int argc, _TCHAR* argv[])
{
	if (argc < 2)
	{
		printf("没有待编码文件\n");
		getchar();
		return 0;
	}

	FILE *orgfp = nullptr; 
	fopen_s(&orgfp, argv[1], "rb");
	if (orgfp)
	{
		printf("开始哈夫曼编码\n");
		int CodeNum[256] = { 0 };
		HufArrayNode _CodeNum[256];

		unsigned char _SingleChar = '\0';

		//fread返回值为0代表文件结束或发生错误,不然表示成功读取多少个元素
		while (fread(&_SingleChar,sizeof(unsigned char),1, orgfp))
		{
			++CodeNum[_SingleChar];
		}

		for (int i = 0; i < 256;++i)
		{
			_CodeNum[i].usetimes = CodeNum[i];
			_CodeNum[i].pRoot = nullptr;
		}

		HufData hufdata;
		printf("开始生成树\n");
		while (!FindMinOver(_CodeNum))
		{
			HufNode *pTempRc;
			HufNode *pTempLc;
			HufNode *pTempRoot;
			//右节点
			if (FindMinHufDataInHufArray(_CodeNum, hufdata))
			{
				if (hufdata.pRoot)
				{
					pTempRc = hufdata.pRoot;
					pTempRc->c = 0;
					pTempRc->code = 0;
				}
				else
				{
					pTempRc = new HufNode;
					pTempRc->c = hufdata.c;
					pTempRc->usetime = hufdata.usetime;
					pTempRc->code = 0;
					pTempRc->pParent = nullptr;
					pTempRc->pLc = nullptr;
					pTempRc->pRc = nullptr;
				}
			}
			//左节点
			if (FindMinHufDataInHufArray(_CodeNum, hufdata))
			{
				if (hufdata.pRoot)
				{
					pTempLc = hufdata.pRoot;
					pTempLc->c = 0;
					pTempLc->code = 1;
				}
				else
				{
					pTempLc = new HufNode;
					pTempLc->c = hufdata.c;
					pTempLc->usetime = hufdata.usetime;
					pTempLc->code = 1;
					pTempLc->pParent = nullptr;
					pTempLc->pLc = nullptr;
					pTempLc->pRc = nullptr;

				}
			}

			pTempRoot = new HufNode;
			pTempRoot->c = 0;
			pTempRoot->usetime = pTempLc->usetime + pTempRc->usetime;
			pTempRoot->code = 0;
			pTempRoot->pParent = nullptr;
			pTempRoot->pLc = pTempLc;
			pTempRoot->pRc = pTempRc;

			pTempLc->pParent = pTempRoot;
			pTempRc->pParent = pTempRoot;

			for (int i = 0; i < 256;++i)
			{
				if (_CodeNum[i].usetimes == 0)
				{
					_CodeNum[i].usetimes = pTempRoot->usetime;
					_CodeNum[i].pRoot = pTempRoot;
					break;
				}
			}
		}

		if (FindMinHufDataInHufArray(_CodeNum, hufdata))
		{
			pRoot = hufdata.pRoot;
		}
		printf("成功生成树\n");

		printf("开始编码\n");
		rewind(orgfp);
		fseek(orgfp, 0, SEEK_END);
		int orgfilesize = ftell(orgfp);
		unsigned char *pOutBuffer = new unsigned char[256 / 8 * orgfilesize];
		memset(pOutBuffer, 0, sizeof(unsigned char)* (256 / 8 * orgfilesize));
		int ByteSit = 0;
		int BitSit = 0;

		HufCodeData hufcodearray[256];
		for (int i = 0; i < 256; ++i)
		{
			hufcodearray[i].length = -1;
		}

		rewind(orgfp);
		
		while (fread(&_SingleChar, sizeof(unsigned char), 1, orgfp))
		{
			HufCodeData hufcodedata;
			hufcodedata.length = 0;
			if (hufcodearray[_SingleChar].length<0)
			{		
				FindHufCodeInHufTree(pRoot, _SingleChar, hufcodedata);
				hufcodearray[_SingleChar] = hufcodedata;
			}
			else
			{
				hufcodedata = hufcodearray[_SingleChar];
			}

			for (int l = 0; l < hufcodedata.length; ++l)
			{
				if (hufcodedata.code[l])
				{
					switch (BitSit)
					{
					case 0:
					{
							  *(pOutBuffer + ByteSit) |= 0x80;
					}
						break;
					case 1:
					{
							  *(pOutBuffer + ByteSit) |= 0x40;
					}
						break;
					case 2:
					{
							  *(pOutBuffer + ByteSit) |= 0x20;
					}
						break;
					case 3:
					{
							  *(pOutBuffer + ByteSit) |= 0x10;
					}
						break;
					case 4:
					{
							  *(pOutBuffer + ByteSit) |= 0x08;
					}
						break;
					case 5:
					{
							  *(pOutBuffer + ByteSit) |= 0x04;
					}
						break;
					case 6:
					{
							  *(pOutBuffer + ByteSit) |= 0x02;
					}
						break;
					case 7:
					{
							  *(pOutBuffer + ByteSit) |= 0x01;
					}
						break;
					default:
						break;
					}
				}
				++BitSit;
				if (BitSit >= 8)
				{
					BitSit = 0;
					++ByteSit;
				}
			}
		}

		if (BitSit > 0)
		{
			++ByteSit;
		}
		printf("编码完成\n");

		DestroyTree(pRoot);
		fclose(orgfp);
		orgfp = nullptr;

		
		FILE *outfp = nullptr;
		string OutFileName = argv[1];
		OutFileName.pop_back();
		OutFileName.pop_back();
		OutFileName.pop_back();
		OutFileName.pop_back();	
		OutFileName += ".hfc";
		fopen_s(&outfp, OutFileName.c_str(), "wb");
		if (outfp)
		{
			printf("写入文件\n");
			fwrite(CodeNum, sizeof(int), 256, outfp);
			fwrite(&orgfilesize, sizeof(int), 1, outfp);
			fwrite(&ByteSit, sizeof(int), 1, outfp);
			fwrite(pOutBuffer, sizeof(unsigned char), ByteSit, outfp);
			printf("编码完成!");

			fclose(outfp);
			outfp = nullptr;
		}
		else
		{
			printf("目标编码文件打开失败,无法完成编码!\n");
		}

		if (pOutBuffer)
		{
			delete[] pOutBuffer;
			pOutBuffer = nullptr;
		}
	
	}
	else
	{
		printf("你想要编码的文件不存在或者暂时无法打开!");
	}
	getchar();
	return 0;
}

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值