Huffman编码实现

1. Huffman概述

  • 是一种无损压缩编码方式
  • 根据字符出现的概率编码,对概率高的字符使用较短的编码,对概率低的字符使用较长的编码,从而使得编码后的字符串的长度的期望最小
  • 是一种贪心算法:每次总选择两个概率最小的字符结点进行合并
  • 选择用频数代替频率,方便计算
  • Huffman编码不唯一
  • 下图是计算过程示例:

    这里写图片描述

2. 编程实现过程

(1) 对给定的字符串进行字符出现频数的统计。

(2) 剔除掉统计结果中频数为0的数值,对数组进行整理,并将不为0的字符压入vector,用于后续遍历。

(3) 由剔除后的数组创建HuffmanTree:每次在数组中查找当前权值最小的两个结点,将其合并为一个新结点,并依次填入数组的后续位置。(注:N个数据,生成HuffmanTree后,共计2N-1个结点)。

(4) 完成HuffmanTree的建立后,依次遍历N个叶结点,每次都查找至根结点,并进行规则设定,例如本代码中令左孩子为0,右孩子为1。本代码采用二维vector进行Huffman编码存储。

(5) 依次输出显示。

3. 代码

#include "stdafx.h"

#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <vector>

using namespace std;

// Node of the Huffman tree. All nodes live in one flat array and refer to
// each other by array index instead of by pointer.
typedef struct tagHuffmanNode 
{
    int nWeight;    // leaf: the symbol's frequency count; internal node: sum of both children's weights
    int nLeft;      // array index of the left child (this branch is encoded as '0')
    int nRight;     // array index of the right child (this branch is encoded as '1')
    int nParent;    // array index of the parent; 0 doubles as "no parent" (the root, or a node not merged yet)
}HuffmanNode;

// Counts how often each byte value occurs in a NUL-terminated string; the
// counts stand in for the symbol frequencies.
//
// str     - NUL-terminated input text; a null pointer is treated as empty.
// pWeight - counter table indexed by byte value, so it needs one slot per
//           possible unsigned char value (256 for 8-bit chars). Counts are
//           added to whatever the table already holds, so the caller is
//           expected to zero it first. A null pointer makes the call a no-op.
void CalcFrequency(char* str, int* pWeight)
{
    if (str == nullptr || pWeight == nullptr)
        return;

    for (; *str != '\0'; ++str)
    {
        // Index through unsigned char: where plain char is signed, bytes
        // >= 0x80 would otherwise turn into negative indices and write
        // in front of the table.
        pWeight[static_cast<unsigned char>(*str)]++;
    }
}

// Drops the symbols that never occur.
//
// Walks pWeight[0..N-1]; for every non-zero count the symbol's index is
// appended to pChar (needed later to label the printed codes) and the count
// is moved to the next free slot at the front of pWeight. Afterwards the
// unused tail of pWeight is set to zero, so pWeight[k] is the count of the
// symbol stored in the k-th entry appended to pChar by this call.
void CalcExistChar(int* pWeight, int N, std::vector<int>& pChar)
{
    int writePos = 0;
    for (int readPos = 0; readPos < N; ++readPos)
    {
        const int count = pWeight[readPos];
        if (count == 0)
            continue;

        pChar.push_back(readPos);
        pWeight[writePos] = count;
        ++writePos;
    }

    // Clear everything behind the compacted counts.
    while (writePos < N)
    {
        pWeight[writePos] = 0;
        ++writePos;
    }
}


// Picks the two lightest nodes that are still available for merging.
//
// Only pHuffmanTree[0..n-1] is scanned, and only nodes with nParent == 0
// (not merged yet) and a positive weight take part.
// nMin1 receives the index of the lightest such node and nMin2 the index of
// the second lightest; each stays -1 when no matching node was found.
// Between equal weights the node met first keeps the better rank.
void SelectNode(HuffmanNode* pHuffmanTree, int n, int &nMin1, int &nMin2)
{
    nMin1 = -1;
    nMin2 = -1;

    int lightest = -1;   // weight of the node at nMin1
    int runnerUp = -1;   // weight of the node at nMin2

    for (int idx = 0; idx < n; ++idx)
    {
        const int weight = pHuffmanTree[idx].nWeight;
        if (pHuffmanTree[idx].nParent != 0 || weight <= 0)
            continue;

        if (nMin1 < 0 || weight < lightest)
        {
            // New minimum: the previous minimum becomes the runner-up.
            nMin2 = nMin1;
            runnerUp = lightest;
            nMin1 = idx;
            lightest = weight;
            continue;
        }

        if (nMin2 < 0 || weight < runnerUp)
        {
            nMin2 = idx;
            runnerUp = weight;
        }
    }
}

void HuffmanCoding(int* pWeight, int N, vector<vector<char>>& code)
{
    if (N <= 0)
        return;

    int m = 2*N - 1;    //由N个结点生成的Huffman树,共计2N-1个结点
    HuffmanNode* pHuffmanTree = new HuffmanNode[m];
    for (int i=0; i<m; i++)
    {
        pHuffmanTree[i].nLeft = 0;
        pHuffmanTree[i].nParent = 0;
        pHuffmanTree[i].nRight = 0;
        pHuffmanTree[i].nWeight = 0;
    }
    int nMin1, nMin2; //定义最小的两个数值

    int i;
    //建立叶子结点
    for (i=0; i<N; i++)
    {
        pHuffmanTree[i].nWeight = pWeight[i];
    }

    //每次选择现有结点中权值最小的两个结点,创建数
    for (i=N; i<m; i++)
    {
        SelectNode(pHuffmanTree, i, nMin1, nMin2);
        pHuffmanTree[nMin1].nParent = i;
        pHuffmanTree[nMin2].nParent = i;
        pHuffmanTree[i].nWeight = pHuffmanTree[nMin1].nWeight + pHuffmanTree[nMin2].nWeight;
        pHuffmanTree[i].nLeft = nMin1;
        pHuffmanTree[i].nRight = nMin2;
    }

    //针对HuffmanTree遍历叶结点,向上索引至根节点
    int nChild, nParent;
    for(int i=0; i<N; i++)
    {
        vector<char>& cur = code[i];
        nParent = pHuffmanTree[i].nParent;
        nChild = i;
        while(nParent != 0)
        {
            if (pHuffmanTree[nParent].nLeft == nChild)
            {
                cur.push_back('0');
            }
            else if(pHuffmanTree[nParent].nRight == nChild)
            {
                cur.push_back('1');
            }
            nChild = nParent;
            nParent = pHuffmanTree[nParent].nParent;
        }
        reverse(cur.begin(), cur.end());
    }

}

// Prints one line for a symbol: its byte value, a tab, the character itself,
// ":" and a tab, then the code characters, ended by std::endl.
//
// c    - the symbol.
// code - the symbol's code as a sequence of characters ('0' / '1').
void PrintCode(char c, vector<char>& code)
{
    // Go through unsigned char so that bytes >= 0x80 are shown as 128..255,
    // matching the indices of the frequency table, instead of as negative
    // numbers on platforms where plain char is signed.
    const int byteValue = static_cast<int>(static_cast<unsigned char>(c));

    std::cout << byteValue << "\t" << c << ":\t";
    for (std::vector<char>::const_iterator it = code.begin(); it != code.end(); ++it)
    {
        std::cout << *it;
    }
    std::cout << std::endl;
}

// Prints every symbol together with its code, one line per symbol.
//
// code  - code[i] is the code of the i-th symbol.
// pChar - pChar[i] is the character value of the i-th symbol.
//
// Only indices present in both vectors are printed, so a pChar that is
// shorter than code is never read past its end.
void Print(vector<vector<char>>& code, vector<int>& pChar)
{
    const int nCodes = static_cast<int>(code.size());
    const int nChars = static_cast<int>(pChar.size());
    const int nSize = (nCodes < nChars) ? nCodes : nChars;

    for (int i = 0; i < nSize; i++)
    {
        // pChar stores the symbol as an int; PrintCode takes it as a char.
        PrintCode(static_cast<char>(pChar[i]), code[i]);
    }
}

// Demo entry point: counts the characters of a fixed sample text, builds the
// Huffman codes for the characters that occur and prints them.
int _tmain(int argc, _TCHAR* argv[])
{
    // Sample text. The backslash line continuations splice the source lines
    // together, so the leading indentation of every continued line is part of
    // the string and is counted like any other character.
    char text[] = "every night in my dreams \
        i see you, i feel you,\
        that is how i know you go on \
        far across the distance  \
        and spaces between us  \
        you have come to show you go on \
        near, far, wherever you are \
        i believe that the heart does go on \
        once more you open the door \
        and you're here in my heart \
        and my heart will go on and on \
        love can touch us one time \
        and last for a lifetime \
        and never let go till we're one \
        love was when i loved you \
        one true time i hold to \
        in my life we'll always go on \
        near, far, wherever you are \
        i believe that the heart does go on \
        once more you open the door \
        and you're here in my heart \
        and my heart will go on and on \
        there is some love that will not go away \
        you're here, there's nothing i fear,\
        and i know that my heart will go on \
        we'll stay forever this way \
        you are safe in my heart \
        and my heart will go on and on";

    // One counter per possible char value, all starting at zero.
    const int N = 256;
    int pWeight[N] = {0};
    CalcFrequency(text, pWeight);
    // Tabs are excluded from the statistics - presumably to ignore tab
    // indentation that ends up inside the literal above; confirm.
    pWeight['\t'] = 0;
    // pChar collects the characters that actually occur; pWeight is compacted
    // so that its first pChar.size() entries are their counts.
    vector<int> pChar;
    CalcExistChar(pWeight, N, pChar);
    int N2 = (int)pChar.size();
    // One (initially empty) code per occurring character.
    vector<vector<char>> code(N2);
    HuffmanCoding(pWeight, N2, code);
    Print(code, pChar);

    // Runs the shell command "pause" (on Windows this waits for a key press,
    // keeping the console window open).
    system("pause");

    return 0;
}

4. 程序运行结果

这里写图片描述

5. 注意

以下任一情况都可能导致同一文件的压缩编码结果不同:

(1) 在进行查找最小的两个数值时,可能同时存在多个频数相同的数值;

(2) 在生成编码时定义的左0右1、左1右0规则不同。

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值