Huffman编码实现

最新推荐文章于 2023-07-03 15:30:00 发布

进步小二郎

最新推荐文章于 2023-07-03 15:30:00 发布

阅读量502

点赞数 1

分类专栏：简单算法文章标签：压缩编码算法

本文链接：https://blog.csdn.net/kaige2111/article/details/50985517

版权

简单算法专栏收录该内容

4 篇文章 0 订阅

订阅专栏

1. Huffman概述

是一种无损压缩编码方式
根据字符出现的概率编码，对概率高的字符使用较短的编码，对概率低的字符使用较长的编码，从而使得编码后的字符串的长度的期望最小
是一种贪心算法：每次总选择两个概率最小的字符结点进行合并
选择用频数代替频率，方便计算
Huffman编码不唯一
下图是计算过程示例：

2. 编程实现过程

(1) 对给定的数组进行字符出现频率统计。

(2) 剔除掉统计结果中频数为0的数值，对数组进行整理，并将不为0的字符压入vector，用于后续遍历。

(3) 由剔除后的数组创建HuffmanTree，其中涉及查找数组中最小的两个值的问题：每次将当前权值最小的两个结点进行合并，并将合并得到的新结点依次填入数组的后续位置。（注：N个数据生成HuffmanTree后，共计2N-1个结点）。

(4) 完成HuffmanTree的建立后，依次遍历N个叶结点，每次都查找至根结点，并进行规则设定，例如本代码中令左孩子为0，右孩子为1。本代码采用二维vector进行Huffman编码存储。

(5) 依次输出显示。

3. 代码

#include "stdafx.h"
#include <algorithm>
#include <iostream>
#include <vector>

using namespace std;

// One node of the Huffman tree. The tree lives in a flat array and nodes
// refer to each other by array index rather than by pointer.
struct tagHuffmanNode
{
    int nWeight;   // occurrence count of the symbol (leaf) or sum of both children (inner node)
    int nLeft;     // array index of the left child
    int nRight;    // array index of the right child
    int nParent;   // array index of the parent; 0 means "no parent assigned yet"
};
typedef tagHuffmanNode HuffmanNode;

// Counts how often each byte value occurs in a NUL-terminated string; the
// counts stand in for symbol frequencies.
//   str     - NUL-terminated input text; a null pointer is treated as empty.
//   pWeight - counter table indexed by byte value, so it must provide
//             UCHAR_MAX + 1 entries (256 with 8-bit chars). Counters are
//             incremented in place and are NOT reset first.
void CalcFrequency(char* str, int* pWeight)
{
    if (!str || !pWeight)
    {
        return;
    }

    for (; *str; ++str)
    {
        // Plain char may be signed: a byte >= 0x80 would then be negative and
        // index in front of the table. Going through unsigned char keeps the
        // index inside 0..UCHAR_MAX.
        ++pWeight[static_cast<unsigned char>(*str)];
    }
}

// Compacts the frequency table so that only symbols that actually occur remain.
//   pWeight - table of N counters; on return the non-zero counters are packed
//             at the front in their original order and the rest is zero.
//   N       - number of entries in pWeight.
//   pChar   - receives (appended) the original index of every non-zero
//             counter, so entry k of the packed table belongs to the k-th
//             value appended here.
void CalcExistChar(int* pWeight, int N, vector<int>& pChar)
{
    int writePos = 0;
    for (int readPos = 0; readPos < N; ++readPos)
    {
        const int count = pWeight[readPos];
        if (count == 0)
        {
            continue;
        }

        // Remember which symbol this counter belongs to, for later display.
        pChar.push_back(readPos);
        pWeight[writePos] = count;
        ++writePos;
    }

    // Clear everything behind the packed counters.
    while (writePos < N)
    {
        pWeight[writePos] = 0;
        ++writePos;
    }
}


// Finds the two lightest nodes that are still available for merging.
// Only the first n entries of pHuffmanTree are examined, and only nodes with
// nParent == 0 (not merged yet) and a positive weight are candidates.
//   nMin1 - receives the index of the lightest candidate, or -1 if there is none.
//   nMin2 - receives the index of the second lightest candidate, or -1 if
//           fewer than two candidates exist.
// A candidate already chosen is displaced only by a strictly smaller weight,
// so among equal weights the lower index is kept.
void SelectNode(HuffmanNode* pHuffmanTree, int n, int &nMin1, int &nMin2)
{
    int lightest = -1;         // weight stored at index nMin1
    int secondLightest = -1;   // weight stored at index nMin2
    nMin1 = -1;
    nMin2 = -1;

    for (int idx = 0; idx < n; ++idx)
    {
        const HuffmanNode& node = pHuffmanTree[idx];
        if (node.nParent != 0 || node.nWeight <= 0)
        {
            continue;   // already merged, or carries no weight
        }

        if (nMin1 < 0 || node.nWeight < lightest)
        {
            // New overall minimum: the previous minimum becomes the runner-up.
            nMin2 = nMin1;
            secondLightest = lightest;
            nMin1 = idx;
            lightest = node.nWeight;
        }
        else if (nMin2 < 0 || node.nWeight < secondLightest)
        {
            nMin2 = idx;
            secondLightest = node.nWeight;
        }
    }
}

void HuffmanCoding(int* pWeight, int N, vector<vector<char>>& code)
{
    if (N <= 0)
        return;

    int m = 2*N - 1;    //由N个结点生成的Huffman树，共计2N-1个结点
    HuffmanNode* pHuffmanTree = new HuffmanNode[m];
    for (int i=0; i<m; i++)
    {
        pHuffmanTree[i].nLeft = 0;
        pHuffmanTree[i].nParent = 0;
        pHuffmanTree[i].nRight = 0;
        pHuffmanTree[i].nWeight = 0;
    }
    int nMin1, nMin2; //定义最小的两个数值

    int i;
    //建立叶子结点
    for (i=0; i<N; i++)
    {
        pHuffmanTree[i].nWeight = pWeight[i];
    }

    //每次选择现有结点中权值最小的两个结点，创建数
    for (i=N; i<m; i++)
    {
        SelectNode(pHuffmanTree, i, nMin1, nMin2);
        pHuffmanTree[nMin1].nParent = i;
        pHuffmanTree[nMin2].nParent = i;
        pHuffmanTree[i].nWeight = pHuffmanTree[nMin1].nWeight + pHuffmanTree[nMin2].nWeight;
        pHuffmanTree[i].nLeft = nMin1;
        pHuffmanTree[i].nRight = nMin2;
    }

    //针对HuffmanTree遍历叶结点，向上索引至根节点
    int nChild, nParent;
    for(int i=0; i<N; i++)
    {
        vector<char>& cur = code[i];
        nParent = pHuffmanTree[i].nParent;
        nChild = i;
        while(nParent != 0)
        {
            if (pHuffmanTree[nParent].nLeft == nChild)
            {
                cur.push_back('0');
            }
            else if(pHuffmanTree[nParent].nRight == nChild)
            {
                cur.push_back('1');
            }
            nChild = nParent;
            nParent = pHuffmanTree[nParent].nParent;
        }
        reverse(cur.begin(), cur.end());
    }

}

// Writes one line to standard output for a single symbol:
//   <numeric value of c> TAB <c itself> ':' TAB <code characters> newline
//   c    - the symbol.
//   code - its code, one character per element; printed in stored order.
void PrintCode(char c, vector<char>& code)
{
    cout << static_cast<int>(c) << "\t" << c << ":\t";
    for (vector<char>::size_type pos = 0; pos != code.size(); ++pos)
    {
        cout << code[pos];
    }
    cout << endl;
}

// Prints every symbol together with its code, one line per symbol.
//   code  - code[k] is the code of the k-th symbol.
//   pChar - pChar[k] is the character value of the k-th symbol; it must hold
//           at least code.size() entries because it is indexed in step with code.
void Print(vector<vector<char>>& code, vector<int>& pChar)
{
    const int total = static_cast<int>(code.size());
    int k = 0;
    while (k < total)
    {
        PrintCode(static_cast<char>(pChar[k]), code[k]);
        ++k;
    }
}

// Demo driver: counts the characters of a fixed sample text, builds Huffman
// codes for the characters that occur, and prints one line per character.
int _tmain(int argc, _TCHAR* argv[])
{
    // The sample text is ONE string literal spread over many source lines with
    // backslash line continuations. The line breaks themselves are not part of
    // the string, but the leading whitespace of every continued line is.
    char text[] = "every night in my dreams \
        i see you, i feel you,\
        that is how i know you go on \
        far across the distance  \
        and spaces between us  \
        you have come to show you go on \
        near, far, wherever you are \
        i believe that the heart does go on \
        once more you open the door \
        and you're here in my heart \
        and my heart will go on and on \
        love can touch us one time \
        and last for a lifetime \
        and never let go till we're one \
        love was when i loved you \
        one true time i hold to \
        in my life we'll always go on \
        near, far, wherever you are \
        i believe that the heart does go on \
        once more you open the door \
        and you're here in my heart \
        and my heart will go on and on \
        there is some love that will not go away \
        you're here, there's nothing i fear,\
        and i know that my heart will go on \
        we'll stay forever this way \
        you are safe in my heart \
        and my heart will go on and on";

    // One counter per possible 8-bit character value.
    const int N = 256;
    int pWeight[N] = {0};
    CalcFrequency(text, pWeight);
    // Drop any tab characters from the statistics so they get no code
    // (presumably tabs that came from indenting the continued lines above - TODO confirm).
    pWeight['\t'] = 0;
    // Pack the non-zero counters to the front; pChar remembers which character
    // each packed counter belongs to.
    vector<int> pChar;
    CalcExistChar(pWeight, N, pChar);
    // N2 = number of distinct characters that occur; one code slot for each.
    int N2 = (int)pChar.size();
    vector<vector<char>> code(N2);
    HuffmanCoding(pWeight, N2, code);
    Print(code, pChar);

    // Runs the shell command "pause" (on a Windows console this waits for a
    // key press, which keeps the output window open).
    system("pause");

    return 0;
}

4. 程序运行结果

这里写图片描述

5. 注意

以下任一情况都可能导致同一文件得到的压缩编码不同：

(1) 在进行查找最小的两个数值时，可能同时存在多个频数相同的数值；

(2) 在生成编码时定义的左0右1、左1右0规则不同。

进步小二郎

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Huffman编码实现

Huffman概述 (1) 是一种无损压缩编码方式 (2) 根据字符出现的概率编码，对概率高的字符使用较短的编码，对概率低的字符使用较长的编码，从而使得编码后的字符串的长度的期望最小 (3) 是一种贪心算法：每次总选择两个概率最小的字符结点进行合并 (4) 选择用频数代替频率，方便计算 (5) Huffman编码不唯一
复制链接

扫一扫