算法 - Huffman编码

一、需求分析
  • 统计一篇文章中各个字符出现的频率,按照统计所得的频率使用Huffman算法编码文章,输出编码之后的文件以及Huffman编码
  • 再次解码之前的编码文件,判断编解码前后的文件是否一致,以判断算法的正确性
二、Huffman算法描述
  • Huffman编码法被称为最优编码法,所获得的编码为无重复前缀码,编码的依据是被编码的字符在整个原文中出现的频率
  • Huffman树的生成过程:
    1. 首先统计出各个字符出现的频率作为权值,然后每个字符对应创建一个节点,节点保存字符值、权值。
    2. 取出所有节点中权值最小的两个节点,合并成为一个新节点,该节点的权值为两个子节点权值的和,该节点的左右子节点分别为之前取出来的两个最小节点。将该节点重新放入所有节点中。
    3. 重复 2 步骤,直到剩下一个节点,则所有节点构成一个Huffman树,该节点为树根
  • 编码过程(Huffman树的遍历):
    1. DFS:可使用递归函数,默认向左为0,向右为1
    2. BFS:使用队列实现,默认向左为0,向右为1
  • 解码过程:
    1. 根据Huffman树解码Huffman编码最为简单
    2. 从树根开始,根据Huffman编码逐位移动:0向左,1向右,到达的叶子节点便是字符所在节点
三、数据表示
  • 词频统计的过程使用 map<char, int> 保存键值对类型的数据(字符 → 出现次数),字符对应的Huffman编码使用 map<char, string> 保存
  • 节点每次的排序使用优先队列 priority_queue<Node, vector<Node>, greater<Node>>
四、问题与解决方法
  • 读文件时使用 while(in.get() != EOF) 判断文件结束时会多读一个字符
    • 原因:其实 EOF(end of file) 这个文件结束标志是在文件的最后一个字符之后的,当读入最后一个字符的时候其实文件并没有读到 EOF 标志,只有下一次读的时候才会读到 EOF,但是在读到 EOF 的时候如果未加处理还是会把 EOF 的值看做一个字符输出,故总会多输出一个字符

    • 解决方法:使用 peek() 函数,该函数获取下个字符的值但不移动文件指针,故可在最后一个字符的时候知道下个字符为 EOF

        while (in.peek() != EOF)
      {
          char c = in.get();
          cout << c;
      }
      
五、实验代码
#include <iostream>
#include <fstream>
#include <queue>
#include <map>
#include <utility>
#include <algorithm>

using namespace ::std;

/**
 * One node of a Huffman tree.
 *
 * A node built from a symbol is a leaf; a node built from two children is an
 * internal node. The pointer members are non-owning: copying a Node copies
 * the pointers, not the nodes they refer to, and destroying a Node frees
 * nothing.
 */
struct Node
{
    // True for a node created from a symbol (a leaf), false otherwise.
    bool isChild = false;
    // Symbol held by a leaf; stays 0 for nodes built from two children.
    char symbol = 0;

    // Code string attached to this node; empty until something assigns it.
    std::string encode = "";

    // Frequency of the symbol, or the weight passed in for a merged node.
    int weight = 0;
    Node *parent = nullptr;
    Node *leftChild = nullptr;
    Node *rightChild = nullptr;

    /** Creates a leaf for `symbol` with frequency `weight`. */
    Node(char symbol, int weight) : isChild(true), symbol(symbol), weight(weight)
    {
    }

    /** Creates an internal node with the given weight and children. */
    Node(int weight, Node *leftChild, Node *rightChild)
        : weight(weight), leftChild(leftChild), rightChild(rightChild)
    {
    }

    // Copy/move operations are intentionally left to the compiler: the
    // member-wise versions do exactly what a hand-written copy would do.

    /**
     * Orders nodes by weight so that std::greater<Node> yields a min-heap.
     * Takes references so a comparison never copies the `encode` string.
     */
    friend bool operator>(const Node &n1, const Node &n2)
    {
        return n1.weight > n2.weight;
    }
};

/**
 * Counts how often each character occurs in the input stream; the counts are
 * used as the weights when the Huffman tree is generated.
 *
 * @param in        stream to read from; it is consumed up to end of file.
 * @param letterMap receives character -> occurrence count. Existing entries
 *                  are kept and incremented.
 *
 * The resulting table and two status messages are printed to standard output.
 */
void letterFrequencyCount(ifstream &in, map<char, int> &letterMap)
{
    cout << "open file successed" << endl;

    // peek() lets us detect end of file before consuming anything, so the
    // EOF marker itself is never counted as a character.
    while (in.peek() != EOF)
        ++letterMap[static_cast<char>(in.get())];

    for (const auto &entry : letterMap)
    {
        cout << '[' << '\'' << entry.first << '\'' << " - " << entry.second << ']' << endl;
    }
    cout << "Word frequency statistics successed" << endl;
}


/**
 * Builds a Huffman tree from a symbol -> frequency table.
 *
 * @param letterMap frequency of every symbol; it is only read.
 * @return pointer to the root node, allocated with `new` together with every
 *         other node of the tree (nothing in this function frees them), or
 *         nullptr when letterMap is empty.
 */
Node *getHuffmanTree(std::map<char, int> &letterMap)
{
    std::cout << "calculating huffman encode" << std::endl;

    // No symbols means no tree. Without this guard the code below would call
    // top() on an empty priority_queue, which is undefined behaviour.
    if (letterMap.empty())
        return nullptr;

    // std::greater turns the priority queue into a min-heap on Node::weight.
    std::priority_queue<Node, std::vector<Node>, std::greater<Node>> letterQueue;
    for (const auto &entry : letterMap)
    {
        letterQueue.push(Node(entry.first, entry.second));
    }

    // Copies the lightest queued node to the heap and removes it from the
    // queue. The queue stores nodes by value, so the children's parent
    // pointers can only be set once their parent has a stable address.
    const auto detachLightest = [&letterQueue]() -> Node * {
        Node *node = new Node(letterQueue.top());
        letterQueue.pop();
        if (node->leftChild != nullptr)
        {
            node->leftChild->parent = node;
            node->rightChild->parent = node;
        }
        return node;
    };

    // Build the tree: repeatedly merge the two lightest nodes.
    while (letterQueue.size() > 1)
    {
        Node *leftChild = detachLightest();
        Node *rightChild = detachLightest();
        letterQueue.push(Node(leftChild->weight + rightChild->weight, leftChild, rightChild));
    }

    // The single remaining node is the root.
    return detachLightest();
}

/**
 * Assigns a bit string to every node of the tree (left = '0', right = '1')
 * and collects the codes of the leaves.
 *
 * @param huffmanTree  root of the tree; the `encode` member of its nodes is
 *                     overwritten. A null root produces no codes.
 * @param letterKeyMap receives symbol -> code. Entries already present for a
 *                     symbol are left as they are.
 * @param len          unused; kept so existing callers keep compiling.
 *
 * The resulting code table is printed to standard output.
 */
void getHuffmanCode(Node *huffmanTree, std::map<char, std::string> &letterKeyMap, int len)
{
    (void)len;

    // Breadth-first traversal; a null root simply leaves the queue empty.
    std::queue<Node *> nodeQueue;
    if (huffmanTree != nullptr)
        nodeQueue.push(huffmanTree);

    while (!nodeQueue.empty())
    {
        Node *currentNode = nodeQueue.front();
        nodeQueue.pop();

        if (!currentNode->isChild)
        {
            // Internal node: extend the prefix for both children.
            currentNode->leftChild->encode = currentNode->encode + '0';
            currentNode->rightChild->encode = currentNode->encode + '1';
            nodeQueue.push(currentNode->leftChild);
            nodeQueue.push(currentNode->rightChild);
            continue;
        }

        // A leaf without a parent is the root of a one-symbol tree; its
        // prefix would be empty, so give it the one-bit code "0".
        if (currentNode->parent == nullptr)
            currentNode->encode = "0";
        letterKeyMap.insert(std::make_pair(currentNode->symbol, currentNode->encode));
    }

    for (const auto &entry : letterKeyMap)
    {
        std::cout << '[' << '\'' << entry.first << '\'' << " - " << entry.second << ']' << std::endl;
    }
}

/**
 * Writes the Huffman code of every character read from `in` to `out`.
 *
 * @param letterKeyMap symbol -> code table; it is only read.
 * @param in           stream to encode; consumed up to end of file.
 * @param out          receives the concatenated code strings.
 *
 * Characters that have no entry in the table are skipped and reported once
 * on standard error. Looking them up with operator[] would silently insert
 * an empty code into the table and drop the character without any trace.
 */
void huffmanEncoder(std::map<char, std::string> &letterKeyMap, std::ifstream &in, std::ofstream &out)
{
    std::cout << "Starting encoding file......" << std::endl;

    std::size_t skipped = 0;
    // peek() detects end of file before a character is consumed.
    while (in.peek() != EOF)
    {
        const char c = static_cast<char>(in.get());
        const auto found = letterKeyMap.find(c);
        if (found == letterKeyMap.end())
        {
            ++skipped;
            continue;
        }
        out << found->second;
    }

    if (skipped != 0)
    {
        std::cerr << "warning: " << skipped
                  << " character(s) without a Huffman code were skipped" << std::endl;
    }
    std::cout << "Encoding file successed" << std::endl;
}

/**
 * Turns a stream of code characters back into symbols by walking the tree.
 *
 * @param huffmanTree root of the tree used for decoding.
 * @param in          encoded input; consumed up to end of file.
 * @param out         receives one symbol each time a leaf is reached.
 *
 * '0' moves to the left child; any other character moves to the right child.
 * When the root itself is a leaf, every input character yields that symbol.
 */
void huffmanDecoder(Node *huffmanTree, ifstream &in, ofstream &out)
{
    cout << "Starting decoding file......" << endl;

    Node *cursor = huffmanTree;
    while (in.peek() != EOF)
    {
        const char bit = in.get();

        // One-symbol tree: there is nowhere to walk, each character read
        // stands for the root's symbol.
        const bool rootIsLeaf = (cursor == huffmanTree) && cursor->isChild;
        if (rootIsLeaf)
        {
            out << cursor->symbol;
            continue;
        }

        cursor = (bit == '0') ? cursor->leftChild : cursor->rightChild;
        if (!cursor->isChild)
            continue;

        // Reached a leaf: emit it and start over at the root.
        out << cursor->symbol;
        cursor = huffmanTree;
    }

    cout << "decoding file successed" << endl;
}

/**
 * Runs the whole round trip: counts character frequencies in "letter.dat",
 * builds the Huffman tree and code table, writes the encoded text to
 * "letter-encode.dat" and decodes that file again into "letter-decode.dat".
 *
 * @return 0 on success, 1 when one of the files cannot be opened.
 */
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    std::map<char, int> letterMap;
    std::map<char, std::string> letterKeyMap;

    std::ifstream in("letter.dat");
    if (!in)
    {
        std::cout << "open file failed" << std::endl;
        return 1;
    }

    letterFrequencyCount(in, letterMap);

    Node *huffmanTree = getHuffmanTree(letterMap);
    getHuffmanCode(huffmanTree, letterKeyMap, static_cast<int>(letterMap.size()));

    // Counting left the stream at end of file: clear the state flags and
    // rewind. seekg needs an offset plus a direction; passing ios::beg alone
    // is read as an absolute position.
    in.clear();
    in.seekg(0, std::ios::beg);

    std::ofstream encodeOut("letter-encode.dat");
    if (!encodeOut)
    {
        std::cout << "open encode-out file failed" << std::endl;
        return 1;
    }
    huffmanEncoder(letterKeyMap, in, encodeOut);
    encodeOut.flush();
    encodeOut.close();
    in.close();

    // Verify the input before creating the output file.
    std::ifstream decodeIn("letter-encode.dat");
    if (!decodeIn)
    {
        std::cout << "open decode-in file failed" << std::endl;
        return 1;
    }
    std::ofstream decodeOut("letter-decode.dat");
    if (!decodeOut)
    {
        std::cout << "open decode-out file failed" << std::endl;
        return 1;
    }
    huffmanDecoder(huffmanTree, decodeIn, decodeOut);
    decodeIn.close();
    decodeOut.flush();
    decodeOut.close();

    return 0;
}
  • 1
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值