哈夫曼树 pta

最新推荐文章于 2023-12-11 21:06:52 发布

wzx_Eleven

最新推荐文章于 2023-12-11 21:06:52 发布

阅读量1.2k

点赞数

文章标签：算法 c++ 数据结构

本文链接：https://blog.csdn.net/wzx_442011334/article/details/125219253

版权

编写一个哈夫曼编码译码程序。针对一段文本，根据文本中字符出现频率构造哈夫曼树，给出每个字符的哈夫曼编码，并进行译码，计算编码前后文本大小。
为确保构建的哈夫曼树唯一，本题做如下限定：

选择根结点权值最小的两棵二叉树时，选取权值较小者作为左子树。
若多棵二叉树根结点权值相等，则先生成的作为左子树，后生成的作为右子树，具体来说：i) 对于单结点二叉树，优先选择根结点对应字母在文本中最先出现者，如文本为cba，三个字母均出现1次，但c在文本中最先出现，b第二出现，故选择c作为左子树，b作为右子树。ii) 对于非单结点二叉树，先生成的二叉树作为左子树，后生成的二叉树作为右子树。iii) 若单结点和非单结点二叉树根结点权值相等，优先选择单结点二叉树。
生成哈夫曼编码时，哈夫曼树左分支标记为0，右分支标记为1。

输入格式:

输入为3行。第1行为一个字符串，包含不超过5000个字符，至少包含两个不同的字符，每个字符为a-z的小写字母。第2、3行为两个由0、1组成的字符串，表示待译码的哈夫曼编码。

输出格式:

输出第一行为用空格间隔的2个整数，分别为压缩前后文本大小，以字节为单位，一个字符占1字节，8个二进制位占1字节，若压缩后文本不足8位，则按1字节算。输出从第二行开始，每行为1个字符的哈夫曼编码，按各字符在文本中出现次数递增顺序输出，若多个字符出现次数相同，则按其在文本出现先后排列。每行格式为“字母:编码”。最后两行为两行字符串，表示译码结果，若译码失败，则输出INVALID。

输入样例:

cbaxyyzz
0100
011

输出样例:

8 3
c:100
b:101
a:110
x:111
y:00
z:01
zy
INVALID

代码

#include <iostream>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <map>
#include <queue>
#include <vector>
using namespace std;
// One node of the Huffman tree. Leaves carry a lowercase letter in `c`;
// internal nodes are given the placeholder 'S' by main().
// Every member has a default so that a freshly allocated node is never read
// with indeterminate values (in particular, the child pointers start null).
struct node
{
    char c = '\0';         // character stored in the node
    int hz = 0;            // frequency (weight) of the subtree rooted here
    int p = 0;             // tie-break priority: smaller means "created/seen earlier"
    node *left = nullptr;  // child reached by code bit 0
    node *right = nullptr; // child reached by code bit 1
};
struct cmp
{ //先按字符出现的频率从小到大排列，如果相同就按优先级从小到大排列
    bool operator()(const node *a, const node *b)
    {
        if (a->hz != b->hz)
            return a->hz > b->hz;
        else
            return a->p > b->p;
    }
};
// Program-wide state shared by dfs(), translation() and main().
string x; // first input token: the text whose characters are counted
string y; // second input token: first bit string passed to translation()
string z; // third input token: second bit string passed to translation()

int idx = 0; // next priority value to hand out (to characters, then to internal nodes)
int sum = 0; // running total of internal-node weights; main() converts it to bytes

map<char, int> mp; // character -> number of occurrences in x
map<char, int> np; // character -> priority assigned in main()

vector<char> ma; // characters decoded by the current translation() call

priority_queue<node *, vector<node *>, cmp> q; // used to build the Huffman tree; ends up holding the root
priority_queue<node *, vector<node *>, cmp> r; // leaves only, popped to print the codes in order

map<char, vector<int>> cp; // character -> its code as a sequence of 0/1
vector<int> v;             // path from the root to the node dfs() is currently visiting
// Pre-order walk of the tree rooted at `root` that records a code for every
// node whose character is a lowercase letter. The global `v` holds the branch
// bits taken so far (0 = left, 1 = right); it is copied into `cp` at such a
// node and restored after each child has been visited.
void dfs(node *root)
{
    if (!root)
        return;

    if (islower(root->c))
        cp[root->c] = v;

    // Left child first with bit 0, then right child with bit 1.
    for (int bit = 0; bit <= 1; ++bit)
    {
        v.push_back(bit);
        dfs(bit == 0 ? root->left : root->right);
        v.pop_back();
    }
}
void translation(string x) //译码函数
{
    ma.clear();
    node *t = new node;
    t = q.top();
    bool flag = true;
    for (int i = 0; i < x.size(); i++)
    {
        if (x[i] == '0')
            t = t->left;
        else
            t = t->right;
        if ((i == (x.size() - 1)) && isupper(t->c))
            flag = false;
        if (islower(t->c))
        {
            ma.push_back(t->c);
            t = q.top();
        }
    }
    if (flag)
        for (auto t : ma)
            cout << t;
    else
        cout << "INVALID";
    cout << endl;
}
int main()
{
    cin >> x >> y >> z;
    for (int i = 0; i < x.size(); i++) //统计字符出现的频率和优先级
    {
        char temp = x[i];
        mp[temp]++;
        if (!np[temp])
            np[temp] = idx++;
    }
    for (auto temp : mp) //包装节点
    {
        node *t = new node;
        t->c = temp.first;
        t->hz = temp.second;
        t->p = np[temp.first];
        t->left = t->right = NULL;
        q.push(t);
        r.push(t);
    }
    while (q.size() != 1) //构造哈夫曼树
    {
        node *a = new node;
        a = q.top();
        q.pop();
        node *b = new node;
        b = q.top();
        q.pop();
        node *t = new node;
        t->c = 'S';
        t->p = idx++;
        t->hz = a->hz + b->hz;
        t->left = a;
        t->right = b;
        q.push(t);
        sum += t->hz;
    }
    cout << x.size() << " ";
    int cnt = sum / 8;
    if (cnt * 8 < sum)
        cnt++;
    cout << cnt << endl;
    dfs(q.top());
    while (r.size())
    {
        char t = r.top()->c;
        cout << t << ":";
        for (auto temp : cp[t])
            cout << temp;
        cout << endl;
        r.pop();
    }
    translation(y);
    translation(z);
    return 0;
}