题目描述
每行一个大小写英文字母组成的字符串,长度不大于 1000,通过前缀编码后最短的编码长度。
输入
每组数据一行,大小写英文字母
输出
每组数据输出赫夫曼编码长度
样例输入
AABBCCDEEEE
AAABCCC
BBACB样例输出
25
11
7
#include <iostream>
#include <cstring>
using namespace std;
const int big = 999;
struct Node
{
int parent, weight, left, right;
};
Node node[100];
int a[99];
int w[99];
char s[1005];
int root; // 赫夫曼树根节点下标
int ans; //赫夫曼树编码的长度
// 搜索权重最小的数组下标
void search(int w[], int &m1, int &m2, int len)
{
for (int i = 1; i <= len; i++)
{
if (w[m1] > w[i])
{
m1 = i;
}
}
w[m1] = big;
for (int i = 1; i <= len; i++)
{
if (w[m2] > w[i])
{
m2 = i;
}
}
w[m2] = big;
};
// 深搜求长度
void dfs(int index, int cnt)
{
int l = node[index].left;
int r = node[index].right;
if (l == -1 && r == -1)
{
// cout<<"index="<<index<<" cnt="<<cnt<<endl;
ans += node[index].weight * cnt;
}
if (l != -1)
{
dfs(l, ++cnt);
cnt--; // 注意搜索回溯的时候要将计数器 -1
}
if (r != -1)
{
dfs(r, ++cnt);
cnt--;
}
}
int main()
{
int t;
cin >> t;
while (t--)
{
memset(s, '\0', sizeof(s));
memset(a, 0, sizeof(a));
memset(w, 0, sizeof(w));
cin >> s;
for (int i = 0, l = strlen(s); i < l; i++)
{
a[s[i] - 'A']++; // 在ascill里面大写的字母比较小
}
// 初始化w数组
int wl = 1;
w[0] = big;
for (int i = 0; i < 99; i++)
{
if (a[i] != 0)
{
w[wl++] = a[i];
}
}
wl--;
root = 2 * wl - 1;
for (int i = 1; i <= root; i++)
{
node[i].parent = node[i].left = node[i].right = node[i].weight = -1;
}
for (int i = 1; i <= wl; i++)
{
node[i].weight = w[i];
}
// 建树
for (int i = wl + 1; i <= root; i++)
{
int m1 = 0, m2 = 1;
search(w, m1, m2, wl);
node[i].left = m1;
node[i].right = m2;
node[i].weight = node[m1].weight + node[m2].weight;
node[m1].parent = node[m2].parent = i;
w[++wl] = node[m1].weight + node[m2].weight;
}
dfs(root, 0);
cout << ans << endl;
ans = 0;
}
return 0;
}