11.1 Huffman编码
Huffman设计了一个可以用来构造一种称为Huffman编码的最优前缀码的贪心算法。
11.1.1 实例
PKU JudgeOnline, 1521, Entropy.
11.1.2 问题描述
给定一组ASCII码组成的字符串,求用Huffman编码该字符串所需的长度,及采用该编码带来的压缩比。
11.1.3 输入
AAAAABCD
THE_CAT_IN_THE_HAT
END
11.1.4 输出
6413 4.9
144 51 2.8
11.1.5 分析
采用贪心算法,使用最小堆就可以实现Huffman树的构建。为了便于计算每种字符对应的码的长度,用update()维护depth数组。如果需要求出每种字符对应的编码,只需要进一步增强update()函数即可。
注意:对于只有同一种字符的字符串要特殊处理。
11.1.6 程序
#include <iostream>
#include <stdio.h>
#include <string.h>
using namespace std;
#define maxNum ('_' - 'A' + 1)
#define MAX 0x7F7F7F7F
struct node{
int key;
int pos;
};
nodeminHeap[maxNum];
int heapSize;
void minHeapify(int i)
{
int l;
int r;
int min;
node temp;
l = i * 2;
r = i * 2 + 1;
min = i;
if((l <=heapSize)&&
(minHeap[l].key < minHeap[min].key)){
min = l;
}
if((r <=heapSize)&&
(minHeap[r].key < minHeap[min].key)){
min = r;
}
if(min !=i)
{
temp = minHeap[min];
minHeap[min] = minHeap[i];
minHeap[i] = temp;
minHeapify(min);
}
}
void buildMinHeap()
{
int i;
for(i =heapSize / 2; i > 0; i--){
minHeapify(i);
}
}
nodeheapExtractMin()
{
node min;
if(heapSize< 1)
{
cout << "ERROR:no more" << endl;
}
min = minHeap[1];
minHeap[1] = minHeap[heapSize];
heapSize--;
minHeapify(1);
return min;
}
void heapIncreaseKey(int i, int key)
{
node temp;
if(key >minHeap[i].key)
{
cout << "ERROR:new key is smaller than current key" << endl;
}
minHeap[i].key = key;
while(i> 1 && minHeap[i/2].key > minHeap[i].key)
{
temp = minHeap[i];
minHeap[i] = minHeap[i/2];
minHeap[i/2] = temp;
i = i / 2;
}
}
void minHeapInsert(node temp)
{
heapSize++;
minHeap[heapSize] = temp;
minHeap[heapSize].key = MAX;
heapIncreaseKey(heapSize, temp.key);
}
int f[maxNum + maxNum];
int huffmanLeft[maxNum + maxNum];
int huffmanRight[maxNum + maxNum];
int depth[maxNum];
int huffmanTop;
void update(int x)
{
if(x <0)
{
return;
}
if(x <maxNum){
depth[x]++;
return;
}
update(huffmanLeft[x]);
update(huffmanRight[x]);
}
int huffman()
{
node x;
node y;
node z;
huffmanTop = maxNum;
while(heapSize> 1){
x = heapExtractMin();
y = heapExtractMin();
z.pos = huffmanTop;
huffmanLeft[z.pos] = x.pos;
huffmanRight[z.pos] = y.pos;
z.key = x.key + y.key;
f[z.pos] = f[x.pos] + f[y.pos];
minHeapInsert(z);
update(x.pos);
update(y.pos);
huffmanTop++;
}
z = heapExtractMin();
if(z.pos !=huffmanTop - 1)
{
int *a= (int *)0;
*a = 0;
printf("error\n");
}
returnz.pos;
}
int main()
{
charstr[10000];
int pos;
int length;
int sum;
int i;
int n;
while(scanf("%s", str)){
if(strcmp("END", str) == 0)
{
break;
}
memset(f, 0, sizeof(f));
memset(minHeap, 0, sizeof(minHeap));
length = strlen(str);
n = 0;
for(i =0; i < length; i++){
pos = str[i] - 'A';
if(f[pos]== 0)
{
n++;
}
f[pos]++;
}
pos = 1;
for(i =0; i < maxNum; i++){
if(f[i]!= 0){
minHeap[pos].key = f[i];
minHeap[pos].pos = i;
pos++;
}
}
if(n> 1){
heapSize = n;
buildMinHeap();
memset(huffmanLeft, 0xff, sizeof(huffmanLeft));
memset(huffmanRight, 0xff, sizeof(huffmanRight));
memset(depth, 0, sizeof(depth));
huffman();
sum = 0;
for(i= 0; i < maxNum; i++){
sum += depth[i] * f[i];
}
}else{
sum = length;
}
length = length * 8;
printf("%d%d %.1f\n", length, sum, (double)length/ sum);
}
}
本文章欢迎转载,请保留原始博客链接http://blog.csdn.net/fsdev/article