Problem Description
字符的编码方式有多种,除了大家熟悉的ASCII编码,哈夫曼编码(Huffman Coding)也是一种编码方式,它是可变字长编码。该方法完全依据字符出现概率来构造出平均长度最短的编码,称之为最优编码。哈夫曼编码常被用于数据文件压缩中,其压缩率通常在20%~90%之间。你的任务是对从键盘输入的一个字符串求出它的ASCII编码长度和哈夫曼编码长度的比值。
Input
输入数据有多组,每组数据一行,表示要编码的字符串。
Output
对于每组数据,输出一行:该字符串的ASCII编码长度la、哈夫曼编码长度lh以及la/lh的值(保留一位小数),数据之间以空格间隔。
Example Input
AAAAABCD
THE_CAT_IN_THE_HAT
Example Output
64 13 4.9
144 51 2.8
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#define maxsize 1000
/*
 * Node of a Huffman tree.
 *
 * The child links refer to this same struct type (tag `nodee`), so a node
 * can point at other hfmtree nodes and those links can be dereferenced.
 */
typedef struct nodee {
    int weight;              /* weight stored in this node */
    struct nodee *lc, *rc;   /* child links (presumably left / right child); NULL when absent */
} hfmtree;
/*
 * Array-backed binary heap of int keys.
 *
 * Despite the type name, the heap routines in this file (adjust, dele)
 * keep the SMALLEST key at data[1], i.e. they maintain min-heap order.
 */
typedef struct noden {
/* Number of keys currently stored; they occupy data[1..size]. */
int size;
/* Recorded limit on the number of keys; create() sets it to at least maxsize. */
int capacity;
/* 1-based key array; create() stores the sentinel value -1 in data[0]. */
int *data;
}maxheap;
/*
 * Allocate a heap and copy the weights b[0..top] into it.
 *
 * b   - array of weights; entries b[0..top] are read.
 * top - index of the last valid entry of b, so top + 1 keys are copied.
 *       Pass -1 for an empty heap; smaller values are treated as empty.
 *
 * The keys are copied in input order into data[1..top+1]; they are NOT put
 * into heap order here (adjust() does that). data[0] is set to the
 * sentinel -1. The key array is given capacity + 1 slots, where capacity is
 * maxsize or the number of keys, whichever is larger, so the copy can never
 * run past the allocation.
 *
 * Returns the new heap, or NULL if an allocation fails. The caller owns the
 * result and releases it with free(h->data) followed by free(h).
 */
maxheap * create(int b[],int top) {
    int count = top + 1;
    int capacity;
    int i;
    maxheap *h;

    if (count < 0) {
        count = 0;
    }
    capacity = count > maxsize ? count : maxsize;

    h = malloc(sizeof *h);
    if (h == NULL) {
        return NULL;
    }
    h->data = malloc(((size_t)capacity + 1) * sizeof *h->data);
    if (h->data == NULL) {
        free(h);
        return NULL;
    }

    h->size = count;
    h->capacity = capacity;
    h->data[0] = -1;
    for (i = 1; i <= count; i++) {
        h->data[i] = b[i - 1];
    }
    return h;
}
/*
 * Rearrange data[1..size] in place so that every parent key is <= the keys
 * of its children (min-heap order).
 *
 * Works bottom-up: starting at the last node that has a child (size / 2)
 * and moving back to the root, each node is sifted down by swapping it with
 * its smaller child until it is no larger than that child.
 *
 * Returns h itself.
 */
maxheap * adjust(maxheap * h) {
    int start;

    for (start = h->size / 2; start >= 1; start--) {
        int pos = start;

        while (pos * 2 <= h->size) {
            int smaller = pos * 2;
            int held;

            /* Prefer the right child when it exists and holds a smaller key. */
            if (smaller != h->size && h->data[smaller] > h->data[smaller + 1]) {
                smaller++;
            }
            if (h->data[pos] <= h->data[smaller]) {
                break;
            }

            held = h->data[pos];
            h->data[pos] = h->data[smaller];
            h->data[smaller] = held;
            pos = smaller;
        }
    }
    return h;
}
/*
 * Remove and return the smallest key of the heap.
 *
 * h - heap whose data[1..size] is in min-heap order.
 *
 * The last key is taken out of the array, the heap shrinks by one, and the
 * hole left at the root is moved down (pulling the smaller child up each
 * step) until the taken-out key fits.
 *
 * Returns the removed key. If the heap is empty, returns -1 and leaves the
 * heap untouched; a stored key of -1 cannot be told apart from this case.
 */
int dele(maxheap * h) {
    int smallest;
    int last;
    int hole;

    /* Guard: without it an empty heap would read unset storage and drive size negative. */
    if (h->size < 1) {
        return -1;
    }

    smallest = h->data[1];
    last = h->data[h->size];
    h->size--;

    hole = 1;
    while (2 * hole <= h->size) {
        int child = 2 * hole;

        /* Pick the right child when it exists and holds a smaller key. */
        if (child != h->size && h->data[child] > h->data[child + 1]) {
            child++;
        }
        if (last < h->data[child]) {
            break;
        }
        h->data[hole] = h->data[child];
        hole = child;
    }
    h->data[hole] = last;
    return smallest;
}
/*
 * Insert a key into the min-heap.
 *
 * item - key to add.
 * h    - heap whose data[1..size] is in min-heap order and whose data array
 *        was obtained from malloc with capacity + 1 slots (as create() does).
 *
 * When the heap is full, the key array is grown with realloc before the
 * insertion. If that allocation fails the heap is left unchanged and the
 * key is not inserted (the function has no way to report the failure).
 *
 * The new key starts in the first free slot and parents larger than it are
 * moved down until its position is found. The walk stops explicitly at the
 * root, so it terminates for every key value and does not depend on the
 * sentinel in data[0].
 */
void insert(int item,maxheap * h) {
    int child;

    if (h->size >= h->capacity) {
        int new_capacity = h->capacity > 0 ? h->capacity * 2 : 1;
        int *grown = realloc(h->data, ((size_t)new_capacity + 1) * sizeof *grown);

        if (grown == NULL) {
            return; /* out of memory: original array is still valid and untouched */
        }
        h->data = grown;
        h->capacity = new_capacity;
    }

    h->size++;
    child = h->size;
    while (child > 1 && item < h->data[child / 2]) {
        h->data[child] = h->data[child / 2];
        child /= 2;
    }
    h->data[child] = item;
}
//hfmtree * build_hfmtree(int num) {
// hfmtree * t;
// t=(hfmtree *)malloc(sizeof(hfmtree));
// t->weight=num;
// t->lc=t->rc=NULL;
// return t;
//}
/*
 * Run the Huffman merge procedure on a heap of weights and return the sum
 * of all merged weights.
 *
 * h - heap of weights already in min-heap order.
 *
 * The two smallest keys are repeatedly removed and their sum is inserted
 * back, size - 1 times in total; every such sum is added to the result.
 * When the weights are symbol counts, this total is the weighted path
 * length of the Huffman tree, i.e. the length in bits of the encoded text.
 *
 * The heap is consumed: afterwards it holds a single key when it started
 * with at least one. No tree nodes are built.
 *
 * Returns the accumulated total as an int; 0 when the heap holds fewer
 * than two keys.
 */
int create_hfmtree(maxheap * h) {
    int total = 0;
    int merges = h->size - 1;
    int i;

    for (i = 0; i < merges; i++) {
        int first = dele(h);
        int second = dele(h);

        insert(first + second, h);
        total += first + second;
    }
    return total;
}
int main() {
int i,j,len,max,ans;
double ave;
char str[1006];
int a[256]= {0}; //a 统计各个字符出现的概率
int b[1000]={0},top; //b 存放各个字符的权值
hfmtree * tree;
maxheap * heap;
while(scanf("%s",str)!=EOF) {
memset(a,0,sizeof(a));
memset(b,0,sizeof(b));
max=0;
len=strlen(str);
for (i=0;i<len;i++) {
a[str[i]]++;
if(str[i]>max){
max=str[i];//字符串中的字符对应的最大ASCII值,作为后面循环结束的条件
}
}
top=-1;
for (i=0;i<max+5;i++) {
if(a[i]) {
b[++top]=a[i];
}
}
heap=create(b,top);
heap=adjust(heap);
ans=create_hfmtree(heap);
ave=(8.0*len)/ans;
printf("%d %d %.1lf\n",8*len,ans,ave);
}
return 0;
}