05-树9 Huffman Codes

最新推荐文章于 2022-04-24 22:09:29 发布

lhrer

最新推荐文章于 2022-04-24 22:09:29 发布

阅读量63

点赞数

分类专栏：浙大数据结构mooc

本文链接：https://blog.csdn.net/m0_52089790/article/details/120025898

版权

Huffman编码最小堆前缀编码压缩算法程序设计

关键词由CSDN通过智能技术生成

浙大数据结构mooc 专栏收录该内容

27 篇文章 0 订阅

订阅专栏

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

结尾无空行

Sample Output:

Yes
Yes
No
No

结尾无空行

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>

/*
 * Binary tree node shared by two structures in this file:
 *   - the Huffman tree built in main(), where Weight is a symbol frequency
 *     (leaf) or the sum of the two children's weights (merged node);
 *   - the code trie built in Check(), where Weight is -1 for a node at which
 *     no code ends and the symbol's frequency (>= 0) for a node at which a
 *     code ends.
 * HuffmanTree is a pointer to such a node.
 */
typedef struct HfmTNode * HuffmanTree;
struct HfmTNode {
    int Weight;           /* frequency, subtree sum, or -1 marker (see above) */
    HuffmanTree Left;     /* child reached by bit '0' in the code trie; NULL if absent */
    HuffmanTree Right;    /* child reached by any other bit in the code trie; NULL if absent */
};

/*
 * Array-backed binary heap whose elements are pointers to tree nodes.
 * The heap code in this file indexes Data from 1 (the root is Data[1]),
 * and orders elements by the pointed-to node's Weight.
 */
typedef struct HNode * Heap;
typedef HuffmanTree ElementType;
struct HNode {
    ElementType * Data;// element array
    int Size;// number of elements currently stored in the heap
    int Capacity;// maximum number of elements the heap can hold
};
typedef Heap MinHeap;


MinHeap CreateMinHeap(int capacity)
{
    Heap H=(Heap)malloc(sizeof(struct HNode));
    H->Data=(ElementType *)malloc((capacity+1)*sizeof(ElementType));
    H->Size=0;
    H->Capacity=capacity;
//这题不设置哨兵，因为要把HuffmanTree的节点HfmTNode的地址作为指针插入最小堆
    return H;
}

/*
 * Inserts the tree node `hfmT` into the min-heap `H`, ordered by Weight.
 *
 * A hole is opened at the new last position and moved up while the parent is
 * heavier than the node being inserted; the node is then dropped into the hole.
 *
 * Returns true on success, false if `H` or `hfmT` is NULL or the heap is
 * already full (in which case the heap is left unchanged).
 */
bool Insert(MinHeap H,HuffmanTree hfmT)
{
    if (H == NULL || hfmT == NULL || H->Size >= H->Capacity) {
        return false;
    }

    int hole = ++H->Size;
    while (hole > 1 && H->Data[hole / 2]->Weight > hfmT->Weight) {
        H->Data[hole] = H->Data[hole / 2];
        hole /= 2;
    }
    H->Data[hole] = hfmT;
    return true;
}
/*
 * Removes and returns the node with the smallest Weight from the min-heap `H`.
 *
 * The last element is taken out and a hole at the root is moved down along
 * the lighter child until the last element fits, then it is placed there.
 *
 * Returns the removed node, or NULL if `H` is NULL or the heap is empty
 * (the heap is left unchanged in that case).
 */
ElementType DeleteMin(MinHeap H)
{
    if (H == NULL || H->Size <= 0) {
        return NULL;
    }

    ElementType smallest = H->Data[1];        /* root holds the minimum */
    ElementType last = H->Data[H->Size--];    /* element to re-seat */

    int hole = 1;
    while (hole * 2 <= H->Size) {
        int child = hole * 2;
        /* Pick the right child when it exists and is lighter than the left. */
        if (child + 1 <= H->Size &&
            H->Data[child + 1]->Weight < H->Data[child]->Weight) {
            child++;
        }
        if (last->Weight < H->Data[child]->Weight) {
            break;
        }
        H->Data[hole] = H->Data[child];
        hole = child;
    }
    H->Data[hole] = last;

    return smallest;
}


/*
 * Weighted path length of the subtree rooted at `hfmT`, where `hfmT` itself
 * sits at depth `depth`: the sum over all leaves of (leaf depth * leaf Weight).
 * A node counts as a leaf only when both children are NULL.
 */
int WPL(HuffmanTree hfmT,int depth)
{
    const bool isLeaf = (hfmT->Left == NULL) && (hfmT->Right == NULL);

    if (!isLeaf) {
        const int childDepth = depth + 1;
        int leftCost = WPL(hfmT->Left, childDepth);
        int rightCost = WPL(hfmT->Right, childDepth);
        return leftCost + rightCost;
    }

    return hfmT->Weight * depth;
}



/* One input symbol together with its frequency, as read by main(). */
struct info{
    char character;   /* the symbol */
    int weight;       /* its frequency */
};
/* Symbol table in input order; allocated and filled by main(), read by Check(). */
struct info * array;

bool Check(int N,int WeightedPathLength){
    bool flag=true;//就算判断完了也要读完后续的
    int i,wpl=0;
    char ch;
    char str[N];//编码长度最多N-1
    HuffmanTree root=(HuffmanTree)malloc(sizeof(struct HfmTNode));root->Left=root->Right=NULL;
    root->Weight=-1;//weight>=0的表示有权重的节点，即我们要编码的字符
    HuffmanTree tree=root;
    
    for(i=0;i<N;i++){
        tree=root;
        while( (ch=getchar())!='\n' )   continue;
        ch=getchar();
        scanf("%s",str);
        if(strlen(str)>N-1) flag=false;
        wpl+=array[i].weight*strlen(str);

        for(int j=0;str[j]!='\0';j++){

            if(str[j]=='0'){
                if(tree->Left==NULL){
                    HuffmanTree NewNode=(HuffmanTree)malloc(sizeof(struct HfmTNode));NewNode->Left=NewNode->Right=NULL;
                    NewNode->Weight=-1;
                    tree->Left=NewNode;
                    tree=tree->Left;
                }else{
                    if(tree->Left->Weight>=0 || j==strlen(str)-1){
                        flag=false;break;
                    }
                    else{
                        tree=tree->Left;
                    }
                }
            }else{
                 if(tree->Right==NULL){
                    HuffmanTree NewNode=(HuffmanTree)malloc(sizeof(struct HfmTNode));NewNode->Left=NewNode->Right=NULL;
                    NewNode->Weight=-1;
                    tree->Right=NewNode;
                    tree=tree->Right;
                }else{
                    if(tree->Right->Weight>=0 || j==strlen(str)-1){
                        flag=false;break;
                    }
                    else{
                        tree=tree->Right;
                    }
                }
            }
            if(j==strlen(str)-1)    tree->Weight=array[i].weight;
        }
    }

    if(wpl==WeightedPathLength && flag==true)    return true;
    else    return false;
}

int main()
{
    int N,i,v;
    char ch;
    HuffmanTree hfmT;
    int WeightedPathLength;
    scanf("%d",&N);

    array=(struct info *)malloc(N*sizeof(struct info));

    MinHeap heap=CreateMinHeap(63);
    for(i=0;i<N;i++){
        getchar();
        scanf("%c %d",&ch,&v);

        array[i].character=ch;array[i].weight=v;

        hfmT=(HuffmanTree)malloc(sizeof(struct HfmTNode));hfmT->Left=hfmT->Right=NULL;
        hfmT->Weight=v;
        Insert(heap,hfmT);
    }

    for(i=0;i<N-1;i++){
        hfmT=(HuffmanTree)malloc(sizeof(struct HfmTNode));
        hfmT->Left=DeleteMin(heap);
        hfmT->Right=DeleteMin(heap);
        hfmT->Weight=hfmT->Left->Weight+hfmT->Right->Weight;
        Insert(heap,hfmT);
    }

    WeightedPathLength=WPL(hfmT,0);

    int M;
    scanf("%d",&M);
    for(i=0;i<M;i++){
        if(Check(N,WeightedPathLength)) puts("Yes");
        else    puts("No");
    }

    return 0;
}