树9 Huffman Codes
In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] … c[N] f[N]
where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 ‘0’s and ‘1’s.
Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
基本思路
一.建立一个小顶堆(最小堆):
1.1 向堆中插入元素;
1.2 从堆顶删除并返回最小的元素
二.通过小顶堆,建立一棵 Huffman Tree
2.1 每次从小顶堆中取出两个最小值,作为左右结点,并把两数之和放回小顶堆,反复循环,直到构成 Huffman Tree;
2.2 使用递归的方法计算WPL,即对每个叶子结点求 深度*频率 之和,这就是编码后字符串的总长度;
三.验证每份提交的编码是否正确;
3.1 正确的条件: 编码总长度等于 Huffman 树的 WPL,同时满足前缀码的要求
3.2 编码总长度 : 单个字符编码长度*对应频率,然后求和;
3.3 按编码逐位建立一棵判定树, 每插入一个字符编码,应该满足:1.编码结束的节点是叶子节点,2.路径上没有经过已被其他字符编码占用的节点(可以在节点中添加标记 flag 来判断)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include<iostream>
using namespace std;
#define MAXSIZE 164
#define MinData -100
/* Huffman tree node. The same type is also stored by value inside the
 * heap array (see Heap::datas). */
typedef struct _node{
/* Weight of the node: a character frequency for nodes built from the input
 * array, or the sum of the two children's weights for merged nodes. */
int data;
/* Child links; both are NULL for a leaf. */
struct _node * left;
struct _node * right;
} Node;
/* Array-based binary min-heap of Node values keyed on Node::data. */
typedef struct _heap{
/* Number of elements currently stored; they occupy datas[1..size]. */
int size;
/* Element storage. Slot 0 is used by BuildHeap for a sentinel whose key is
 * MinData; the parent of slot i is slot i/2. */
Node datas[MAXSIZE];
} Heap;
/* Node of the binary trie that Judge builds from the submitted codes. */
typedef struct _Node
{
/* Set to 1 by Judge on the node where a complete code ends. */
int Flag;
/* left is followed for a '0' digit, right for any other digit.
 * NOTE: these default member initializers only take effect when the object
 * is actually constructed (e.g. with new); storage obtained from malloc is
 * not initialized by them, and Flag has no initializer at all. */
struct _Node *left=NULL;
struct _Node *right=NULL;
} JNode;
/* Forward declarations for the heap / Huffman helpers defined after main. */

/* Allocates a min-heap and inserts the first `size` values of arr. */
Heap * BuildHeap(int arr[],int size);
/* Inserts a copy of *node into the heap, sifting it up by its data key. */
void Insert(Heap * MinHeap,Node * node);
/* Removes the smallest element and returns it as a newly allocated Node. */
Node * DeleteMin(Heap * minHeap);
/* Prints the keys stored in slots 1..size on one line (debug helper). */
void PrintHeap(Heap * minHeap) ;
/* Allocates a leaf node (both children NULL) whose data is V. */
Node * CreateNode(int V);
/* Repeatedly merges the two smallest heap elements; returns the tree root. */
Node * BuildHuffman(Heap * minHeap);
/* Weighted path length of the tree rooted at root (root is at depth 0). */
int calculate(Node * root);
/* Weighted path length of a subtree whose root sits at the given depth. */
int calculate(Node * root,int depth);
/*
 * Reads N "<character> <frequency>" pairs from standard input.
 *
 * The character is read and discarded; only the frequency is kept, stored
 * in A[0..N-1] in input order.
 */
void ReadData(int N,int A[])
{
    char symbol = '\0';
    int weight = 0;
    int idx = 0;
    while (idx < N) {
        std::cin >> symbol >> weight;
        A[idx] = weight;
        ++idx;
    }
}
/*
 * Debug helper: prints the data of every node in the tree in pre-order
 * (node, left subtree, right subtree), each followed by one space.
 */
void printTree(Node * node){
    if (node == NULL) {
        return;
    }
    printf("%d ", node->data);
    printTree(node->left);
    printTree(node->right);
}
/*
 * Releases every node of the code trie rooted at node.
 *
 * The children are released before the node itself (post-order), so no
 * field of a node is read after that node has been freed, and interior
 * nodes are released as well as leaves. A NULL node is a no-op.
 */
void freeTree(JNode * node){
    if (node == NULL) {
        return;
    }
    freeTree(node->left);
    freeTree(node->right);
    free(node);
}
bool Judge(char S[],JNode *J,int len)//判断该次编码能否符合前缀编码的要求
{
int i=0;
for (; i<len; ++i)
{
if (S[i]=='0')
{
if (J->left==NULL)
{
JNode *J_1=(JNode*)malloc(sizeof(JNode));
J->left=J_1;
}else
{
if (J->left->Flag==1)
{
return false;
}
}
J=J->left;
}else
{
if (J->right==NULL)
{
JNode *J_1=(JNode*)malloc(sizeof(JNode));
J->right=J_1;
}else
{
if (J->right->Flag==1)
{
return false;
}
}
J=J->right;
}
}
J->Flag=1;
if (J->left==NULL&&J->right==NULL)
{
return true;
}else
{
return false;
}
}
int main(){
int n,m;
scanf("%d\n",&n);
int arrs[n];
ReadData(n,arrs);
Heap * MinHeap = BuildHeap(arrs,n);
Node * rootHuffman = BuildHuffman(MinHeap);
int wpl=calculate(rootHuffman);
scanf("%d\n",&m);
// printf("m=%d n=%d",m,n);
char temp[125]="\0";
char c='\0';
bool result=false;
for(int j=0;j<m;j++){
int count=0,flag=0;
JNode * jnode = (JNode*)malloc(sizeof(JNode));
for (int k=0; k<n; ++k) {
cin>>c>>temp;
int l =strlen(temp);
count+=l*arrs[k];
if(!flag){
result=Judge(temp,jnode,l);
if (!result)
{
flag=1;
}
}
}
// printf("count=%d result=%d \n",count,result);
//freeTree(jnode);
freeTree(jnode);
if (result&&(count==wpl))//前缀编码且编码长度之和与Huffman编码相同
{
cout<<"Yes"<<endl;
}else{
cout<<"No"<<endl;
}
}
return 0;
}
/*
 * Inserts a copy of *node into the min-heap, keyed on node->data.
 *
 * The heap stores the node by value, so the caller keeps ownership of the
 * object node points to. If the heap is already full the element is not
 * inserted and a diagnostic is written to stderr. NULL arguments are
 * ignored.
 */
void Insert(Heap * MinHeap,Node* node){
    if (MinHeap == NULL || node == NULL) {
        return;
    }
    /* Slot 0 is reserved, so at most capacity-1 elements fit. */
    const int capacity = (int)(sizeof MinHeap->datas / sizeof MinHeap->datas[0]);
    if (MinHeap->size >= capacity - 1) {
        fprintf(stderr, "Insert: heap is full (%d elements)\n", MinHeap->size);
        return;
    }
    int i = ++(MinHeap->size);
    /* Sift up: pull larger parents down into the hole. The explicit i > 1
     * bound stops at the root even when the new key is smaller than whatever
     * is stored in slot 0. */
    for (; i > 1 && MinHeap->datas[i/2].data > node->data; i /= 2) {
        MinHeap->datas[i] = MinHeap->datas[i/2];
    }
    MinHeap->datas[i] = *node;
}
/*
 * Allocates a min-heap and inserts arr[0..n-1] as leaf nodes.
 *
 * Slot 0 receives a sentinel whose key is MinData. Because Insert stores
 * nodes by value, the temporaries are plain locals rather than heap
 * allocations that would otherwise be leaked.
 *
 * Returns the new heap (to be released with free), or NULL if the
 * allocation fails.
 */
Heap * BuildHeap(int arr[],int n){
    Heap * minHeap = (Heap*)malloc(sizeof *minHeap);
    if (minHeap == NULL) {
        return NULL;
    }
    minHeap->size = 0;

    Node sentinel;
    sentinel.data = MinData;
    sentinel.left = NULL;
    sentinel.right = NULL;
    minHeap->datas[0] = sentinel;

    for (int i = 0; i < n; i++) {
        Node leaf;
        leaf.data = arr[i];
        leaf.left = NULL;
        leaf.right = NULL;
        Insert(minHeap, &leaf);
    }
    return minHeap;
}
//int arrs[]={1,1,1,3,3,6,6};
/*
 * Allocates a leaf node whose data is V and whose children are NULL.
 *
 * The caller owns the returned node. Callers in this file use the result
 * without checking it, so running out of memory terminates the program
 * with a diagnostic instead of returning NULL.
 */
Node * CreateNode(int V){
    Node * node = (Node*)malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "CreateNode: out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->data = V;
    node->left = NULL;
    node->right = NULL;
    return node;
}
/*
 * Debug helper: prints the keys held in heap slots 1..size in array order,
 * each followed by a space, then a newline.
 */
void PrintHeap(Heap * minHeap) {
    const int count = minHeap->size;
    int slot = 1;
    while (slot <= count) {
        printf("%d ", minHeap->datas[slot].data);
        ++slot;
    }
    printf("\n");
}
/*
 * Removes the element with the smallest key from the heap.
 *
 * Returns a newly allocated copy of that element (data and child links),
 * owned by the caller, or NULL when the heap is NULL or empty. A copy is
 * returned because the heap slot is overwritten during re-heapification.
 */
Node * DeleteMin(Heap * minHeap){
    if (minHeap == NULL || minHeap->size < 1) {
        return NULL;
    }
    Node * datas = minHeap->datas;

    Node * firstnode = CreateNode(datas[1].data);
    firstnode->left = datas[1].left;
    firstnode->right = datas[1].right;

    /* Take the last element out and shrink the heap; the sift-down below
     * only looks at slots that are still part of the heap. */
    Node lastnode = datas[minHeap->size];
    const int remaining = --(minHeap->size);

    int parent = 1;
    while (parent * 2 <= remaining) {
        int child = parent * 2;
        /* Pick the smaller of the two children, if a right child exists. */
        if (child < remaining && datas[child].data > datas[child + 1].data) {
            child++;
        }
        if (lastnode.data <= datas[child].data) {
            break;
        }
        datas[parent] = datas[child];
        parent = child;
    }
    datas[parent] = lastnode;
    return firstnode;
}
/*
 * Builds a Huffman tree from the elements in the heap.
 *
 * While more than one element remains, the two smallest are removed and
 * re-inserted as one node whose data is the sum of their data and whose
 * children are the two removed nodes. The heap is consumed in the process.
 *
 * Returns the root of the tree, or NULL if the heap is NULL or empty.
 */
Node * BuildHuffman(Heap * minHeap){
    if (minHeap == NULL || minHeap->size < 1) {
        return NULL;
    }
    const int merges = minHeap->size - 1;
    for (int i = 0; i < merges; i++) {
        /* Insert stores a copy, so a local is enough for the merged node. */
        Node merged;
        merged.left = DeleteMin(minHeap);
        merged.right = DeleteMin(minHeap);
        merged.data = merged.left->data + merged.right->data;
        Insert(minHeap, &merged);
    }
    /* The single remaining element is the root. */
    return DeleteMin(minHeap);
}
/*
 * Weighted path length of the whole tree: delegates to the two-argument
 * overload with the root placed at depth 0.
 */
int calculate(Node * root){
    const int rootDepth = 0;
    return calculate(root, rootDepth);
}
/*
 * Weighted path length of the subtree rooted at node.
 *
 * node   subtree root; NULL contributes 0
 * depth  depth of node within the whole tree
 *
 * A leaf contributes depth * data; an interior node contributes the sum of
 * its children's results, each one level deeper.
 */
int calculate(Node * node,int depth){
    if (node == NULL) {
        return 0;
    }
    if (node->left == NULL && node->right == NULL) {
        return depth * (node->data);
    }
    return calculate(node->left, depth + 1) + calculate(node->right, depth + 1);
}