05-树9 Huffman Codes (30 分)

最新推荐文章于 2021-11-22 20:13:18 发布

语阑gz

最新推荐文章于 2021-11-22 20:13:18 发布

阅读量185

点赞数

分类专栏： PAT

本文链接：https://blog.csdn.net/qq_42351784/article/details/103335769

版权

PAT 专栏收录该内容

31 篇文章 0 订阅

订阅专栏

In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

本题分两步：第一步判断编码的总权重是否等于最低总权重；第二步判断是否存在某个编码是另一个编码的前缀，即解码时是否会产生二义性

为了求出最低总权重，我们需要先构造一棵Huffman树，再据此计算出它的总权重

// Node of the Huffman tree. `Huffman` is the pointer alias used by the
// rest of the file to pass nodes around.
struct HuffNode;
using Huffman = HuffNode*;

struct HuffNode
{
	int weight;     // leaf: symbol frequency; internal node: sum of both children's weights
	Huffman left;   // left child, null for a leaf
	Huffman right;  // right child, null for a leaf
};

而为了构造一棵Huffman树，我们还需要一个最小堆

// Binary min-heap of Huffman tree nodes, ordered by node weight.
// `Heap` is the pointer alias through which the heap is passed around.
struct HeapNode;
using Heap = HeapNode*;

struct HeapNode
{
	Huffman* elements;  // node pointers; slot 0 holds a sentinel, live entries occupy 1..size
	int size;           // number of nodes currently stored
	int capacity;       // highest usable index of `elements`
};

堆的操作定义如下

// Allocates an empty min-heap able to hold the 2*n - 1 nodes of a Huffman
// tree built from n symbols.
//
// Slot 0 of the element array stores a sentinel node of weight 0 so that
// Insert() can percolate upward without checking for the root; this only
// works as long as every real weight is >= 0.
//
// @param n  number of symbols (leaves) the heap is sized for
// @return   the newly allocated heap; the caller owns it
Heap CreateHeap(int n)
{
	// Always reserve at least the sentinel slot, even for n <= 0.
	const int slots = n > 0 ? 2 * n : 1;

	Heap H = static_cast<Heap>(malloc(sizeof(HeapNode)));
	H->elements = static_cast<Huffman*>(malloc(slots * sizeof(Huffman)));
	H->capacity = slots - 1;
	H->size = 0;

	// The sentinel is a tree node, so it must be sized as a HuffNode.
	// Sizing it as a HeapNode under-allocates on LP64 targets and the
	// write to `right` runs past the end of the allocation.
	Huffman sentinel = static_cast<Huffman>(malloc(sizeof(HuffNode)));
	sentinel->weight = 0;
	sentinel->left = nullptr;
	sentinel->right = nullptr;
	H->elements[0] = sentinel;

	return H;
}

// Sift-down: moves the entry at index `node` toward the leaves until neither
// child has a strictly smaller weight, restoring min-heap order for the
// subtree rooted at `node`.
void PreDown(Heap H, int node)
{
	Huffman sinking = H->elements[node];
	int hole = node;

	for (int kid = hole * 2; kid <= H->size; kid = hole * 2)
	{
		// Prefer the right child only when it exists and is strictly lighter.
		const bool hasRight = kid < H->size;
		if (hasRight && H->elements[kid + 1]->weight < H->elements[kid]->weight)
			++kid;

		// Stop as soon as the lighter child is not lighter than the sinking node.
		if (H->elements[kid]->weight >= sinking->weight)
			break;

		H->elements[hole] = H->elements[kid];
		hole = kid;
	}

	H->elements[hole] = sinking;
}

// Turns the unordered entries already stored in slots 1..size into a
// min-heap by sifting down every node that has at least one child, from
// the last such node back to the root.
void Build(Heap H)
{
	for (int i = H->size / 2; i > 0; i--)
		PreDown(H, i);
}

// Removes and returns the minimum-weight node of the heap.
// Returns a null pointer when the heap is empty.
Huffman Delet(Heap H)
{
	if (H->size == 0)
		return nullptr;

	Huffman smallest = H->elements[1];

	// Move the last entry to the root, shrink the heap, then restore order.
	H->elements[1] = H->elements[H->size];
	H->size -= 1;
	PreDown(H, 1);

	return smallest;
}

// Adds `elem` to the heap and percolates it upward until its parent is not
// heavier. The loop has no explicit root check: it stops at the root only
// because the node stored in slot 0 is compared as the root's "parent".
// No capacity check is performed.
void Insert(Heap H, Huffman elem)
{
	H->size += 1;
	int hole = H->size;
	H->elements[hole] = elem;

	const int key = elem->weight;
	for (int parent = hole / 2; key < H->elements[parent]->weight; parent = hole / 2)
	{
		// Pull the heavier parent down and continue from its slot.
		H->elements[hole] = H->elements[parent];
		hole = parent;
	}

	H->elements[hole] = elem;
}

对Huffman树的操作定义如下

// Builds a Huffman tree from the n nodes currently held in heap H.
// Performs n - 1 merges: each one removes the two lightest nodes, joins them
// under a new parent whose weight is their sum, and puts the parent back.
// Returns the last node left in the heap, i.e. the root of the tree.
Huffman CreateHuffman(Heap H, int n)
{
	for (int merges = n - 1; merges > 0; --merges)
	{
		Huffman parent = (Huffman)malloc(sizeof(HuffNode));
		parent->left = Delet(H);
		parent->right = Delet(H);
		parent->weight = parent->left->weight + parent->right->weight;
		Insert(H, parent);
	}
	return Delet(H);
}

// Computes the weighted path length of the tree rooted at T: the sum over
// all leaves of (leaf weight * leaf depth), with the root at depth 0.
//
// The tree is walked one level at a time, so the current depth is a single
// counter instead of a fixed-size side array; trees of any size are handled.
//
// @param T  root of the tree; may be null
// @return   the weighted path length, or 0 for an empty tree
int ComputeTotalWeight(Huffman T)
{
	if (T == nullptr)
		return 0;

	int totalweight = 0;
	std::queue<Huffman> pending;
	pending.push(T);

	for (int depth = 0; !pending.empty(); ++depth)
	{
		// Everything in the queue right now sits at `depth`; drain exactly
		// that many nodes and enqueue their children for the next round.
		for (auto remaining = pending.size(); remaining > 0; --remaining)
		{
			Huffman node = pending.front();
			pending.pop();

			if (node->left == nullptr && node->right == nullptr)
				totalweight += node->weight * depth;

			if (node->left)
				pending.push(node->left);
			if (node->right)
				pending.push(node->right);
		}
	}
	return totalweight;
}

主函数如下

int main()
{
	int n;
	cin >> n;
	map<char, int> mweight;
	char code;
	int weight;
	for (int i = 0; i < n; ++i)
	{
		cin >> code >> weight;
		mweight[code] = weight;
	}
	Heap H;
	H = CreateHeap(n);
	Huffman T;
	for (auto it = mweight.begin(); it != mweight.end(); it++)
	{
		T = (Huffman)malloc(sizeof(HuffNode));
		T->left = NULL;
		T->right = NULL;
		T->weight = it->second;
		H->elements[++H->size] = T;
	}
	Build(H);
	T = CreateHuffman(H, n);	//利用堆生成了一颗Huffman树
	int totalweight;
	totalweight = ComputeTotalWeight(T); //计算最小总权重
	int jn, testtotalweight;
	string decode;
	cin >> jn;
	map<char, string> mdecode;
	for (int i = 0; i < jn; i++)//依次判断
	{
		for (int i = 0; i < n; ++i)
		{
			cin >> code >> decode;
			mdecode[code] = decode;
		}
		testtotalweight = 0;
		for (auto it = mdecode.begin(); it != mdecode.end(); it++)
			testtotalweight = testtotalweight + mweight[it->first] * it->second.length();
		if (testtotalweight > totalweight)//总权重大于最小总权重
			cout << "No" << endl;
		else
		{
			bool pre = false;//判断是否有前缀编码
			for (auto it = mdecode.begin(); it != (mdecode.end()); it++)
			{
				for (auto it2 = it; it2 != mdecode.end(); it2++)
				{
					if (it2 == it)
						continue;
					if (it->second == it2->second)
					{
						pre = true;
						break;
					}
					else if (it->second.length() < it2->second.length())
					{
						if (it->second == it2->second.substr(0, it->second.length()))
						{
							pre = true;
							break;
						}
					}
					else if (it->second.length() > it2->second.length())
					{
						if (it2->second == it->second.substr(0, it2->second.length()))
						{
							pre = true;
							break;
						}
					}

				}
				if (pre)
					break;
				
			}
			if (pre)
				cout << "No" << endl;
			else
				cout << "Yes" << endl;
		}
	}


	return 0;
}