决心写一个哈夫曼树,巩固自己学的数据结构
一、最优树的定义
结点的路径长度定义为:
从根结点到该结点的路径上
分支的数目。
树的路径长度定义为:
树中每个结点的路径长度之和。
树的带权路径长度定义为:
树中所有叶子结点的带权路径长度之和
$WPL(T) = \sum_{k=1}^{n} w_k l_k$(对所有叶子结点求和,其中 $w_k$ 为第 k 个叶子结点的权值,$l_k$ 为该叶子结点的路径长度)。
在所有含 n 个叶子结点、并带相同权
值的 m 叉树中,必存在一棵其带权路径
长度取最小值的树,称为“最优树”。
二、如何构造最优树
(1)根据给定的 n个权值 {w1, w2,…, wn},构造 n 棵二叉树的集合
F ={T1, T2, …, Tn},
其中每棵二叉树中均只含一个带权值
为 w_i 的根结点,其左、右子树为空树
(2)在 F中选取其根结点的权值为最
小的两棵二叉树,分别作为左、
右子树构造一棵新的二叉树,并
置这棵新的二叉树根结点的权值
为其左、右子树根结点的权值之
和;
(3)从F中删去这两棵树,同时加入
刚生成的新树;
重复(2)和(3)两步,直至F中只
含一棵树为止。
忙完攻防大赛之后自己终于可以敲敲代码了。
把拖了两个礼拜的哈夫曼编码搞定了。哈哈。
代码除了书上的6.12算法 HuffmanCoding(建立哈夫曼树和构造哈夫曼编码)之外,其它全部都是自己写的。
算法本身不难,就是多了对文件的操作。一些细节的地方自己还是调试了很长时间。
如果大家对哈夫曼树不是很清楚,下面两篇博客讲的很清楚:
http://www.thecodeway.com/blog/?p=870
http://www.cnblogs.com/syblogs/articles/2020145.html
简单描述下题目:
一个完整的哈夫曼编码、译码系统:
I 初始化,建立哈夫曼树,存于文件hfmTree中
E 编码 利用建好的哈夫曼树,对ToBeTran中的正文进行编码,将结果存入CodeFile中
D 译码 利用建好的哈夫曼树将文件CodeFile中的代码进行译码,结果存入TextFile中
P 印代码文件 将文件CodeFile以紧凑格式显示在终端上。同时将此字符形式的编码文件写入文件CodePrin中
T 打印哈夫曼树,显示在终端上
Q 退出
最后,上代码:
huffman_define.h
#include <stdio.h>
#include <malloc.h>
#include <string.h>
// Status codes and limits.
// OK, ERROR and OVERFLOW are not referenced by the code shown in this file.
#define OK 1
#define ERROR 0
#define OVERFLOW -1
// NOTE(review): intended as an upper bound on node weights (a "larger than
// any real weight" sentinel) -- confirm that real and merged weights stay
// below it wherever it is used.
#define MAXWEIGHT 1000
// Type definitions
// Status: integer result code returned by Select().
typedef int Status;
// TElemType: not referenced by the code shown in this file.
typedef char TElemType;
// One node of the Huffman tree. Nodes live in an array and refer to each
// other by array index; index 0 means "no node" for parent/lchild/rchild.
typedef struct{
unsigned int weight;
unsigned int parent, lchild, rchild;
}HTNode, *HuffmanTree;
// Table of code strings: entry i (starting at 1) is the NUL-terminated
// '0'/'1' code of the i-th symbol.
typedef char * *HuffmanCode;
// Function prototypes
// Select: among HT[1..i], pick the two nodes without a parent that have the
// smallest weights and store their indices in *s1 and *s2. Returns 2 when no
// parentless node exists, otherwise 0.
Status Select(HuffmanTree HT, int i, unsigned int *s1, unsigned int *s2);
// HuffmanCoding: build the Huffman tree HT and the code table HC for the n
// weights in w. No tree is built when n <= 1.
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, int * w, int n);
huffman_define.cpp
#include "huffman_define.h"
// Select the two parentless nodes with the smallest weights among HT[1..i].
//
// HT  node array; slot 0 is never a real node and is not read here
// i   highest index to consider
// s1  out: index of the lightest parentless node (the first one on ties)
// s2  out: index of the second-lightest parentless node (the first one on
//     ties), or 0 when only one parentless node exists
//
// Returns 2 when no node in HT[1..i] is parentless (outputs are left
// untouched), otherwise 0.
//
// The search keeps its own "nothing chosen yet" state instead of comparing
// against a sentinel weight, so weights of any magnitude are handled.
Status Select(HuffmanTree HT, int i, unsigned int *s1, unsigned int *s2)
{
    unsigned int best = 0;      // 0 = nothing chosen yet
    unsigned int second = 0;

    for (int j = 1; j <= i; j++)
    {
        if (HT[j].parent != 0)
            continue;           // already merged into a subtree

        if (best == 0 || HT[j].weight < HT[best].weight)
        {
            // New minimum: the previous minimum becomes the runner-up.
            second = best;
            best = (unsigned int)j;
        }
        else if (second == 0 || HT[j].weight < HT[second].weight)
        {
            second = (unsigned int)j;
        }
    }

    if (best == 0)
        return 2;

    *s1 = best;
    *s2 = second;
    return 0;
}
// Build a Huffman tree and the per-symbol code table.
//
// HT  out: node array; slot 0 is a sentinel, the leaves occupy slots 1..n in
//     the order of w, the merged nodes occupy slots n+1..2n-1
// HC  out: HC[i] (1 <= i <= n) is the NUL-terminated '0'/'1' code of the
//     i-th weight; HC[0] is NULL
// w   the n weights
// n   number of symbols
//
// When n <= 1 nothing is built and HT/HC are left untouched. When an
// allocation fails, everything allocated here is released and both HT and HC
// are set to NULL. On success the caller owns HT, HC and every HC[i].
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, int *w, int n){
    if (n <= 1)
        return;

    const int m = 2 * n - 1;    // n leaves plus n-1 merged nodes

    HuffmanTree tree = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
    HuffmanCode codes = (HuffmanCode)malloc((n + 1) * sizeof(char *));
    char *cd = (char *)malloc(n * sizeof(char));    // scratch: longest code is n-1 chars
    if (tree == NULL || codes == NULL || cd == NULL)
    {
        free(cd);
        free(codes);
        free(tree);
        HT = NULL;
        HC = NULL;
        return;
    }

    // Slot 0 is the "no node" index. It gets the largest possible weight so
    // that a minimum search which starts from slot 0 never prefers it over a
    // real node, however large the real weights are.
    tree[0].weight = (unsigned int)-1;
    tree[0].parent = 0;
    tree[0].lchild = 0;
    tree[0].rchild = 0;

    for (int i = 1; i <= n; ++i)
    {
        tree[i].weight = (unsigned int)w[i - 1];
        tree[i].parent = 0;
        tree[i].lchild = 0;
        tree[i].rchild = 0;
    }
    for (int i = n + 1; i <= m; ++i)
    {
        tree[i].weight = 0;
        tree[i].parent = 0;
        tree[i].lchild = 0;
        tree[i].rchild = 0;
    }

    // Repeatedly merge the two lightest parentless nodes into a new node i.
    for (int i = n + 1; i <= m; ++i)
    {
        unsigned int s1 = 0;
        unsigned int s2 = 0;
        if (2 == Select(tree, i - 1, &s1, &s2))
            break;
        tree[s1].parent = i;
        tree[s2].parent = i;
        tree[i].lchild = s1;
        tree[i].rchild = s2;
        tree[i].weight = tree[s1].weight + tree[s2].weight;
    }

    // Derive each code by walking from the leaf up to the root and filling
    // the scratch buffer from its end: '0' for a left branch, '1' for a right.
    codes[0] = NULL;
    cd[n - 1] = '\0';
    for (int i = 1; i <= n; ++i)
    {
        int start = n - 1;
        unsigned int c = (unsigned int)i;
        // start > 0 bounds the walk to the scratch buffer; a valid tree
        // never needs more than n-1 steps.
        for (unsigned int f = tree[i].parent; f != 0 && start > 0; c = f, f = tree[f].parent)
        {
            cd[--start] = (tree[f].lchild == c) ? '0' : '1';
        }

        codes[i] = (char *)malloc((n - start) * sizeof(char));
        if (codes[i] == NULL)
        {
            while (--i >= 1)
                free(codes[i]);
            free(cd);
            free(codes);
            free(tree);
            HT = NULL;
            HC = NULL;
            return;
        }
        strcpy(codes[i], &cd[start]);
    }

    free(cd);
    HT = tree;
    HC = codes;
}//HuffmanCoding
huffman.cpp
#include "huffman_define.h"
// Menu operations implemented in this file.
// Init: read the symbols and weights from stdin into a[] and b[], build the
// tree HT and code table HC, and write the tree to the hfmTree file. The
// return value is the symbol count main() passes to the other operations.
int Init(HuffmanTree &HT, HuffmanCode &HC, char a[], int b[]);
// Encoding: translate the ToBeTran file into codes written to CodeFile.
void Encoding(HuffmanCode HC, int n, char a[]);
// Decoding: translate the codes in CodeFile back into symbols in TextFile.
void Decoding(HuffmanCode HC, int n, char a[]);
// Print: show the contents of CodeFile on the terminal and copy them to
// CodePrin.
void Print();
// TreePrint: print tree nodes to the terminal and to the TreePrin file.
void TreePrint(HuffmanTree HT, int n);
// File locations used by the menu operations. Each array is sized by its
// initializer, so editing a path can never overflow a fixed-size buffer or
// lose the terminating NUL.
char TEST[] = "D:\\txt\\TEST.txt";          // not referenced by the code shown here
char hfmTree[] = "D:\\txt\\hufmTree.txt";   // tree dump written by Init
char ToBeTran[] = "D:\\txt\\ToBeTran.txt";  // plain text read by Encoding
char CodeFile[] = "D:\\txt\\CodeFile.txt";  // codes written by Encoding, read by Decoding and Print
char TextFile[] = "D:\\txt\\TextFile.txt";  // decoded text written by Decoding
char CodePrin[] = "D:\\txt\\CodePrin.txt";  // compact code listing written by Print
char TreePrin[] = "D:\\txt\\TreePrin.txt";  // tree listing written by TreePrint
// Interactive menu loop: reads one command character per round and
// dispatches to the matching operation until 'Q' is entered or input ends.
int main()
{
    int n = 0;                  // symbol count of the current tree; 0 = none yet
    char a[100] = {0};          // symbols
    int b[100] = {0};           // weights
    HuffmanTree HT = NULL;      // start from a defined "no tree" state
    HuffmanCode HC = NULL;
    char choise = 0;
    while(1)
    {
        printf("\n\n-------------------------------------------------------------------------\n");
        printf("It's huffman coding_encoding system. Please Input your chioise\n");
        printf("I: Initialization\nE:Encoding\nD:Decoding\nP:Print\nT:Tree printing\nQ:Quit\n");
        // Stop on end of input or a read error instead of spinning forever
        // on a stale command character.
        if (scanf("%c", &choise) != 1)
            return 0;
        switch(choise)
        {
        case ('I'):
            n = Init(HT, HC, a, b);
            break;
        case ('E'):
            Encoding(HC, n, a);
            break;
        case ('D'):
            Decoding(HC, n, a);
            break;
        case ('P'):
            Print();
            break;
        case ('T'):
            TreePrint(HT, n);
            break;
        case ('Q'):
            return 0;
        default:
            printf("Input wrong, please inpint again!~~\n");
            break;
        }
        getchar();              // consume the character following the command (normally the newline)
    }
    return 0;
}
// Read the alphabet from stdin, build the Huffman tree and codes, and dump
// the tree to the hfmTree file.
//
// HT, HC  out: tree and code table produced by HuffmanCoding
// a       out: the n symbols (the first 100 bytes are cleared first)
// b       out: the n weights
//
// Returns the number of symbols, or 0 when the input is unusable (n outside
// 2..100, malformed or negative weight, or the tree could not be built).
// Tree and table from an earlier call are not released here.
int Init(HuffmanTree &HT, HuffmanCode &HC, char a[], int b[])
{
    const int capacity = 100;   // the memset below assumes a[] holds 100 entries
    memset(a, 0, 100);
    int n = 0;
    printf("please input n:\n");
    // At least two symbols are needed to build a tree, and more than
    // `capacity` would write past the caller's buffers.
    if (scanf("%d", &n) != 1 || n < 2 || n > capacity)
    {
        printf("n must be a number between 2 and %d\n", capacity);
        return 0;
    }
    printf("please input %d code and weight:\n", n);
    for (int i = 0; i < n; i++)
    {
        getchar();              // drop the separator left in front of the symbol
        if (scanf("%c", &a[i]) != 1 || scanf("%d", &b[i]) != 1 || b[i] < 0)
        {
            printf("invalid code/weight pair %d\n", i + 1);
            return 0;
        }
    }
    HuffmanCoding(HT, HC, b, n);
    if (HT == NULL || HC == NULL)
    {
        printf("cannot build the huffman tree\n");
        return 0;
    }
    FILE *f1 = fopen(hfmTree, "w+");
    if (f1 == NULL)
    {
        // The tree is still usable in memory; only the dump is skipped.
        printf("cannot open %s\n", hfmTree);
        return n;
    }
    // The tree occupies slots 1..2n-1; the last slot is the root.
    for (int i = 1; i <= 2 * n - 1; i++)
    {
        fprintf(f1, "%d\t%u\t%u\t%u\t%u\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
    }
    fclose(f1);
    return n;
}
// Encode the ToBeTran file: every character that appears in a[0..n-1] is
// replaced by its code from HC and written to CodeFile, one code per line.
// Characters without a code are skipped.
//
// HC  code table; HC[i+1] is the code of a[i]
// n   number of symbols
// a   the symbols
void Encoding(HuffmanCode HC, int n, char a[])
{
    // Open the input first so a missing source does not truncate the output.
    FILE *f1 = fopen(ToBeTran, "r+");
    if (f1 == NULL)
    {
        printf("cannot open %s\n", ToBeTran);
        return;
    }
    FILE *f2 = fopen(CodeFile, "w+");
    if (f2 == NULL)
    {
        printf("cannot open %s\n", CodeFile);
        fclose(f1);
        return;
    }
    char temp = 0;
    while (fscanf(f1, "%c", &temp) == 1)
    {
        for (int i = 0; i < n; i++)
        {
            if (a[i] == temp)
            {
                fprintf(f2, "%s\n", HC[i+1]);
                break;
            }
        }
    }
    fclose(f1);
    fclose(f2);
}
// Decode CodeFile: every whitespace-separated code that matches an entry of
// HC is replaced by its symbol and written to TextFile. Unknown codes are
// skipped.
//
// HC  code table; HC[i] is the code of a[i-1]
// n   number of symbols
// a   the symbols
void Decoding(HuffmanCode HC, int n, char a[])
{
    FILE *f1 = fopen(CodeFile, "r+");
    if (f1 == NULL)
    {
        printf("cannot open %s\n", CodeFile);
        return;
    }
    FILE *f2 = fopen(TextFile, "w+");
    if (f2 == NULL)
    {
        printf("cannot open %s\n", TextFile);
        fclose(f1);
        return;
    }
    char temp[100];
    // The field width keeps an over-long token from overflowing temp.
    while (fscanf(f1, "%99s", temp) == 1)
    {
        for (int i = 1; i <= n; i++)
        {
            if (!strcmp(temp, HC[i]))
            {
                fprintf(f2, "%c", a[i-1]);
                break;
            }
        }
    }
    fclose(f1);
    fclose(f2);
}
// Show CodeFile in compact form: the codes are concatenated without
// separators and a line break is inserted once a line holds at least 50
// characters. The same text is written to the CodePrin file when it can be
// opened.
void Print()
{
    FILE *f1 = fopen(CodeFile, "r+");
    if (f1 == NULL)
    {
        printf("cannot open %s\n", CodeFile);
        return;
    }
    FILE *f2 = fopen(CodePrin, "w+");
    if (f2 == NULL)
        printf("cannot open %s\n", CodePrin);   // still show the codes on the terminal

    size_t count = 0;           // characters on the current output line
    char temp[100];
    // The field width keeps an over-long token from overflowing temp.
    while (fscanf(f1, "%99s", temp) == 1)
    {
        printf("%s", temp);
        if (f2 != NULL)
            fprintf(f2, "%s", temp);
        // Measure the token while it is still in the buffer; clearing the
        // buffer first would always add zero and the line would never wrap.
        count += strlen(temp);
        if (count >= 50)
        {
            printf("\n");
            if (f2 != NULL)
                fprintf(f2, "\n");
            count = 0;
        }
    }
    printf("\n");
    if (f2 != NULL)
    {
        fprintf(f2, "\n");
        fclose(f2);
    }
    fclose(f1);
}
// Print the whole Huffman tree, one node per line, as
// index, weight, parent, lchild, rchild (tab separated). The listing goes to
// the terminal and, when it can be opened, to the TreePrin file.
//
// HT  node array; the nodes occupy slots 1..2n-1
// n   number of leaves; nothing is printed when n < 2 (no tree exists)
void TreePrint(HuffmanTree HT, int n)
{
    if (n < 2)
        return;

    FILE *f1 = fopen(TreePrin, "w+");
    if (f1 == NULL)
        printf("cannot open %s\n", TreePrin);   // still show the tree on the terminal

    // Include the merged nodes n+1..2n-1 as well as the leaves; without them
    // the listing does not describe the tree structure.
    for (int i = 1; i <= 2 * n - 1; i++)
    {
        printf("%d\t%u\t%u\t%u\t%u\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
        if (f1 != NULL)
            fprintf(f1, "%d\t%u\t%u\t%u\t%u\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
    }
    if (f1 != NULL)
        fclose(f1);
}
自己做的主要工作是实现了Select 函数,就是从已经存在的节点中选取两个权重最小的节点。
另外,书上的算法有点小问题。
HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode)); //0 not used?
这里它申请了 m + 1 个节点大小的空间。根据书上的算法,他是从第1个开始的。没有用第0个节点。
所以我们要稍微改进下书上的代码:
for (p = HT + 1, i = 1; i <= n; ++i, ++p, ++w)
对于第0个节点,我们可以这样用:
#define MAXWEIGHT 1000
HT[0].weight = MAXWEIGHT;
然后在Select函数里面每次赋初值的时候将
s1 = s2 = 0;
这样就比较方便了。