首先说明的是,该文章中的代码,在实现上可能有需要改正的地方,贴出来是为了给需要的朋友提供参考,也希望能够得到大家的指点。
哈夫曼编码的原理这里不再详述,具体可参见《Huffman 编码压缩算法》一文,其中对算法给出了清晰易懂的解释。在实现上,我先统计文本中各字符的出现次数,并保存在一个结构体指针数组中(这样便于查找和存储);然后按出现次数对数组进行升序排序,再把排序后的数组转化为有序链表,并由此逐步构造出哈夫曼二叉树;最后对二叉树进行遍历并打印结果(当前代码打印的是各结点的权值,即字符出现次数)。具体代码如下:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Number of entries in the character table. init_node assigns entry i the
 * character code i + 32, so with SIZE entries the table covers codes 32..127.
 */
#define SIZE (127-31)
/*
 * One node of the character table / sorted list / Huffman tree.
 * The same structure is reused for all three stages of the program.
 */
typedef struct ch_node {
char ch;                /* character of a leaf; build_node stores -1 in internal nodes */
int cnt;                /* occurrence count of a leaf, or the sum of both children's counts */
struct ch_node *prev;   /* left child (set by build_node); NULL in leaves */
struct ch_node *parent; /* next element while the node is in the sorted list built by
                           arr_to_linklist; overwritten with the tree parent by build_node */
struct ch_node *next;   /* right child (set by build_node); NULL in leaves */
}_node;
/*
 * Allocate and initialise the character table.
 *
 * node: array with room for `size` node pointers; every slot is overwritten.
 * size: number of entries to create.
 *
 * Entry i represents the character with code i + 32 (32 is the space, the
 * first printable ASCII character). Its count starts at 0 and all of its
 * links start as NULL.
 *
 * Returns 0 on success. If an allocation fails the process is terminated
 * with EXIT_FAILURE, the same policy build_node uses for allocation failure.
 */
int init_node (_node **node, int size)
{
    int i;

    for (i = 0; i < size; i++) {
        _node *entry = malloc (sizeof *entry);

        if (entry == NULL) {
            fprintf (stderr, "init_node: out of memory\n");
            exit (EXIT_FAILURE);
        }
        entry->ch = (char) (i + 32);
        entry->cnt = 0;
        entry->prev = entry->next = entry->parent = NULL;
        node[i] = entry;
    }
    return 0;
}
/*
 * Dump the character table to stdout, one entry per line, in the form
 * "<character>\t<count>".
 *
 * node: array of `size` valid node pointers.
 * Always returns 0.
 */
int show_node (_node **node, int size)
{
    _node **slot = node;
    int remaining;

    for (remaining = size; remaining > 0; remaining--) {
        _node *entry = *slot;

        printf ("%c\t%d\n", entry->ch, entry->cnt);
        slot++;
    }
    return 0;
}
/*
 * Count the occurrences of each character of a NUL-terminated string.
 *
 * node: character table created by init_node (slot 0 is character 32).
 * buf:  text to scan; a NULL pointer is treated as an empty string.
 *
 * Characters that have no slot in the table (control characters such as
 * TAB, CR and LF, and every byte above 127) are skipped. Scanning continues
 * after them, so the text following a TAB on the same line is still counted.
 *
 * Always returns 0.
 */
int count_node (_node **node, const char *buf)
{
    /*
     * Range of character codes that own a table slot. This has to agree
     * with init_node (slot i <-> code i + 32) and with the table length
     * SIZE = 127 - 31, which makes 127 the last representable code.
     */
    enum { FIRST_CH = 32, LAST_CH = 127 };
    const unsigned char *p;

    if (node == NULL || buf == NULL) {
        return 0;
    }

    /*
     * Read the bytes as unsigned char: plain char may be signed, in which
     * case bytes >= 128 would turn into negative indices.
     */
    for (p = (const unsigned char *) buf; *p != '\0'; p++) {
        if (*p < FIRST_CH || *p > LAST_CH) {
            continue;   /* no slot for this byte: ignore it, keep scanning */
        }
        node[*p - FIRST_CH]->cnt++;
    }
    return 0;
}
/*
 * Release every entry of the character table.
 *
 * node: array of `size` pointers, each of which is passed to free().
 * Always returns 0.
 */
int free_node (_node **node, int size)
{
    _node **slot = node;
    int remaining;

    for (remaining = size; remaining > 0; remaining--) {
        free (*slot);
        slot++;
    }
    return 0;
}
/*
 * Quicksort the slice node[l..r] (both bounds inclusive) into ascending
 * order of cnt. Only the pointers in the array are moved; the nodes
 * themselves are not modified.
 *
 * The first element of the slice is taken as the pivot. Removing it leaves
 * a "hole" that is filled alternately from the right and from the left
 * until both scan positions meet; the pivot is then dropped into the hole.
 */
void my_qsort (_node **node, int l, int r)
{
    if (l >= r) {
        return;     /* zero or one element: already sorted */
    }

    int lo = l;
    int hi = r;
    _node *pivot = node[lo];            /* slot `lo` is now the hole */
    const int pivot_cnt = pivot->cnt;

    while (lo < hi) {
        /* from the right: find an element smaller than the pivot */
        while (lo < hi && node[hi]->cnt >= pivot_cnt) {
            hi--;
        }
        if (lo < hi) {
            node[lo] = node[hi];        /* the hole moves to `hi` */
            lo++;
        }
        /* from the left: find an element larger than the pivot */
        while (lo < hi && node[lo]->cnt <= pivot_cnt) {
            lo++;
        }
        if (lo < hi) {
            node[hi] = node[lo];        /* the hole moves back to `lo` */
            hi--;
        }
    }

    node[lo] = pivot;                   /* final position of the pivot */
    my_qsort (node, l, lo - 1);
    my_qsort (node, lo + 1, r);
}
/*
 * Turn the sorted character table into a singly linked list.
 *
 * node: array of `size` node pointers, sorted in ascending order of cnt.
 * size: number of entries in the array.
 *
 * Leading entries whose count is 0 (characters that never occurred) are
 * freed, and their array slots are set to NULL so that no dangling pointer
 * is left behind. The remaining entries are chained through their `parent`
 * field in array order, and the last one is terminated with NULL.
 *
 * Returns the first entry with a non-zero count, or NULL when every count
 * is 0 (for example when the input was empty).
 */
_node *arr_to_linklist (_node **node, int size)
{
    int i = 0;

    /* drop the characters that never occurred */
    while (i < size && node[i]->cnt == 0) {
        free (node[i]);
        node[i] = NULL;
        i++;
    }
    if (i >= size) {
        return NULL;    /* nothing left: do not read past the array */
    }

    _node *head = node[i];

    /* link every remaining entry to its successor */
    for (i++; i < size; i++) {
        node[i-1]->parent = node[i];
    }
    node[size-1]->parent = NULL;
    return head;
}
/*
 * Sort the whole node array (entries 0 .. size-1) in ascending order of
 * cnt by delegating to my_qsort. Always returns 0.
 */
int sort_node (_node **node, int size)
{
my_qsort (node, 0, size-1); // size-1 is the last valid subscript of the array
return 0;
}
/*
 * Insert `par` into a list that is sorted in ascending order of cnt and
 * linked through the `parent` field, keeping the list sorted.
 *
 * par:  node to insert; must not be NULL.
 * head: first element of the sorted list, or NULL for an empty list.
 *
 * `par` is placed in front of the first element whose count is greater
 * than or equal to its own. Keeping the list sorted is what lets
 * build_node always merge the two smallest weights, which the Huffman
 * construction requires.
 *
 * Returns the head of the resulting list. When `head` is NULL, `par` is
 * returned unchanged (its `parent` link is not touched).
 */
_node *add_new_node (_node *par, _node *head)
{
    if (head == NULL) {
        return par;
    }

    /* not larger than the current minimum: par becomes the new head */
    if (par->cnt <= head->cnt) {
        par->parent = head;
        return par;
    }

    /* advance to the last element whose count is still smaller than par's */
    _node *p = head;
    while (p->parent != NULL && p->parent->cnt < par->cnt) {
        p = p->parent;
    }

    par->parent = p->parent;
    p->parent = par;
    return head;
}
/*
 * Build the Huffman tree from a list of nodes linked through `parent`.
 *
 * head: first element of the list (the element with the smallest count
 *       when the list is sorted in ascending order), or NULL.
 *
 * While at least two elements remain, the first two are removed from the
 * list and made the left (`prev`) and right (`next`) child of a newly
 * allocated internal node. The internal node has ch == -1 and a count equal
 * to the sum of its children, and is put back into the list with
 * add_new_node. The `parent` field of both children is overwritten with the
 * new internal node.
 *
 * Returns the root of the tree: the single remaining list element. Returns
 * NULL when `head` is NULL. Terminates the process with EXIT_FAILURE if an
 * internal node cannot be allocated.
 */
_node *build_node (_node *head)
{
    if (head == NULL) {
        return NULL;
    }

    while (head->parent != NULL) {
        _node *left = head;
        _node *right = head->parent;
        _node *rest = right->parent;    /* list without the two merged nodes */
        _node *par = malloc (sizeof *par);

        if (par == NULL) {
            fprintf (stderr, "build_node: out of memory\n");
            exit (EXIT_FAILURE);
        }

        par->ch = -1;                   /* marks an internal node */
        par->cnt = left->cnt + right->cnt;
        par->prev = left;
        par->next = right;
        par->parent = NULL;

        /* from here on `parent` of the children means "tree parent" */
        left->parent = par;
        right->parent = par;

        head = add_new_node (par, rest);
    }
    return head;
}
/*
 * In-order traversal of the tree: left subtree (`prev`), then the node
 * itself, then the right subtree (`next`). The count of every visited node
 * is printed on its own line.
 *
 * node: root of the (sub)tree; NULL prints nothing.
 * Always returns 0.
 */
int traversal_tree (_node *node)
{
    _node *cur = node;

    /* recurse into the left subtree only; walk down the right spine in a loop */
    while (cur != NULL) {
        traversal_tree (cur->prev);
        printf ("%d\n",cur->cnt);
        cur = cur->next;
    }
    return 0;
}
/*
 * Release a tree whose children are linked through `prev` and `next`,
 * freeing the children before the node itself.
 */
static void free_tree (_node *root)
{
    if (root == NULL) {
        return;
    }
    free_tree (root->prev);
    free_tree (root->next);
    free (root);
}

/*
 * Usage: <program> filename
 *
 * Counts the characters of the given text file, builds the Huffman tree
 * from the counts and prints the count of every tree node in in-order.
 *
 * Exits with EXIT_FAILURE on a wrong argument count, when the file cannot
 * be opened, or when reading it fails. Returns 0 otherwise; input that
 * contains no countable character produces no output.
 */
int main (int argc, char **argv)
{
    if (argc != 2) {
        fprintf (stderr, "Usage: %s filename\n",
                 (argc > 0 && argv[0] != NULL) ? argv[0] : "huffman");
        exit (EXIT_FAILURE);
    }

    FILE *fp = fopen (argv[1], "r");
    if (fp == NULL) {
        fprintf (stderr, "file can not be open\n");
        exit (EXIT_FAILURE);
    }

    _node *node[SIZE];
    char buf[BUFSIZ] = {'\0'};

    /* init node */
    init_node (node, SIZE);

    /*
     * Let the result of fgets drive the loop. Testing feof() before
     * reading would process the last line twice, because the end-of-file
     * flag is only set by a read that fails and the buffer then still
     * holds the previous line.
     */
    while (fgets (buf, sizeof buf, fp) != NULL) {
        count_node (node, buf);
    }
    int read_failed = ferror (fp);
    fclose (fp);
    if (read_failed) {
        fprintf (stderr, "error while reading file\n");
        free_node (node, SIZE);
        exit (EXIT_FAILURE);
    }

    sort_node (node, SIZE);

    /*
     * The table is sorted in ascending order, so a zero count in the last
     * slot means that no character was counted at all: there is no tree to
     * build. No entry has been freed yet, so release the whole table.
     */
    if (node[SIZE-1]->cnt == 0) {
        free_node (node, SIZE);
        return 0;
    }

    /* entries with a zero count are freed by arr_to_linklist */
    _node *head = arr_to_linklist (node, SIZE);
    _node *hff = build_node (head);

    traversal_tree (hff);

    /* the tree now owns every remaining leaf and all internal nodes */
    free_tree (hff);
    return 0;
}
以上代码在gcc version 4.7.3 (Ubuntu/Linaro 4.7.3-1ubuntu1)调试通过。