一、Huffman编解码原理
1. Huffman编码
对原始文件进行Huffman编码,首先需要解决以下几点问题:
- 文件符号的概率分布情况是怎样的?
- Huffman树是如何建立的?
- 建立起Huffman树后,又是怎样读出符号对应码字的?
这三个问题在程序中的实现思路如下图:
将待编码文件里的数据参照已形成的Huffman码表一一进行转换,就可以得到编码后的文件了。
2. Huffman解码
Huffman解码是查表+翻译的过程。读取随接收文件传来的码表后,再逐位读取文件实际数据,对照码表进行翻译即可。
二、程序实现
流程中最关键的对Huffman树的操作在程序中主要通过两个结构体实现:Huffman_node和Huffman_code。
建立的二叉树上每个节点都以Huffman_node类型存在。节点之间的主要关系有父子、兄弟,Huffman_node中定义了指向父节点的指针*parent和指向孩子的指针*zero, *one来表述节点与节点之间的关系。除此之外,还有节点本身的属性:isLeaf、count、symbol。
而编码码字定义为了Huffman_code,本身属性包括码字占用的比特数和码字本身。
具体程序如下,部分理解在注释中给出。
Huffcode.c
/*
* huffcode - Encode/Decode files using Huffman encoding.
* http://huffman.sourceforge.net
* Copyright (C) 2003 Douglas Ryan Richardson; Gauss Interprise, Inc
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "huffman.h"
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <assert.h>
#ifdef WIN32
#include <malloc.h>
extern int getopt(int, char**, char*);
extern char* optarg;
#else
#include <unistd.h>
#endif
static int memory_encode_file(FILE *in, FILE *out);
static int memory_decode_file(FILE *in, FILE *out);
static void
version(FILE *out)
{
fputs("huffcode 0.3\n"
"Copyright (C) 2003 Douglas Ryan Richardson"
"; Gauss Interprise, Inc\n",
out);
}
static void
usage(FILE* out)
{
fputs("Usage: huffcode [-i<input file>] [-o<output file>] [-d|-c]\n"
"-i - input file (default is standard input)\n"
"-o - output file (default is standard output)\n"
"-d - decompress\n"
"-c - compress (default)\n"
"-m - read file into memory, compress, then write to file (not default)\n",
// step1: by yzhang, for huffman statistics
"-t - output huffman statistics\n",
//step1:end by yzhang
out);
}
int
main(int argc, char** argv)
{
char memory = 0;
char compress = 1;
int opt;
const char *file_in = NULL, *file_out = NULL;
//step1:add by yzhang for huffman statistics
const char *file_out_table = NULL;
//end by yzhang
FILE *in = stdin;
FILE *out = stdout;
//step1:add by yzhang for huffman statistics
FILE * outTable = NULL;
//end by yzhang
/* Get the command line arguments. */
while((opt = getopt(argc, argv, "i:o:cdhvmt:")) != -1) //演示如何跳出循环,及查找括号对
{
switch(opt)
{
case 'i':
file_in = optarg;
break;
case 'o':
file_out = optarg;
break;
case 'c':
compress = 1;//压缩
break;
case 'd':
compress = 0;//解压
break;
case 'h':
usage(stdout);
return 0;
case 'v':
version(stdout);
return 0;
case 'm':
memory = 1;
break;
// by yzhang for huffman statistics
case 't':
file_out_table = optarg;
break;
//end by yzhang
default:
usage(stderr);
return 1;
}
}
/* If an input file is given then open it. */
if(file_in)
{
in = fopen(file_in, "rb");
if(!in)
{
fprintf(stderr,
"Can't open input file '%s': %s\n",
file_in, strerror(errno));
return 1;
}
}
/* If an output file is given then create it. */
if(file_out)
{
out = fopen(file_out, "wb");
if(!out)
{
fprintf(stderr,
"Can't open output file '%s': %s\n",
file_out, strerror(errno));
return 1;
}
}
//by yzhang for huffman statistics
if(file_out_table)
{
outTable = fopen(file_out_table, "w");
if(!outTable)
{
fprintf(stderr,
"Can't open output file '%s': %s\n",
file_out_table, strerror(errno));
return 1;
}
}
//end by yzhang
if(memory)
{
return compress ?
memory_encode_file(in, out) : memory_decode_file(in, out);
}
if(compress) //change by yzhang
huffman_encode_file(in, out,outTable);//step1:changed by yzhang from huffman_encode_file(in, out) to huffman_encode_file(in, out,outTable)
else
huffman_decode_file(in, out);
if(in)
fclose(in);
if(out)
fclose(out);
if(outTable)
fclose(outTable);
return 0;
}
static int
memory_encode_file(FILE *in, FILE *out)
{
unsigned char *buf = NULL, *bufout = NULL;
unsigned int len = 0, cur = 0, inc = 1024, bufoutlen = 0;
assert(in && out);
/* Read the file into memory. */
while(!feof(in))
{
unsigned char *tmp;
len += inc;
tmp = (unsigned char*)realloc(buf, len);
if(!tmp)
{
if(buf)
free(buf);
return 1;
}
buf = tmp;
cur += fread(buf + cur, 1, inc, in);
}
if(!buf)
return 1;
/* Encode the memory. */
if(huffman_encode_memory(buf, cur, &bufout, &bufoutlen))
{
free(buf);
return 1;
}
free(buf);
/* Write the memory to the file. */
if(fwrite(bufout, 1, bufoutlen, out) != bufoutlen)
{
free(bufout);
return 1;
}
free(bufout);
return 0;
}
static int
memory_decode_file(FILE *in, FILE *out)
{
unsigned char *buf = NULL, *bufout = NULL;
unsigned int len = 0, cur = 0, inc = 1024, bufoutlen = 0;
assert(in && out);
/* Read the file into memory. */
while(!feof(in))
{
unsigned char *tmp;
len += inc;
tmp = (unsigned char*)realloc(buf, len);
if(!tmp)
{
if(buf)
free(buf);
return 1;
}
buf = tmp;
cur += fread(buf + cur, 1, inc, in);
}
if(!buf)
return 1;
/* Decode the memory. */
if(huffman_decode_memory(buf, cur, &bufout, &bufoutlen))
{
free(buf);
return 1;
}
free(buf);
/* Write the memory to the file. */
if(fwrite(bufout, 1, bufoutlen, out) != bufoutlen)
{
free(bufout);
return 1;
}
free(bufout);
return 0;
}
Huffman.c
/*
* huffman - Encode/Decode files using Huffman encoding.
* http://huffman.sourceforge.net
* Copyright (C) 2003 Douglas Ryan Richardson; Gauss Interprise, Inc
*
* This library is free software; you can redistribute it and/or
* mod