一、实验原理
Huffman编码实现的数据结构
Huffman编码为可变长编码,若各码字长度按照所对应符号出现概率的大小逆序排列,则其平均长度最小。
编码步骤:
1、将信源符号按照出现概率由大到小的顺序排列;
2、将两个最小概率组合相加,并继续这一步骤,始终将较高的概率分支放在上部,直到概率到达1为止;
3、对每对组合的上边一个指定为1,下边一个指定为0(或相反指定);
4、画出由概率1处到每个信源符号概率的路径,顺序记录下沿路径的1和0,所得即为该符号的Huffman码字。
在程序中具体实现上述方法的步骤如下:
1、读入待编码的文件(格式不限,可能是文档、音频等);
2、扫描文件,统计各个字符出现的概率并建立相应的树叶节点;
3、建立Huffman树,
(1)按字符概率由小到大将对应结点排序
(2) 得到文件出现的字符种类数
(3)构建霍夫曼树:先置两个初始树叶节点,再构造两个树叶节点的父节点(合并概率),再将一节点置空,重新排序。
(4)对码树编码:函数中对最后排好序的概率遍历,判断是否为树叶节点,若是则构造huffman码结构,否则递归调用函数,直到达到树叶节点。
4、将码表及其他必要信息写入输出文件
5、第二次扫描:对源文件进行编码并输出
二、部分代码及注释:
huffman.h
#ifndef HUFFMAN_HUFFMAN_H
#define HUFFMAN_HUFFMAN_H
#include <stdio.h>
/*
 * Public interface of the Huffman coder: two stream-based entry points and
 * two buffer-based entry points. Only the prototypes live here, so the
 * meaning of the int return values is defined by the implementation.
 * NOTE(review): presumably 0 means success -- confirm against the
 * implementation before relying on it.
 */
/*
 * Stream encoder: takes an input stream, an output stream and a third
 * stream, out_Table, that was added for the Huffman statistics output.
 * The command-line driver initialises its table stream to NULL and only
 * opens it when -t is given, so this function can be called with
 * out_Table == NULL.
 */
int huffman_encode_file(FILE *in, FILE *out,FILE *out_Table);//step1: changed by yzhang for huffman statistics
/* Stream decoder: takes an input stream and an output stream. */
int huffman_decode_file(FILE *in, FILE *out);
/*
 * Buffer encoder: bufin/bufinlen describe the input bytes; pbufout and
 * pbufoutlen are out-parameters for the result buffer and its length.
 * NOTE(review): who allocates and who frees *pbufout is not visible from
 * this header -- confirm against the implementation.
 */
int huffman_encode_memory(const unsigned char *bufin,
unsigned int bufinlen,
unsigned char **pbufout,
unsigned int *pbufoutlen);
/*
 * Buffer decoder: same parameter shape as huffman_encode_memory. Note that
 * the result-buffer parameter is spelled bufout here but pbufout in the
 * encoder prototype.
 */
int huffman_decode_memory(const unsigned char *bufin,
unsigned int bufinlen,
unsigned char **bufout,
unsigned int *pbufoutlen);
#endif
getopt.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Declare the public getopt state first so the definitions below have consistent linkage. */
extern char *optarg;
extern int optind;
extern int opterr;

int opterr = 1;   /* non-zero: print diagnostics to stderr */
int optind = 1;   /* index of the next element of the argv vector to scan */
int optopt;       /* option character most recently examined */
int optreset;     /* set non-zero to force a rescan starting at argv[optind] */
char *optarg;     /* argument attached to the most recent option, or NULL */

#define BADCH (int)'?'
#define BADARG (int)':'
#define EMSG ""

/*
 * getopt --
 * Parse the argc/argv argument vector one option character per call.
 *
 * nargc/nargv: the argument vector to scan.
 * ostr:        accepted option letters; a letter followed by ':' takes an
 *              argument, and a leading ':' suppresses diagnostics and makes
 *              a missing argument report BADARG instead of BADCH.
 *
 * Returns the option letter, BADCH ('?') for an unknown option or a missing
 * argument, BADARG (':') for a missing argument when ostr starts with ':',
 * and EOF once the next word is not an option or "--" is reached.
 * Diagnostics are prefixed with __FILE__, not with the program name.
 */
int
getopt(int nargc, char * const *nargv, const char* ostr)
{
    static char *cursor = EMSG;   /* position inside the option cluster being scanned */
    char *spec;                   /* where the current letter sits inside ostr */

    /* Start a new argv word after a reset or when the current cluster is used up. */
    if (optreset || *cursor == '\0') {
        optreset = 0;

        if (optind >= nargc) {
            cursor = EMSG;
            return (EOF);
        }
        cursor = nargv[optind];
        if (*cursor != '-') {
            /* first non-option word ends the scan */
            cursor = EMSG;
            return (EOF);
        }
        if (cursor[1] != '\0') {
            ++cursor;
            if (*cursor == '-') {
                /* found "--": consume it and stop */
                ++optind;
                cursor = EMSG;
                return (EOF);
            }
        }
    }

    /* Fetch the next option character and look it up; ':' itself is never a valid option. */
    optopt = (int)*cursor++;
    spec = NULL;
    if (optopt != (int)':')
        spec = strchr(ostr, optopt);

    if (spec == NULL) {
        /* A lone '-' that the caller did not list as an option means end of options. */
        if (optopt == (int)'-')
            return (EOF);
        if (*cursor == '\0')
            ++optind;
        if (opterr && *ostr != ':')
            (void)fprintf(stderr,
                "%s: illegal option -- %c\n", __FILE__, optopt);
        return (BADCH);
    }

    ++spec;
    if (*spec == ':') {
        /* The option takes an argument. */
        if (*cursor != '\0') {
            /* glued to the letter, e.g. "-ofile" */
            optarg = cursor;
        } else {
            ++optind;
            if (nargc <= optind) {
                /* ran off the end of argv: no argument supplied */
                cursor = EMSG;
                if (*ostr == ':')
                    return (BADARG);
                if (opterr)
                    (void)fprintf(stderr,
                        "%s: option requires an argument -- %c\n",
                        __FILE__, optopt);
                return (BADCH);
            }
            /* the argument is the following argv word */
            optarg = nargv[optind];
        }
        cursor = EMSG;
        ++optind;
    } else {
        /* Plain flag: step to the next word only when this cluster is finished. */
        optarg = NULL;
        if (*cursor == '\0')
            ++optind;
    }

    return (optopt);   /* hand back the option letter */
}
huffcode.c
#include "huffman.h"
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <assert.h>
#ifdef WIN32
#include <malloc.h>
extern int getopt(int, char**, char*);
extern char* optarg;
#else
#include <unistd.h>
#endif
static int memory_encode_file(FILE *in, FILE *out);
static int memory_decode_file(FILE *in, FILE *out);
/*
 * version --
 * Write the program name, version and copyright banner to `out`.
 */
static void
version(FILE *out)
{
    static const char banner[] =
        "huffcode 0.3\n"
        "Copyright (C) 2003 Douglas Ryan Richardson"
        "; Gauss Interprise, Inc\n";

    fputs(banner, out);
}
/*
 * usage --
 * Write the command-line help text to `out`.
 *
 * The whole help text is a single concatenated string literal. There must be
 * no comma between the individual lines: a comma ends the literal and turns
 * the next line into an extra fputs argument, which does not compile.
 */
static void
usage(FILE* out)
{
    fputs("Usage: huffcode [-i<input file>] [-o<output file>] [-d|-c]\n"
          "-i - input file (default is standard input)\n"
          "-o - output file (default is standard output)\n"
          "-d - decompress\n"
          "-c - compress (default)\n"
          "-m - read file into memory, compress, then write to file (not default)\n"
          /* step1: added by yzhang for huffman statistics */
          "-t - output huffman statistics\n",
          out);
}
int
main(int argc, char** argv)
{
char memory = 0;
char compress = 1;
int opt;
const char *file_in = NULL, *file_out = NULL;
//step1:add by yzhang for huffman statistics
const char *file_out_table = NULL;
//end by yzhang
FILE *in = stdin;
FILE *out = stdout;
//step1:add by yzhang for huffman statistics
FILE * outTable = NULL;
//end by yzhang
/* Get the command line arguments. */
while((opt = getopt(argc, argv, "i:o:cdhvmt:")) != -1) //演示如何跳出循环,及查找括号对
{
switch(opt)
{
case 'i':
file_in = optarg;
break;
case 'o':
file_out = optarg;
break;
case 'c':
compress = 1;
break;
case 'd':
compress = 0;
break;
case 'h':
usage(stdout);
return 0;
case 'v':
version(stdout);
return 0;
case 'm':
memory = 1;
break;
// by yzhang for huffman statistics
case 't':
file_out_table = optarg;
break;
//end by yzhang
default:
usage(stderr);
return 1;
}
}
/* If an input file is given then open it. */
if(file_in)
{
in = fopen(file_in, "rb");
if(!in)
{
fprintf(stderr,
"Can't open input file '%s': %s\n",
file_in, strerror(errno));
return 1;
}
}
/* If an output file is given then create it. */
if(file_out)
{
out = fopen(file_out, "wb");
if(!out)
{
fprintf(stderr,
"Can't open output file '%s': %s\n",
file_out, strerror(errno));
return 1;
}
}
//by yzhang for huffman statistics
if(file_out_table)
{
outTable = fopen(file_out_table, "w");
if(!outTable)
{
fprintf(stderr,
"Can't open output file '%s': %s\n",
file_out_table, strerror(errno));
return 1;
}
}
//end by yzhang
if(memory)
{
return compress ?
memory_encode_file(in, out) : memory_decode_file(in, out);
}
if(compress) //change by yzhang
huffman_encode_file(in, out,outTable);//step1:changed by yzhang from huffman_encode_file(in, out) to huffman_encode_file(in, out,outTable)
else
huffman_decode_file(in, out);
if(in)
fclose(in);
if(out)
fclose(out);
if(outTable)