实验目的
掌握Huffman编解码实现的数据结构和实现框架, 进一步熟练使用C编程语言, 并完成压缩效率的分析。
实验原理
1.本实验中Huffman编码算法
(1)将文件以ASCII字符流的形式读入,统计每个符号的发生频率;
(2)将所有文件中出现过的字符按照频率从小到大的顺序排列;
(3)每一次选出最小的两个值,作为二叉树的两个叶子节点,将和作为它们的根节点,这两个叶子节点不再参与比较,新的根节点参与比较;
(4)重复步骤(3),直到只剩下一个根节点(以概率计时其值为1,以频数计时其值为全部符号出现次数的总和);
(5)将形成的二叉树的左节点标0,右节点标1,把从最上面的根节点到最下面的叶子节点途中遇到的0、1序列串起来,得到了各个字符的编码表示。
2.Huffman编码的数据结构设计
在程序实现中使用一种叫做二叉树的数据结构实现Huffman编码。
(1)哈夫曼节点结构
/*
 * One node of the Huffman binary tree.
 *
 * The child pointers and the symbol overlap in an anonymous union, so a
 * node stores either its two children or a symbol, never both.
 * isLeaf presumably tells which of the two is in use.
 */
typedef struct huffman_node_tag
{
    unsigned char isLeaf;               /* whether the node is a leaf */
    unsigned long count;                /* weighted sum of the symbols the node stands for */
    struct huffman_node_tag *parent;    /* pointer to the parent node */
    union
    {
        struct
        {
            struct huffman_node_tag *zero;  /* child on the "0" branch */
            struct huffman_node_tag *one;   /* child on the "1" branch */
        };
        unsigned char symbol;           /* symbol the node stands for */
    };
} huffman_node;
(2)哈夫曼码结构
/* Code word assigned to a symbol. */
typedef struct huffman_code_tag
{
    unsigned long numbits;  /* number of bits used by this code */
    unsigned char *bits;    /* pointer to the bit string of this code */
} huffman_code;
3.本实验中Huffman编码的流程
代码分析
实验中将实际完成编码工作的工程Huff_code封装成一个静态链接库,由工程huff_run来调用,huff_run完成的工作包括解析命令行参数,打开、读取、关闭输入文件,打开关闭输出文件,调用Huff_code完成编码。
Huff_run
huffcode.c
#include "huffman.h"
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <assert.h>
#ifdef WIN32
#include <malloc.h>
extern int getopt(int, char**, char*);
extern char* optarg;
#else
#include <unistd.h>
#endif
/*
 * Forward declarations of the in-memory code paths that main() selects
 * with the -m option; both are defined later in this file.
 */
static int memory_encode_file(FILE *in, FILE *out);
static int memory_decode_file(FILE *in, FILE *out);
/* Write the program name, version number and copyright notice to `out`. */
static void version(FILE *out)
{
    static const char banner[] =
        "huffcode 0.3\n"
        "Copyright (C) 2003 Douglas Ryan Richardson; Gauss Interprise, Inc\n";

    fputs(banner, out);
}
/*
 * Write the command-line help text to `out`.
 *
 * The text mirrors the option string "i:o:cdhvmt:" parsed in main():
 * -i, -o and -t take an argument, the remaining options are plain flags.
 */
static void usage(FILE* out)
{
    fputs("Usage: huffcode [-i<input file>] [-o<output file>] [-d|-c] [-m] [-t<table file>]\n"
          "-i - input file (default is standard input)\n"
          "-o - output file (default is standard output)\n"
          "-d - decompress\n"
          "-c - compress (default)\n"
          "-m - read file into memory, compress, then write to file (not default)\n"
          "-t - output huffman statistics to the given file\n"
          "-h - print this help and exit\n"
          "-v - print version information and exit\n",
          out);
}
/*
 * Entry point of the huffcode command-line tool.
 *
 * Parses the options, opens the requested input/output/table files
 * (falling back to stdin/stdout), runs the selected codec and closes every
 * stream on all paths.
 *
 * Returns 0 on success and non-zero on failure: bad option, a file that
 * cannot be opened, a codec failure, or an error while closing the output.
 *
 * NOTE(review): this assumes huffman_encode_file()/huffman_decode_file()
 * return 0 on success and non-zero on failure, like the memory_*_file()
 * helpers in this file -- confirm against huffman.h.
 */
int main(int argc, char** argv)
{
    char memory = 0;        /* set by -m: run the codec on an in-memory copy */
    char compress = 1;      /* 1 = compress (default), 0 = decompress */
    int opt;
    int status = 1;         /* pessimistic default, cleared by a successful run */
    const char *file_in = NULL, *file_out = NULL;
    const char *file_out_table = NULL;
    FILE *in = stdin;
    FILE *out = stdout;
    FILE *outTable = NULL;

    /* Get the command line arguments. */
    while((opt = getopt(argc, argv, "i:o:cdhvmt:")) != -1)
    {
        switch(opt)
        {
        case 'i':
            file_in = optarg;
            break;
        case 'o':
            file_out = optarg;
            break;
        case 'c':
            compress = 1;
            break;
        case 'd':
            compress = 0;
            break;
        case 'h':
            /* No file has been opened yet, so returning directly is safe. */
            usage(stdout);
            return 0;
        case 'v':
            version(stdout);
            return 0;
        case 'm':
            memory = 1;
            break;
        case 't':
            file_out_table = optarg;
            break;
        default:
            usage(stderr);
            return 1;
        }
    }

    /* If an input file is given then open it. */
    if(file_in)
    {
        in = fopen(file_in, "rb");
        if(!in)
        {
            fprintf(stderr,
                    "Can't open input file '%s': %s\n",
                    file_in, strerror(errno));
            goto cleanup;
        }
    }

    /* If an output file is given then create it. */
    if(file_out)
    {
        out = fopen(file_out, "wb");
        if(!out)
        {
            fprintf(stderr,
                    "Can't open output file '%s': %s\n",
                    file_out, strerror(errno));
            goto cleanup;
        }
    }

    /* If a statistics table file is given then create it (text mode). */
    if(file_out_table)
    {
        outTable = fopen(file_out_table, "w");
        if(!outTable)
        {
            fprintf(stderr,
                    "Can't open output file '%s': %s\n",
                    file_out_table, strerror(errno));
            goto cleanup;
        }
    }

    /* Run the selected codec and keep its result as the exit status. */
    if(memory)
        status = compress ?
            memory_encode_file(in, out) : memory_decode_file(in, out);
    else if(compress)
        status = huffman_encode_file(in, out, outTable);
    else
        status = huffman_decode_file(in, out);

cleanup:
    /*
     * A stream is NULL only when its fopen() failed.  Closing a written
     * stream flushes it, so a failing fclose() means data may be lost.
     */
    if(outTable && fclose(outTable) != 0)
        status = 1;
    if(out && fclose(out) != 0)
        status = 1;
    if(in)
        fclose(in);
    return status;
}
/*
 * Read everything that is left in `in` into a freshly allocated buffer.
 *
 * On success returns 0 and stores the buffer in *pbuf and the number of
 * bytes read in *plen; the caller owns the buffer and must free() it.
 * On allocation failure, read error or size overflow returns 1 and leaves
 * *pbuf and *plen untouched.
 */
static int read_stream_into_memory(FILE *in, unsigned char **pbuf,
                                   unsigned int *plen)
{
    enum { CHUNK = 1024 };              /* growth step and read size */
    unsigned char *buf = NULL;
    unsigned int cap = 0, used = 0;
    size_t got;

    do
    {
        unsigned char *tmp;

        /* Refuse to let the unsigned size counter wrap around. */
        if(cap > (unsigned int)-1 - CHUNK)
        {
            free(buf);
            return 1;
        }
        cap += CHUNK;
        tmp = (unsigned char*)realloc(buf, cap);
        if(!tmp)
        {
            free(buf);
            return 1;
        }
        buf = tmp;
        got = fread(buf + used, 1, CHUNK, in);
        used += (unsigned int)got;
        /* A short read means end of file or an error, so stop either way. */
    } while(got == CHUNK);

    /* Looping on !feof() alone would spin forever after a read error. */
    if(ferror(in))
    {
        free(buf);
        return 1;
    }

    *pbuf = buf;
    *plen = used;
    return 0;
}

/*
 * Compress the whole content of `in` in memory and write the result to
 * `out`.  Returns 0 on success and 1 on any read, allocation, encoding or
 * write failure.
 */
static int memory_encode_file(FILE *in, FILE *out)
{
    unsigned char *buf = NULL, *bufout = NULL;
    unsigned int cur = 0, bufoutlen = 0;
    int failed;

    assert(in && out);

    /* Read the file into memory. */
    if(read_stream_into_memory(in, &buf, &cur))
        return 1;

    /* Encode the memory; the input copy is not needed afterwards. */
    failed = huffman_encode_memory(buf, cur, &bufout, &bufoutlen) ? 1 : 0;
    free(buf);
    if(failed)
        return 1;

    /* Write the memory to the file. */
    failed = fwrite(bufout, 1, bufoutlen, out) != bufoutlen;
    free(bufout);
    return failed ? 1 : 0;
}

/*
 * Decompress the whole content of `in` in memory and write the result to
 * `out`.  Returns 0 on success and 1 on any read, allocation, decoding or
 * write failure.
 */
static int memory_decode_file(FILE *in, FILE *out)
{
    unsigned char *buf = NULL, *bufout = NULL;
    unsigned int cur = 0, bufoutlen = 0;
    int failed;

    assert(in && out);

    /* Read the file into memory. */
    if(read_stream_into_memory(in, &buf, &cur))
        return 1;

    /* Decode the memory; the input copy is not needed afterwards. */
    failed = huffman_decode_memory(buf, cur, &bufout, &bufoutlen) ? 1 : 0;
    free(buf);
    if(failed)
        return 1;

    /* Write the memory to the file. */
    failed = fwrite(bufout, 1, bufoutlen, out) != bufoutlen;
    free(bufout);
    return failed ? 1 : 0;
}
getopt.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Declarations that give the shared getopt state consistent linkage. */
extern int opterr;
extern int optind;
extern char *optarg;

/* Definitions of the parser state, one object per line. */
int opterr = 1;     /* if error message should be printed */
int optind = 1;     /* index into parent argv vector */
int optopt;         /* character checked for validity */
int optreset;       /* reset getopt */
char *optarg;       /* argument associated with option */

/* Marker values and the empty "nothing left to scan" string. */
#define BADCH (int)'?'
#define BADARG (int)':'
#define EMSG ""
/*
* getopt --
* Parse argc/argv argument vector.
*/
int
getopt(int nargc, char * const *nargv, const char* ostr)
{
static char *place = EMSG; /* option letter processing */
char *oli; /* option letter list index */
if (optreset || !*place) { /* update scanning pointer */
optreset = 0;
if (optind >= nargc || *(place = nargv[optind]) != '-') {
place = EMSG;
return (EOF);
}
if (place[1] && *++place == '-') { /* found "--" */
++optind;
place = EMSG;
return (EOF);
}
} /* option letter okay? */
if ((optopt = (int)*place++) == (int)':' ||
!(oli = strchr(ostr, optopt))) {
/*
* if the user didn't specify '-' as an option,
* assume it means EOF.
*/
if (optopt == (int)'-')
return (EOF);
if (!*place)
++optind;
if (opterr && *ostr != ':')
(void)fprintf(stderr,
"%s: illegal option -- %c\n", __FILE__, optopt);
return (BADCH);
}
if (*++oli != ':') { /* don't need argument */
optarg = NULL;
if (!*place)
++optind;
}
else { /* need an argument */