【数据压缩】--LZW算法

最新推荐文章于 2025-05-04 09:39:59 发布

m0_54222259

最新推荐文章于 2025-05-04 09:39:59 发布

阅读量2.1k

点赞数

CC 4.0 BY-SA版权

文章标签： c语言

本文链接：https://blog.csdn.net/m0_54222259/article/details/124294998

本文详细解析了LZW算法的工作原理，包括编码和解码过程，重点讨论了数据结构的设计，压缩效率的分析，以及针对不同文件类型的压缩效果。特别关注了字符重复率高和词典大小调整对压缩效率的影响，以及LZW算法的优缺点和适用场景。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

LZW算法原理

LZW编码

编码算法的思想

步骤1：将词典初始化为包含所有可能的单字符，当前前缀P初始化为空。
步骤2：当前字符C=字符流中的下一个字符。
步骤3：判断P+C是否在词典中：
（1）如果是，则用C扩展P，即P=P+C，返回到步骤2。
（2）如果否，则输出与当前前缀P相对应的码字W；将P+C添加到词典中；令P=C，并返回到步骤2。

数据结构分析


尾缀字符	suffix
母节点	parent
第一个孩子节点	firstchild
下一个兄弟节点	nextsibling

树用数组表示，数组下标用pointer表示，dict[pointer]表示一个节点：
dict[pointer].suffix
dict[pointer].parent
dict[pointer].firstchild
dict[pointer].nextsibling

/* Dictionary tree stored as a flat array; a node's array index is its code. */
struct {
	int suffix;       /* last character of the phrase this node stands for */
	int parent;       /* index of the parent node */
	int firstchild;   /* index of the first child node */
	int nextsibling;  /* index of the next node with the same parent */
} dictionary[MAX_CODE + 1];

/* Code that the next phrase added to the dictionary will receive. */
int next_code;

/* Stack used while decoding a phrase. */
int d_stack[MAX_CODE];

主要功能模块

初始化词典

/*
 * Reset the dictionary to its starting state: one root node for each byte
 * value 0..255, with no parent and no children, linked to the next root
 * through nextsibling. New phrases are numbered from 256 onwards.
 */
void InitDictionary( void){
	int code = 0;

	while( code < 256){
		dictionary[code].suffix = code;	// a root stands for the character equal to its own code
		dictionary[code].parent = -1;	// roots have no parent
		dictionary[code].firstchild = -1;	// nothing extends this root yet
		dictionary[code].nextsibling = (255 == code) ? -1 : code + 1;	// the last root ends the chain
		code++;
	}
	next_code = 256;
}

查找词典中是否有字符串

将当前字符依次与当前前缀的孩子节点进行比较，相同时说明词典中有该字符串，否则说明词典中没有该字符串，返回-1。

/*
 * Look up the phrase made of the phrase coded `string_code` followed by
 * `character`.
 *
 * Returns the code of that phrase, or -1 when it is not in the dictionary.
 * A negative `string_code` means there is no prefix: `character` itself is
 * returned, since every single character is a root of the dictionary.
 */
int InDictionary( int character, int string_code){
	int child;

	if( string_code < 0) return character;

	// scan the children of the prefix for one ending in `character`
	for( child = dictionary[string_code].firstchild; child >= 0; child = dictionary[child].nextsibling){
		if( dictionary[child].suffix == character) return child;
	}
	return -1;
}

将新串加入词典

将新串加入词典，则需要添加新节点，设置新节点的suffix,parent,nextsibling,firstchild，另外还需建立新节点与当前前缀其他孩子节点的关系。

/*
 * Add the phrase "phrase of string_code" + `character` as node next_code,
 * then advance next_code.
 *
 * Does nothing when `string_code` is negative (no prefix). No capacity
 * check is made here; LZWEncode and LZWDecode compare next_code with
 * MAX_CODE before calling.
 */
void AddToDictionary( int character, int string_code){
	int tail;

	if( string_code < 0) return;

	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].nextsibling = -1;
	dictionary[next_code].firstchild = -1;

	tail = dictionary[string_code].firstchild;
	if( tail < 0){
		// the prefix has no children yet: the new node becomes its first child
		dictionary[string_code].firstchild = next_code;
	}else{
		// otherwise walk to the last child and link the new node after it
		while( dictionary[tail].nextsibling >= 0)
			tail = dictionary[tail].nextsibling;
		dictionary[tail].nextsibling = next_code;
	}
	next_code++;
}

编码

如果当前字符串在词典中，将当前字符串设为前缀。如果当前字符串不在词典中，将当前字符串写入词典中，输出当前字符串前缀对应的码字，当前字符设为前缀。详见代码注释。

/*
 * LZW-compress everything in `fp` and write the result to `bf`.
 *
 * The output starts with the input length in bytes as a 32-bit value,
 * followed by one code per emitted phrase, each written with output().
 * `fp` must be seekable because the length is measured with fseek()/ftell()
 * before any data is read. If the length cannot be determined, or is not
 * below 0xFFFFFFFF, an error is printed to stderr, the header is written
 * as 0xFFFFFFFF and no codes follow.
 */
void LZWEncode( FILE *fp, BITFILE *bf){
	int character;	// current character C
	int string_code = -1;	// code of the current prefix P; -1 while P is empty
	int index;	// result of the dictionary lookup for P+C
	long file_length = -1;

	if( 0 == fseek( fp, 0, SEEK_END)){
		file_length = ftell( fp);
		if( 0 != fseek( fp, 0, SEEK_SET)) file_length = -1;
	}
	if( file_length < 0 || (unsigned long)file_length >= 0xFFFFFFFFUL){
		fprintf( stderr, "Cannot determine the input length, or it does not fit in 32 bits\n");
		BitsOutput( bf, 0xFFFFFFFFUL, 4*8);
		return;
	}
	BitsOutput( bf, (unsigned long)file_length, 4*8);
	InitDictionary();
	while( EOF != (character = fgetc( fp))){
		index = InDictionary( character, string_code);
		if( 0 <= index){
			// P+C is in the dictionary: it becomes the new prefix
			string_code = index;
		}else{
			// P+C is new: emit the code of P, remember P+C if there is room, restart from C
			output( bf, string_code);
			if( MAX_CODE > next_code){
				AddToDictionary( character, string_code);
			}
			string_code = character;
		}
	}
	// Flush the last prefix. For empty input string_code is still -1 and there is nothing to emit.
	if( 0 <= string_code) output( bf, string_code);
}

LZW解码

解码算法的思想

步骤1：在开始译码时词典包含所有可能的前缀根。
步骤2：令CW=码字流中的第一个码字
步骤3：输出CW对应的字符到字符流
步骤4：先前码字PW=当前码字CW
步骤5：当前码字CW等于码字流中的下一个码字
步骤6：判断CW是否在词典中
（1）如果是，将CW对应的字符串输出到字符流中，当前前缀P=PW对应的字符串，当前字符C=CW对应字符串中的第一个字符，将P+C添加到词典中，先前码字PW=当前码字CW;
（2）如果否，即如果CW不在词典中，说明该词条刚写入词典就被使用，该字符串的第一个字符等于最后一个字符，当前前缀P=PW对应的字符串，当前字符C=PW对应字符串中的第一个字符，将P+C添加到词典中，将P+C输出到字符流中，先前码字PW=当前码字CW;
步骤7：判断码字流中是否还有码字要译。
（1）如果“是”，就返回步骤4。
（2）如果“否”，结束。

解码

当前码字不在词典中的情况：说明一个词条刚写入词典就被使用，举例：
在这里插入图片描述
“aba”被写入词典后，输出“ab”对应的码字，“a”作为新的前缀；判断“ab”在词典中，则“ab”作为新的前缀；判断“aba”在词典中，则刚写入词典的“aba”对应的码字被输出。但在解码端，收到码字就开始解码，解码到256时，“ba”刚被写入词典中，此时词典中还未写入“aba”对应的码字。只有编码端刚写入的词条立即被使用时才会发生这种情况，而这种情况只有在该字符串的第一个字符等于最后一个字符时才会出现。因此，当前码字不在词典中时，说明当前码字对应的字符串为先前码字对应的字符串+先前码字对应字符串的第一个字符。

LZWDecode

首先初始化词典，如果当前码字不在词典中，将character赋给d_stack[0]，并在d_stack[]中存储先前码字对应的字符串（倒序存放），如果当前码字在词典中，在d_stack[]中存储当前码字对应字符串（倒序存放），则可以得到先前码字或当前码字的第一个字符d_stack[phrase_length-1]。当前码字不在词典中时，新词条为当前前缀+当前前缀的第一个字符，将先前码字对应的字符串+先前码字对应字符串的第一个字符输出到字符流中。当前码字在词典中时，将当前码字对应的字符串输出到字符流中，新词条为当前前缀+当前字符串的第一个字符。最后将当前码字设为先前码字。

/*
 * Decode the LZW stream in `bf` and write the recovered bytes to `fp`.
 *
 * The stream starts with the 32-bit length of the original data; codes are
 * then read with input() until that many bytes have been written.
 * A header of 0xFFFFFFFF is treated as "no data". A code that cannot occur
 * in a well-formed stream is reported on stderr and stops decoding.
 */
void LZWDecode( BITFILE *bf, FILE *fp){
	int character = 0;	// first character of the phrase decoded last
	int new_code;	// code just read (CW)
	int last_code = -1;	// code read before it (PW); -1 before the first code
	int phrase_length;	// number of characters held in d_stack[]
	unsigned long file_length;	// bytes still to be written

	file_length = BitsInput( bf, 4 * 8);
	// Compare with the 32-bit pattern explicitly: -1 converted to unsigned long
	// only equals it when unsigned long is 32 bits wide.
	if( 0xFFFFFFFFUL == file_length){
		file_length = 0;
	}
	InitDictionary();
	while( file_length > 0){
		new_code = input( bf);
		if( new_code < next_code){
			// Known code: d_stack[0..] receives its phrase, last character first.
			phrase_length = DecodeString( 0, new_code);
		}else if( new_code == next_code && 0 <= last_code && MAX_CODE > next_code){
			// The code is the entry about to be added: its phrase is the previous
			// phrase followed by the previous phrase's first character.
			d_stack[0] = character;
			phrase_length = DecodeString( 1, last_code);
		}else{
			// Any other value refers to an entry that was never created.
			fprintf( stderr, "Invalid code %d in compressed data\n", new_code);
			return;
		}
		// d_stack[] is reversed, so the top element is the phrase's first character.
		character = d_stack[phrase_length-1];
		// Pop the stack to write the phrase in order, never past the declared length.
		while( 0 < phrase_length && 0 < file_length){
			phrase_length--;
			fputc( d_stack[phrase_length], fp);
			file_length--;
		}
		if( MAX_CODE > next_code){
			// New entry: previous phrase + first character of the current phrase.
			AddToDictionary( character, last_code);
		}
		last_code = new_code;
	}
}

DecodeString

/*
 * Unpack the phrase for `code` into d_stack[], starting at index `start`.
 * Characters are stored last-to-first by following parent links until a
 * root is passed, so the phrase's first character ends up at the highest
 * index written.
 *
 * Returns `start` plus the number of characters stored.
 */
int DecodeString( int start, int code){
	int top;

	for( top = start; code >= 0; code = dictionary[code].parent){
		d_stack[top++] = dictionary[code].suffix;
	}
	return top;
}

调试程序

代码补充完整后，我们首先用课件上的数据进行实验。新建文本文件test1.txt，内容为“abbababac”
编码后的词典应为：
在这里插入图片描述
设置命令参数，待编码文件为test1.txt，编码后文件为test1.dat，dat文件多为程序使用的数据，它只是开发者为了清楚文件的意思而自己定义的。

编码

在这里插入图片描述
调用PrintDictionary()函数来输出256之后的词典

/*
 * Print every dictionary entry from code 256 up to next_code-1 to stdout,
 * one per line, as "<code>-><phrase>".
 */
void PrintDictionary( void){
	int code;
	int remaining;

	for( code = 256; code < next_code; code++){
		remaining = DecodeString( 0, code);
		printf( "%4d->", code);
		// d_stack[] holds the phrase reversed; print from the top down
		for( remaining--; remaining >= 0; remaining--)
			printf( "%c", (char)(d_stack[remaining]));
		printf( "\n");
	}
}

在这里插入图片描述
得到的结果和理论相同。编码正确。

解码

接下来，我们用编码后得到的test1.dat进行解码，设置命令参数：
在这里插入图片描述
运行程序后，得到解码后的文件testd.txt，内容与编码前的内容相同。说明解码正确。

压缩效率分析

十种不同格式文件的压缩效率

准备十种不同格式的文件，使用LZW编码器进行压缩得到输出的压缩比特流文件。
在这里插入图片描述

压缩后的文件：
在这里插入图片描述
将原文件的格式、数据量，压缩后文件的数据量，以及编码词典总数写入表格中进行比较：

更改词典总数上限后的编码效率

不同格式的文件经LZW编码器压缩后，压缩效率有很大的不同。仅有7.rgb经过LZW编码后数据量减小了，其余九个文件经LZW压缩编码后数据量反而上升。本次实验词典总数上限为65535，其中第3，4，6，9，10个文件词典编码总数均已达到上限，因此我们增大词典总数上限，将MAX_CODE设为200000，继续进行LZW编码，得到如下结果（注意：程序中input/output宏固定使用16位码字，MAX_CODE超过65535时大于65535的码字会被截断，压缩文件无法正确解码；若要真正使用更大的词典，需要同时增大码字位数）：
在这里插入图片描述

增大词典总数上限后，数据量较第一次压缩后的结果更小了，但除了6.avi数据量较原文件有所减小，其他文件压缩后的数据量还是大于原文件。对于词典编码总数仍达到上限的文件，可能是MAXCODE的值仍旧太小，数据量过大，导致压缩算法未充分利用。词典编码总数未达到上界，压缩后数据量大于原文件的，可能是文件中重复字符的概率小，LZW压缩算法不能发挥优势。

字符重复率很高的文本文档

新建一个文本文档，内容为重复率很高的字符
在这里插入图片描述
使用LZW编码器进行压缩后，数据量有了很明显的减小

在这里插入图片描述

LZW算法的优缺点

由此可见，字符串重复率低时，压缩效率受到很大的影响，这是LZW算法的一大局限。它的优点是只需一遍扫描，具有自适应的特点，算法简单，便于快速实现。

程序代码

bitio.h

/*
 * Declaration for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */
#ifndef BITIO_H
#define BITIO_H

#include <stdio.h>

/* State of a stream that is read or written one bit at a time. */
typedef struct{
	FILE *fp;	/* underlying byte stream */
	unsigned char mask;	/* selects the next bit within rack; 0x80 is the top bit */
	int rack;	/* byte currently being consumed or assembled */
}BITFILE;

/* Open a file for bit input; a NULL filename selects stdin. Returns NULL on failure. */
BITFILE *OpenBitFileInput( char *filename);
/* Open a file for bit output; a NULL filename selects stdout. Returns NULL on failure. */
BITFILE *OpenBitFileOutput( char *filename);
/* Close the stream and free the BITFILE. */
void CloseBitFileInput( BITFILE *bf);
/* Write any partially filled byte, close the stream and free the BITFILE. */
void CloseBitFileOutput( BITFILE *bf);
/* Read one bit; returns 0 or 1. */
int BitInput( BITFILE *bf);
/* Read `count` bits, most significant first, and return them as a number. */
unsigned long BitsInput( BITFILE *bf, int count);
/* Write one bit; any non-zero `bit` is written as 1. */
void BitOutput( BITFILE *bf, int bit);
/* Write the low `count` bits of `code`, most significant first. */
void BitsOutput( BITFILE *bf, unsigned long code, int count);
#endif	/* BITIO_H */

bitio.c

/*
 * Definitions for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */

#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"
/*
 * Open `filename` in binary mode for bit-wise reading; a NULL filename
 * selects stdin.
 *
 * Returns a heap-allocated BITFILE, or NULL when the allocation or the
 * fopen() fails.
 */
BITFILE *OpenBitFileInput( char *filename){
	BITFILE *bf = malloc( sizeof *bf);

	if( NULL == bf) return NULL;
	bf->fp = (NULL == filename) ? stdin : fopen( filename, "rb");
	if( NULL == bf->fp){
		free( bf);	// do not leak the handle when the file cannot be opened
		return NULL;
	}
	bf->mask = 0x80;	// start at the top bit; BitInput fetches a byte when it sees 0x80
	bf->rack = 0;
	return bf;
}

/*
 * Open `filename` in binary mode for bit-wise writing; a NULL filename
 * selects stdout.
 *
 * Returns a heap-allocated BITFILE, or NULL when the allocation or the
 * fopen() fails.
 */
BITFILE *OpenBitFileOutput( char *filename){
	BITFILE *bf = malloc( sizeof *bf);

	if( NULL == bf) return NULL;
	bf->fp = (NULL == filename) ? stdout : fopen( filename, "wb");
	if( NULL == bf->fp){
		free( bf);	// do not leak the handle when the file cannot be opened
		return NULL;
	}
	bf->mask = 0x80;	// the first bit written goes into the top bit of rack
	bf->rack = 0;
	return bf;
}

/*
 * Close the underlying stream and free `bf`.
 * A NULL `bf` is accepted and ignored.
 */
void CloseBitFileInput( BITFILE *bf){
	if( NULL == bf) return;
	fclose( bf->fp);
	free( bf);
}

/*
 * Write any partially filled byte, close the underlying stream and free `bf`.
 * A NULL `bf` is accepted and ignored. A failure of the final write or of
 * the close is reported on stderr.
 */
void CloseBitFileOutput( BITFILE *bf){
	int failed = 0;

	if( NULL == bf) return;
	// a mask other than 0x80 means rack holds bits that were not written yet
	if( 0x80 != bf->mask && EOF == fputc( bf->rack, bf->fp)) failed = 1;
	if( 0 != fclose( bf->fp)) failed = 1;
	if( failed) fprintf( stderr, "Error while flushing or closing the output file\n");
	free( bf);
}

/*
 * Read the next bit from `bf`, most significant bit of each byte first.
 *
 * Returns 0 or 1. When a new byte is needed and the stream is at its end,
 * a message is printed to stderr and the program exits.
 */
int BitInput( BITFILE *bf){
	int bit;
	unsigned char shifted;

	// a mask of 0x80 means the next byte has to be fetched first
	if( 0x80 == bf->mask){
		bf->rack = fgetc( bf->fp);
		if( EOF == bf->rack){
			fprintf(stderr, "Read after the end of file reached\n");
			exit( -1);
		}
	}
	bit = (bf->mask & bf->rack) ? 1 : 0;

	// move to the next lower bit, wrapping back to the top bit after the last one
	shifted = bf->mask >> 1;
	bf->mask = shifted ? shifted : 0x80;
	return bit;
}

/*
 * Read `count` bits from `bf`, most significant first, and return them as
 * an unsigned number.
 *
 * `count` must not exceed the width of unsigned long. A `count` of zero or
 * less reads nothing and returns 0.
 */
unsigned long BitsInput( BITFILE *bf, int count){
	unsigned long value = 0UL;
	unsigned long mask;

	if( count <= 0) return 0UL;	// shifting by a negative amount is undefined
	// Unsigned shift: 1L << 31 would overflow a 32-bit signed long.
	for( mask = 1UL << (count-1); 0 != mask; mask >>= 1){
		if( 1 == BitInput( bf))
			value |= mask;
	}
	return value;
}

/*
 * Append one bit to `bf`; any non-zero `bit` is stored as 1.
 * Bits fill rack from the top bit down, and the byte is written to the
 * stream once all eight bits are in place.
 */
void BitOutput( BITFILE *bf, int bit){
	if( bit) bf->rack |= bf->mask;

	bf->mask >>= 1;
	if( bf->mask) return;	// the byte is not complete yet

	// eight bits collected: write the byte and start a fresh one
	fputc( bf->rack, bf->fp);
	bf->rack = 0;
	bf->mask = 0x80;
}

/*
 * Write the low `count` bits of `code` to `bf`, most significant first.
 *
 * `count` must not exceed the width of unsigned long. A `count` of zero or
 * less writes nothing.
 */
void BitsOutput( BITFILE *bf, unsigned long code, int count){
	unsigned long mask;

	if( count <= 0) return;	// shifting by a negative amount is undefined
	// Unsigned shift: 1L << 31 would overflow a 32-bit signed long.
	for( mask = 1UL << (count-1); 0 != mask; mask >>= 1){
		BitOutput( bf, (0 != (code & mask)) ? 1 : 0);
	}
}
#if 0
/*
 * Stand-alone test driver for the bit I/O routines, compiled out by the
 * surrounding #if 0. It copies bits from an input (argv[1], else stdin) to
 * an output (argv[2], else stdout) and echoes each bit to stderr in groups
 * of eight.
 *
 * NOTE(review): the Open* calls below pass two arguments and never assign
 * bfi/bfo, which does not match the one-argument functions defined in this
 * file; the driver would have to be updated before it can be enabled.
 */
int main( int argc, char **argv){
	BITFILE *bfi, *bfo;
	int bit;
	int count = 0;	/* number of bits copied so far */

	/* input: argv[1] when given, otherwise stdin */
	if( 1<argc){
		if( NULL==OpenBitFileInput( bfi, argv[1])){
			fprintf( stderr, "fail open the file\n");
			return -1;
		}
	}else{
		if( NULL==OpenBitFileInput( bfi, NULL)){
			fprintf( stderr, "fail open stdin\n");
			return -2;
		}
	}
	/* output: argv[2] when given, otherwise stdout */
	if( 2<argc){
		if( NULL==OpenBitFileOutput( bfo, argv[2])){
			fprintf( stderr, "fail open file for output\n");
			return -3;
		}
	}else{
		if( NULL==OpenBitFileOutput( bfo, NULL)){
			fprintf( stderr, "fail open stdout\n");
			return -4;
		}
	}
	/* The loop has no exit of its own; BitInput() exits the program at end of input. */
	while( 1){
		bit = BitInput( bfi);
		fprintf( stderr, "%d", bit);
		count ++;
		if( 0==(count&7))fprintf( stderr, " ");	/* space after every eighth bit */
		BitOutput( bfo, bit);
	}
	return 0;
}
#endif

lzw.c

/*
 * Definition for LZW coding 
 *
 * vim: ts=4 sw=4 cindent nowrap
 */
#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"
#define CODE_BITS 16	// width in bits of every code written to or read from the bit stream
#define MAX_CODE 65535	// upper bound on dictionary codes
// A larger dictionary (for example MAX_CODE 200000) also needs a larger CODE_BITS,
// otherwise codes are truncated when they are written.

#if MAX_CODE >= (1L << CODE_BITS)
#error "MAX_CODE does not fit in CODE_BITS bits"
#endif

/* Dictionary tree stored as a flat array; a node's array index is its code. */
struct {
	int suffix;	// last character of the phrase this node stands for
	int parent;	// index of the parent node
	int firstchild;	// index of the first child node
	int nextsibling;	// index of the next node with the same parent
} dictionary[MAX_CODE+1];
int next_code;	// code that the next phrase added to the dictionary will receive
int d_stack[MAX_CODE]; // stack for decoding a phrase

// Read / write one code of CODE_BITS bits.
#define input(f) ((int)BitsInput( f, CODE_BITS))
#define output(f, x) BitsOutput( f, (unsigned long)(x), CODE_BITS)

int DecodeString( int start, int code);
void InitDictionary( void);
/*
 * Print every dictionary entry from code 256 up to next_code-1 to stdout,
 * one per line, as "<code>-><phrase>".
 */
void PrintDictionary( void){
	int code;
	int remaining;

	for( code = 256; code < next_code; code++){
		remaining = DecodeString( 0, code);
		printf( "%4d->", code);
		// d_stack[] holds the phrase reversed; print from the top down
		for( remaining--; remaining >= 0; remaining--)
			printf( "%c", (char)(d_stack[remaining]));
		printf( "\n");
	}
}

/*
 * Unpack the phrase for `code` into d_stack[], starting at index `start`.
 * Characters are stored last-to-first by following parent links until a
 * root is passed, so the phrase's first character ends up at the highest
 * index written.
 *
 * Returns `start` plus the number of characters stored.
 */
int DecodeString( int start, int code){
	int top;

	for( top = start; code >= 0; code = dictionary[code].parent){
		d_stack[top++] = dictionary[code].suffix;
	}
	return top;
}
/*
 * Reset the dictionary to its starting state: one root node for each byte
 * value 0..255, with no parent and no children, linked to the next root
 * through nextsibling. New phrases are numbered from 256 onwards.
 */
void InitDictionary( void){
	int code = 0;

	while( code < 256){
		dictionary[code].suffix = code;	// a root stands for the character equal to its own code
		dictionary[code].parent = -1;	// roots have no parent
		dictionary[code].firstchild = -1;	// nothing extends this root yet
		dictionary[code].nextsibling = (255 == code) ? -1 : code + 1;	// the last root ends the chain
		code++;
	}
	next_code = 256;
}
/*
 * Look up the phrase made of the phrase coded `string_code` followed by
 * `character`.
 *
 * Returns the code of that phrase, or -1 when it is not in the dictionary.
 * A negative `string_code` means there is no prefix: `character` itself is
 * returned, since every single character is a root of the dictionary.
 */
int InDictionary( int character, int string_code){
	int child;

	if( string_code < 0) return character;

	// scan the children of the prefix for one ending in `character`
	for( child = dictionary[string_code].firstchild; child >= 0; child = dictionary[child].nextsibling){
		if( dictionary[child].suffix == character) return child;
	}
	return -1;
}

/*
 * Add the phrase "phrase of string_code" + `character` as node next_code,
 * then advance next_code.
 *
 * Does nothing when `string_code` is negative (no prefix). No capacity
 * check is made here; LZWEncode and LZWDecode compare next_code with
 * MAX_CODE before calling.
 */
void AddToDictionary( int character, int string_code){
	int tail;

	if( string_code < 0) return;

	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].nextsibling = -1;
	dictionary[next_code].firstchild = -1;

	tail = dictionary[string_code].firstchild;
	if( tail < 0){
		// the prefix has no children yet: the new node becomes its first child
		dictionary[string_code].firstchild = next_code;
	}else{
		// otherwise walk to the last child and link the new node after it
		while( dictionary[tail].nextsibling >= 0)
			tail = dictionary[tail].nextsibling;
		dictionary[tail].nextsibling = next_code;
	}
	next_code++;
}

/*
 * LZW-compress everything in `fp` and write the result to `bf`, then print
 * the dictionary that was built with PrintDictionary().
 *
 * The output starts with the input length in bytes as a 32-bit value,
 * followed by one code per emitted phrase, each written with output().
 * `fp` must be seekable because the length is measured with fseek()/ftell()
 * before any data is read. If the length cannot be determined, or is not
 * below 0xFFFFFFFF, an error is printed to stderr, the header is written
 * as 0xFFFFFFFF and no codes follow.
 */
void LZWEncode( FILE *fp, BITFILE *bf){
	int character;	// current character C
	int string_code = -1;	// code of the current prefix P; -1 while P is empty
	int index;	// result of the dictionary lookup for P+C
	long file_length = -1;

	if( 0 == fseek( fp, 0, SEEK_END)){
		file_length = ftell( fp);
		if( 0 != fseek( fp, 0, SEEK_SET)) file_length = -1;
	}
	if( file_length < 0 || (unsigned long)file_length >= 0xFFFFFFFFUL){
		fprintf( stderr, "Cannot determine the input length, or it does not fit in 32 bits\n");
		BitsOutput( bf, 0xFFFFFFFFUL, 4*8);
		return;
	}
	BitsOutput( bf, (unsigned long)file_length, 4*8);
	InitDictionary();
	while( EOF != (character = fgetc( fp))){
		index = InDictionary( character, string_code);
		if( 0 <= index){
			// P+C is in the dictionary: it becomes the new prefix
			string_code = index;
		}else{
			// P+C is new: emit the code of P, remember P+C if there is room, restart from C
			output( bf, string_code);
			if( MAX_CODE > next_code){
				AddToDictionary( character, string_code);
			}
			string_code = character;
		}
	}
	// Flush the last prefix. For empty input string_code is still -1 and there is nothing to emit.
	if( 0 <= string_code) output( bf, string_code);
	PrintDictionary();
}

/*
 * Decode the LZW stream in `bf` and write the recovered bytes to `fp`.
 *
 * The stream starts with the 32-bit length of the original data; codes are
 * then read with input() until that many bytes have been written.
 * A header of 0xFFFFFFFF is treated as "no data". A code that cannot occur
 * in a well-formed stream is reported on stderr and stops decoding.
 */
void LZWDecode( BITFILE *bf, FILE *fp){
	int character = 0;	// first character of the phrase decoded last
	int new_code;	// code just read (CW)
	int last_code = -1;	// code read before it (PW); -1 before the first code
	int phrase_length;	// number of characters held in d_stack[]
	unsigned long file_length;	// bytes still to be written

	file_length = BitsInput( bf, 4 * 8);
	// Compare with the 32-bit pattern explicitly: -1 converted to unsigned long
	// only equals it when unsigned long is 32 bits wide.
	if( 0xFFFFFFFFUL == file_length){
		file_length = 0;
	}
	InitDictionary();
	while( file_length > 0){
		new_code = input( bf);
		if( new_code < next_code){
			// Known code: d_stack[0..] receives its phrase, last character first.
			phrase_length = DecodeString( 0, new_code);
		}else if( new_code == next_code && 0 <= last_code && MAX_CODE > next_code){
			// The code is the entry about to be added: its phrase is the previous
			// phrase followed by the previous phrase's first character.
			d_stack[0] = character;
			phrase_length = DecodeString( 1, last_code);
		}else{
			// Any other value refers to an entry that was never created.
			fprintf( stderr, "Invalid code %d in compressed data\n", new_code);
			return;
		}
		// d_stack[] is reversed, so the top element is the phrase's first character.
		character = d_stack[phrase_length-1];
		// Pop the stack to write the phrase in order, never past the declared length.
		while( 0 < phrase_length && 0 < file_length){
			phrase_length--;
			fputc( d_stack[phrase_length], fp);
			file_length--;
		}
		if( MAX_CODE > next_code){
			// New entry: previous phrase + first character of the current phrase.
			AddToDictionary( character, last_code);
		}
		last_code = new_code;
	}
}



/*
 * Command-line entry point: <program> <o> <ifile> <ofile>
 *   <o> starting with 'E' compresses <ifile> into <ofile>,
 *   <o> starting with 'D' decompresses <ifile> into <ofile>.
 *
 * Returns 0 on success, -1 when too few arguments are given, -2 when the
 * input cannot be opened, -3 when the output cannot be opened and -4 for
 * an unsupported operation.
 */
int main( int argc, char **argv){
	FILE *fp;
	BITFILE *bf;

	if( 4>argc){
		fprintf( stdout, "usage: \n%s <o> <ifile> <ofile>\n", argv[0]);
		fprintf( stdout, "\t<o>: E or D reffers encode or decode\n");
		fprintf( stdout, "\t<ifile>: input file name\n");
		fprintf( stdout, "\t<ofile>: output file name\n");
		return -1;
	}
	if( 'E' == argv[1][0]){ // do encoding
		// open the input first so a missing input does not truncate the output file
		fp = fopen( argv[2], "rb");
		if( NULL == fp){
			fprintf( stderr, "cannot open input file %s\n", argv[2]);
			return -2;
		}
		bf = OpenBitFileOutput( argv[3]);
		if( NULL == bf){
			fprintf( stderr, "cannot open output file %s\n", argv[3]);
			fclose( fp);
			return -3;
		}
		LZWEncode( fp, bf);
		fclose( fp);
		CloseBitFileOutput( bf);
		fprintf( stdout, "encoding done\n");
	}else if( 'D' == argv[1][0]){	// do decoding
		bf = OpenBitFileInput( argv[2]);
		if( NULL == bf){
			fprintf( stderr, "cannot open input file %s\n", argv[2]);
			return -2;
		}
		fp = fopen( argv[3], "wb");
		if( NULL == fp){
			fprintf( stderr, "cannot open output file %s\n", argv[3]);
			CloseBitFileInput( bf);
			return -3;
		}
		LZWDecode( bf, fp);
		fclose( fp);
		CloseBitFileInput( bf);
		fprintf( stdout, "decoding done\n");
	}else{	// otherwise
		fprintf( stderr, "not supported operation\n");
		return -4;
	}
	return 0;
}