lzw算法c语言程序,LZW算法压缩c语言实现-CSDN博客

最近研究了一下LZW算法，也查阅了不少这方面的资料。LZW比较适合压缩文本文件；对于稍大一些的流文件，则可能出现压缩后的文件反而大于源文件的情况。LZW有很多著名的实现，下面的程序以动态增加编码位数为出发点，利用哈希表来实现LZW压缩。哈希算法有两个，其中一个被我注释掉了，两个都可以用；具体哪个更好，我自己没有测试过。

/**********************************************************************

***********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of slots in the dictionary hash table. */
#define hashsize 4096

/* Reserved code written to the stream when the dictionary is reset. */
#define clear 256

/* Reserved code written to the stream to mark its end. */
#define teminate 257

/* Value stored in Hash_Table.value for a slot that holds no entry. */
#define not_used (-1)

/* Largest code representable in n bits, e.g. MAXVAL(9) == 511. */
#define MAXVAL(n) ((1 << (n)) - 1)

/* Widest code, in bits, the compressor grows to before resetting. */
#define max_bits 12

/* Stream being compressed and stream receiving the packed codes. */
FILE *in, *out;

/* Width, in bits, of the codes currently being written; starts at 9. */
int bitsize = 9;

/* Largest code that fits in the current width; set via MAXVAL(bitsize). */
int maxcode;

/* Dictionary entry: one slot of the hash table used by the compressor. */

typedef struct prex_cha{

int value; /* code assigned to this entry; not_used marks an empty slot */

unsigned int prefix; /* code of the string that comes before `character` */

unsigned int character; /* byte appended to the prefix string */

}Hash_Table;

/* The dictionary: hashsize slots, searched and filled through find_match(). */
Hash_Table Hash_table[hashsize];

void initial_hashtable ()/*字典初始化*/

{

int i;

for (i=0; i

{

Hash_table[i].value = not_used;

}

/*
 * Append `code` to the output bit stream, most significant bit first.
 *
 * The code is written with the current global width `bitsize`.  Bits are
 * collected left-aligned in a 32-bit window and every complete byte is
 * written to `out`.  Up to 7 trailing bits stay buffered between calls;
 * they are only written once later calls supply enough additional bits.
 *
 * code: value to emit; only its low `bitsize` bits are used.
 */
void output (unsigned int code)
{
    static int count = 0;             /* number of bits waiting in buffer */
    static unsigned long buffer = 0L; /* pending bits, left-aligned in the low 32 bits */

    /* Drop anything above the code width so it cannot corrupt pending bits. */
    code &= (1U << bitsize) - 1U;

    buffer |= (unsigned long) code << (32 - bitsize - count);
    /* unsigned long may be wider than 32 bits; keep the window at 32. */
    buffer &= 0xFFFFFFFFUL;
    count += bitsize;

    while (count >= 8) /* flush every complete byte, top byte first */
    {
        fputc((int) ((buffer >> 24) & 0xFFUL), out);
        buffer = (buffer << 8) & 0xFFFFFFFFUL;
        count -= 8;
    }
}

#if 0
/*
 * Alternative lookup, kept for reference only and compiled out.
 *
 * It hashes with XOR and resolves collisions by stepping backwards by
 * `offset`, wrapping modulo 5021.
 *
 * NOTE(review): the wrap-around adds 5021, so `index` can reach 5020,
 * while Hash_table has only hashsize (4096) slots.  Do not enable this
 * version unless the table is enlarged to at least 5021 entries.
 */
int find_match(int prefix,unsigned int character)
{
    int index;
    int offset; /* step used when probing for another slot */

    index = (character << 4) ^ prefix; /* XOR decides the starting slot */

    if (index == 0)
    {
        offset = 1;
    }
    else
    {
        offset = 5021 - index;
    }

    while (1)
    {
        if (Hash_table[index].value == not_used)
        {
            return (index);
        }

        if (Hash_table[index].prefix == prefix && Hash_table[index].character == character)
        {
            return (index);
        }

        index -= offset;

        if (index < 0)
        {
            index += 5021;
        }
    }
}
#endif

int find_match(int prefix,unsigned int character)

{

int index;

index = prefix % 4096;

while (1)

{

if(Hash_table[index].value == not_used)

{

return (index);

}

if(Hash_table[index].prefix == prefix && Hash_table[index].character == character)

{

return (index);

}

index = index + 1;

if(index >= 4096)

{

index = index - 4096;

}

void lzwcompression ()

{

unsigned int prefix;

unsigned int character;

unsigned int index;

unsigned int next_code = 258; /* 当前字典的标号*/

initial_hashtable ();

prefix = fgetc (in);

while ((character = fgetc(in)) !=(unsigned) EOF)

{

index = find_match (prefix,character);

if (Hash_table[index].value != not_used)/*能够找到*/

{

prefix = Hash_table[index].value;

}

else

{

if(next_code <= maxcode)/*不能找到，是新的字符串，则添加到表中*/

{

Hash_table[index].value = next_code++;

Hash_table[index].character = character;

Hash_table[index].prefix = prefix;

}

output(prefix);

prefix = character; /*把后缀给前缀，准备下次输入*/

/*特殊标志，当位数必须增加时*/

if(next_code > maxcode)

{

if(bitsize < 12)

{

maxcode = MAXVAL(++bitsize);

}

else /*达到4096时候，必须清除哈希表，重新开始*/

{

output (256); /*输出清除标志到文件中*/

initial_hashtable();

next_code = 258;

bitsize = 9;

maxcode = MAXVAL(bitsize);/*maxcode 变为511*/

}

}/*if－else结束*/

}/*while结束*/

output(prefix); /*输出最后一个*/

if (next_code == maxcode)

{ /* 如果在最后的哈息表刚好刚好是maxcode，则也必须把位数增加一位 */

++bitsize;

}

output(257); /* 输出结束标志*/

output(0); /* 解码时要用到 */

output(0);

}

/*
 * Entry point: compress the file named by argv[1] into "<argv[1]>.zzz".
 *
 * Returns 0 on success and 1 if the arguments are missing, the output
 * name does not fit, a file cannot be opened, or an I/O error occurs.
 */
int main(int argc,char* argv[])
{
    char filename[255];
    int length;
    int status = 0;

    if (argc < 2)
    {
        fprintf(stderr, "usage: the command format is: lzw_compression <filename>\n");
        return 1;
    }

    /* Append ".zzz" to mark the compressed file; refuse names that do not fit. */
    length = snprintf(filename, sizeof filename, "%s.zzz", argv[1]);
    if (length < 0 || (size_t) length >= sizeof filename)
    {
        fprintf(stderr, "Input file name is too long - %s\n", argv[1]);
        return 1;
    }

    if ((in = fopen(argv[1], "rb")) == NULL)
    {
        fprintf(stderr, "Cannot open input file - %s\n", argv[1]);
        return 1;
    }

    if ((out = fopen(filename, "wb")) == NULL)
    {
        fprintf(stderr, "Cannot open output file - %s\n", filename);
        fclose(in);
        return 1;
    }

    maxcode = MAXVAL(bitsize);

    lzwcompression();

    if (ferror(in))
    {
        fprintf(stderr, "Error while reading - %s\n", argv[1]);
        status = 1;
    }

    if (ferror(out))
    {
        fprintf(stderr, "Error while writing - %s\n", filename);
        status = 1;
    }

    fclose(in);

    /* fclose flushes buffered data, so a failure here means lost output. */
    if (fclose(out) != 0)
    {
        fprintf(stderr, "Error while closing - %s\n", filename);
        status = 1;
    }

    return status;
}