lzw 压缩算法源代码

最新推荐文章于 2022-04-29 20:01:09 发布

free2o

最新推荐文章于 2022-04-29 20:01:09 发布

阅读量2.4k

点赞数

分类专栏：算法和数据结构学习文章标签：算法 character output input file string

算法和数据结构学习专栏收录该内容

31 篇文章 1 订阅

订阅专栏

lzw 压缩算法源代码：

/* *******************************************************************
**
** Copyright (c) 1989 Mark R. Nelson
**
** LZW data compression/expansion demonstration program.
**
** April 13, 1989
**
** Minor mods made 7/19/2006 to conform with ANSI-C - prototypes, casting,
** and argument agreement.
**
**************************************************************************** */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BITS 12                   /* Setting the number of bits to 12, 13*/
#define HASHING_SHIFT (BITS-8)    /* or 14 affects several constants.    */
#define MAX_VALUE (1 << BITS) - 1 /* Note that MS-DOS machines need to   */
#define MAX_CODE MAX_VALUE - 1    /* compile their code in large model if*/
                                   /* 14 bits are selected.                */
#if BITS == 14
   #define TABLE_SIZE 18041        /* The string table size needs to be a */
#endif                             /* prime number that is somewhat larger*/
#if BITS == 13                    /* than 2**BITS.                       */
   #define TABLE_SIZE 9029
#endif
#if BITS <= 12
   #define TABLE_SIZE 5021
#endif

void * malloc();

int * code_value;                   /* This is the code value array         */
unsigned int * prefix_code;         /* This array holds the prefix codes    */
unsigned char * append_character;   /* This array holds the appended chars */
unsigned char decode_stack[ 4000 ]; /* This array holds the decoded string */

/*
* Forward declarations
*/
void compress(FILE * input,FILE * output);
void expand(FILE * input,FILE * output);
int find_match( int hash_prefix,unsigned int hash_character);
void output_code(FILE * output,unsigned int code);
unsigned int input_code(FILE * input);
unsigned char * decode_string(unsigned char * buffer,unsigned int code);

/* *******************************************************************
**
** This program gets a file name from the command line.  It compresses the
** file, placing its output in a file named test.lzw.  It then expands
** test.lzw into test.out.  Test.out should then be an exact duplicate of
** the input file.
**
************************************************************************ */

main( int argc, char * argv[])
{
FILE * input_file;
FILE * output_file;
FILE * lzw_file;
char input_file_name[ 81 ];

/*
**  The three buffers are needed for the compression phase.
*/
  code_value = ( int * )malloc(TABLE_SIZE * sizeof ( int ));
  prefix_code = (unsigned int * )malloc(TABLE_SIZE * sizeof (unsigned int ));
  append_character = (unsigned char * )malloc(TABLE_SIZE * sizeof (unsigned char ));
   if (code_value == NULL || prefix_code == NULL || append_character == NULL)
  {
    printf( " Fatal error allocating table space! " );
    exit( - 1 );
  }
/*
** Get the file name, open it up, and open up the lzw output file.
*/
   if (argc > 1 )
    strcpy(input_file_name,argv[ 1 ]);
   else
  {
    printf( " Input file name? " );
    scanf( " %s " ,input_file_name);
  }
  input_file = fopen(input_file_name, " rb " );
  lzw_file = fopen( " test.lzw " , " wb " );
   if (input_file == NULL || lzw_file == NULL)
  {
    printf( " Fatal error opening files. " );
    exit( - 1 );
  };
/*
** Compress the file.
*/
  compress(input_file,lzw_file);
  fclose(input_file);
  fclose(lzw_file);
  free(code_value);
/*
** Now open the files for the expansion.
*/
  lzw_file = fopen( " test.lzw " , " rb " );
  output_file = fopen( " test.out " , " wb " );
   if (lzw_file == NULL || output_file == NULL)
  {
    printf( " Fatal error opening files. " );
    exit( - 2 );
  };
/*
** Expand the file.
*/
  expand(lzw_file,output_file);
  fclose(lzw_file);
  fclose(output_file);

  free(prefix_code);
  free(append_character);
}

/*
** This is the compression routine.  The code should be a fairly close
** match to the algorithm accompanying the article.
**
*/

void compress(FILE * input,FILE * output)
{
unsigned int next_code;
unsigned int character;
unsigned int string_code;
unsigned int index;
int i;

  next_code = 256 ;               /* Next code is the next available string code */
   for (i = 0 ;i < TABLE_SIZE;i ++ )   /* Clear out the string table before starting */
    code_value[i] =- 1 ;

  i = 0 ;
  printf( " Compressing... " );
  string_code = getc(input);     /* Get the first code                          */
/*
** This is the main loop where it all happens.  This loop runs util all of
** the input has been exhausted.  Note that it stops adding codes to the
** table after all of the possible codes have been defined.
*/
   while ((character = getc(input)) != (unsigned)EOF)
  {
     if ( ++ i == 1000 )                          /* Print a * every 1000     */
    {                                       /* input characters.  This */
      i = 0 ;                                  /* is just a pacifier.      */
      printf( " * " );
    }
    index = find_match(string_code,character); /* See if the string is in */
     if (code_value[index] != - 1 )             /* the table.  If it is,    */
      string_code = code_value[index];         /* get the code value.  If */
     else                                      /* the string is not in the */
    {                                        /* table, try to add it.    */
       if (next_code <= MAX_CODE)
      {
        code_value[index] = next_code ++ ;
        prefix_code[index] = string_code;
        append_character[index] = character;
      }
      output_code(output,string_code);   /* When a string is found   */
      string_code = character;             /* that is not in the table */
    }                                    /* I output the last string */
  }                                      /* after adding the new one */
/*
** End of the main loop.
*/
  output_code(output,string_code); /* Output the last code                */
  output_code(output,MAX_VALUE);    /* Output the end of buffer code       */
  output_code(output, 0 );            /* This code flushes the output buffer */
  printf( " " );
}

/*
** This is the hashing routine.  It tries to find a match for the prefix+char
** string in the string table.  If it finds it, the index is returned.  If
** the string is not found, the first available index in the string table is
** returned instead.
*/

int find_match( int hash_prefix,unsigned int hash_character)
{
int index;
int offset;

  index = (hash_character << HASHING_SHIFT) ^ hash_prefix;
   if (index == 0 )
    offset = 1 ;
   else
    offset = TABLE_SIZE - index;
   while ( 1 )
  {
     if (code_value[index] == - 1 )
       return (index);
     if (prefix_code[index] == hash_prefix &&
        append_character[index] == hash_character)
       return (index);
    index -= offset;
     if (index < 0 )
      index += TABLE_SIZE;
  }
}

/*
**  This is the expansion routine.  It takes an LZW format file, and expands
**  it to an output file.  The code here should be a fairly close match to
**  the algorithm in the accompanying article.
*/

void expand(FILE * input,FILE * output)
{
unsigned int next_code;
unsigned int new_code;
unsigned int old_code;
int character;
int counter;
unsigned char * string ;

  next_code = 256 ;            /* This is the next available code to define */
  counter = 0 ;                /* Counter is used as a pacifier.             */
  printf( " Expanding... " );

  old_code = input_code(input);   /* Read in the first code, initialize the */
  character = old_code;           /* character variable, and send the first */
  putc(old_code,output);        /* code to the output file                 */
/*
**  This is the main expansion loop.  It reads in characters from the LZW file
**  until it sees the special code used to inidicate the end of the data.
*/
   while ((new_code = input_code(input)) != (MAX_VALUE))
  {
     if ( ++ counter == 1000 )    /* This section of code prints out      */
    {                       /* an asterisk every 1000 characters    */
      counter = 0 ;            /* It is just a pacifier.               */
      printf( " * " );
    }
/*
** This code checks for the special STRING+CHARACTER+STRING+CHARACTER+STRING
** case which generates an undefined code.  It handles it by decoding
** the last code, and adding a single character to the end of the decode string.
*/
     if (new_code >= next_code)
    {
       * decode_stack = character;
       string = decode_string(decode_stack + 1 ,old_code);
    }
/*
** Otherwise we do a straight decode of the new code.
*/
     else
       string = decode_string(decode_stack,new_code);
/*
** Now we output the decoded string in reverse order.
*/
    character =* string ;
     while ( string >= decode_stack)
      putc( * string -- ,output);
/*
** Finally, if possible, add a new code to the string table.
*/
     if (next_code <= MAX_CODE)
    {
      prefix_code[next_code] = old_code;
      append_character[next_code] = character;
      next_code ++ ;
    }
    old_code = new_code;
  }
  printf( " " );
}

/*
** This routine simply decodes a string from the string table, storing
** it in a buffer.  The buffer can then be output in reverse order by
** the expansion program.
*/

unsigned char * decode_string(unsigned char * buffer,unsigned int code)
{
int i;

  i = 0 ;
   while (code > 255 )
  {
     * buffer ++ = append_character[code];
    code = prefix_code[code];
     if (i ++>= MAX_CODE)
    {
      printf( " Fatal error during code expansion. " );
      exit( - 3 );
    }
  }
   * buffer = code;
   return (buffer);
}

/*
** The following two routines are used to output variable length
** codes.  They are written strictly for clarity, and are not
** particularly efficient.
*/

unsigned int input_code(FILE * input)
{
unsigned int return_value;
static int input_bit_count = 0 ;
static unsigned long input_bit_buffer = 0L ;

   while (input_bit_count <= 24 )
  {
    input_bit_buffer |=
        (unsigned long ) getc(input) << ( 24 - input_bit_count);
    input_bit_count += 8 ;
  }
  return_value = input_bit_buffer >> ( 32 - BITS);
  input_bit_buffer <<= BITS;
  input_bit_count -= BITS;
   return (return_value);
}

void output_code(FILE * output,unsigned int code)
{
static int output_bit_count = 0 ;
static unsigned long output_bit_buffer = 0L ;

  output_bit_buffer |= (unsigned long ) code << ( 32 - BITS - output_bit_count);
  output_bit_count += BITS;
   while (output_bit_count >= 8 )
  {
    putc(output_bit_buffer >> 24 ,output);
    output_bit_buffer <<= 8 ;
    output_bit_count -= 8 ;
  }
}