《The Data Compression Book》一书中"0 阶算术编码"的源代码,已经过测试。

/************************** Start of ARITH.C **************************/
#include <stdio.h>
#include <stdlib.h>
#include "errhand.h"
#include "bitio.h"

/*
 * SYMBOL describes one symbol in arithmetic-coding terms: a slice of
 * the model's cumulative-count line.  Because only integer math is
 * used, the conceptual end points 0 and 1 are replaced by 0 and
 * `scale`, and the symbol occupies the part of that line between
 * low_count and high_count.
 */
typedef struct {
    unsigned short low_count;   /* cumulative count where the slice starts */
    unsigned short high_count;  /* cumulative count where the slice ends   */
    unsigned short scale;       /* cumulative count of the whole model     */
} SYMBOL;

/*
 * Declarations for the functions defined in this file.  Compilers
 * that define __STDC__ see full prototypes; every other compiler sees
 * old-style declarations that give only the return type.  The two
 * lists must be kept in step with each other.
 */
#ifdef __STDC__

/* Model construction and (de)serialisation of the count table. */
void build_model( FILE *input, FILE *output );
void scale_counts( unsigned long counts[],
       unsigned char scaled_counts[] );
void build_totals( unsigned char scaled_counts[] );
void count_bytes( FILE *input, unsigned long counts[] );
void output_counts( FILE *output, unsigned char scaled_counts[] );
void input_counts( FILE *stream );
/* Translation between byte values and SYMBOL ranges. */
void convert_int_to_symbol( int symbol, SYMBOL *s );
void get_symbol_scale( SYMBOL *s );
int convert_symbol_to_int( int count, SYMBOL *s );
/* Arithmetic encoder. */
void initialize_arithmetic_encoder( void );
void encode_symbol( BIT_FILE *stream, SYMBOL *s );
void flush_arithmetic_encoder( BIT_FILE *stream );
/* Arithmetic decoder. */
short int get_current_count( SYMBOL *s );
void initialize_arithmetic_decoder( BIT_FILE *stream );
void remove_symbol_from_stream( BIT_FILE *stream, SYMBOL *s);

#else

/* Same functions, declared without parameter lists. */
void build_model();
void scale_counts();
void build_totals();
void count_bytes();
void output_counts();
void input_counts();
void convert_int_to_symbol();
void get_symbol_scale();
int convert_symbol_to_int();
void initialize_arithmetic_encoder();
void encode_symbol();
void flush_arithmetic_encoder();
short int get_current_count();
void initialize_arithmetic_decoder();
void remove_symbol_from_stream();

#endif

/* Pseudo-symbol that marks the end of the encoded data; one past the byte values. */
#define END_OF_STREAM 256

/*
 * Cumulative totals.  Symbol c owns the slice from totals[c] up to
 * totals[c+1].  There are END_OF_STREAM + 1 symbols (the 256 byte
 * values plus END_OF_STREAM itself), and build_totals() stores the
 * grand total in totals[END_OF_STREAM + 1], so the array needs
 * END_OF_STREAM + 2 entries.
 */
short int totals[ END_OF_STREAM + 2 ];

/*
 * NOTE(review): the model in this file is built from a first pass over
 * the input and stored in the output, so "Adaptive" looks like a
 * misnomer -- confirm before changing this user-visible text.
 */
char *CompressionName = "Adaptive order 0 model with arithmetic coding";
char *Usage     = "in-file out-file\n\n";

/*
 * Compress everything readable from `input` into `output`.
 *
 * build_model() makes a first pass over the input to gather the
 * statistics and writes the count table to the underlying file of
 * `output`.  The arithmetic encoder is then initialised, every input
 * byte is encoded, and finally the END_OF_STREAM symbol is encoded and
 * the encoder is flushed.
 *
 * Sixteen extra zero bits are written after the flush.  The decoder
 * works in the top bits of 16-bit registers, so while it decodes the
 * last real bit it still needs padding bits to shift in behind it;
 * these two bytes provide them.
 *
 * input:  file to compress.
 * output: bit-level output file.
 * argc, argv: leftover command-line arguments; each one is reported as
 *             unused, one per line.
 */

void CompressFile( input, output, argc, argv )
FILE *input;
BIT_FILE *output;
int argc;
char *argv[];
{
    int c;
    SYMBOL s;

    build_model( input, output->file );
    initialize_arithmetic_encoder();

    while ( ( c = getc( input ) ) != EOF ) {
        convert_int_to_symbol( c, &s );
        encode_symbol( output, &s );
    }
    convert_int_to_symbol( END_OF_STREAM, &s );
    encode_symbol( output, &s );
    flush_arithmetic_encoder( output );
    OutputBits( output, 0L, 16 );   /* padding bits for the decoder */

    for ( ; argc > 0; argc--, argv++ )
        printf( "Unused argument: %s\n", *argv );
}

/*
 * Expand the compressed data in `input` into `output`.
 *
 * The count table is read back from the underlying file of `input`,
 * the decoder is initialised, and symbols are decoded in a loop until
 * END_OF_STREAM is seen.  Decoding one symbol takes three steps:
 *   1. get_symbol_scale() fetches the scale of the model;
 *   2. get_current_count() finds where the current code value falls
 *      within that scale;
 *   3. convert_symbol_to_int() looks the count up in the totals table
 *      to find the matching symbol.
 * After that the symbol still has to be removed from the decoder state
 * with remove_symbol_from_stream() before the next one can be decoded.
 *
 * input:  bit-level input file.
 * output: file that receives the decoded bytes.
 * argc, argv: leftover command-line arguments; each one is reported as
 *             unused, one per line.
 */

void ExpandFile( input, output, argc, argv )
BIT_FILE *input;
FILE *output;
int argc;
char *argv[];
{
    SYMBOL s;
    int c;
    int count;

    input_counts( input->file );
    initialize_arithmetic_decoder( input );
    for ( ; ; ) {
        get_symbol_scale( &s );
        count = get_current_count( &s );
        c = convert_symbol_to_int( count, &s );
        if ( c == END_OF_STREAM )
            break;
        remove_symbol_from_stream( input, &s );
        putc( (char) c, output );
    }

    for ( ; argc > 0; argc--, argv++ )
        printf( "Unused argument: %s\n", *argv );
}


/*
 * Build the model for a file about to be compressed: count the bytes
 * in `input`, scale the counts down, write the scaled counts to
 * `output`, and finally build the cumulative totals table from them.
 */

void build_model( input, output )
FILE *input;
FILE *output;
{
    unsigned long raw_counts[ 256 ];
    unsigned char byte_counts[ 256 ];

    count_bytes( input, raw_counts );
    scale_counts( raw_counts, byte_counts );
    output_counts( output, byte_counts );
    build_totals( byte_counts );
}

/*
 * Count how often each byte value occurs between the current position
 * of `input` and end of file.  counts[] (256 entries) is cleared first
 * and then receives the tallies.  The file position found on entry is
 * restored before returning, so the caller can read the same data
 * again.
 */

#ifndef SEEK_SET
#define SEEK_SET 0
#endif

void count_bytes( input, counts )
FILE *input;
unsigned long counts[];
{
    long start_pos;
    int byte;
    int slot;

    slot = 256;
    while ( slot-- > 0 )
        counts[ slot ] = 0;

    start_pos = ftell( input );
    for ( byte = getc( input ); byte != EOF; byte = getc( input ) )
        counts[ byte ] += 1;
    fseek( input, start_pos, SEEK_SET );
}

/*
 * Scale the raw byte counts down in two stages.
 *
 * Stage 1 divides every count by a common factor so that each one fits
 * in a single unsigned char.  Stage 2 divides again (by 2 or 4) when
 * needed so that the sum of all counts, plus one for END_OF_STREAM, is
 * less than 16384.
 *
 * In both stages a count that was non-zero is never allowed to become
 * zero: a byte that occurs in the input must keep a non-empty slice of
 * the totals table, otherwise it cannot be encoded.
 *
 * counts:        256 raw occurrence counts (read only).
 * scaled_counts: 256 entries that receive the scaled counts.
 */

void scale_counts( counts, scaled_counts )
unsigned long counts[];
unsigned char scaled_counts[];
{
    int i;
    unsigned long max_count;
    unsigned int total;
    unsigned long scale;

    /*
     * Stage 1: pick the smallest divisor that brings the largest count
     * below 256 and apply it to every count.
     */
    max_count = 0;
    for ( i = 0; i < 256; i++ )
        if ( counts[ i ] > max_count )
            max_count = counts[ i ];
    scale = max_count / 256 + 1;
    for ( i = 0; i < 256; i++ ) {
        scaled_counts[ i ] = (unsigned char) ( counts[ i ] / scale );
        if ( scaled_counts[ i ] == 0 && counts[ i ] != 0 )
            scaled_counts[ i ] = 1;
    }

    /*
     * Stage 2: make sure the total is less than 16384.  The total
     * starts at 1 rather than 0 because one more count is added later
     * for the END_OF_STREAM symbol.
     */
    total = 1;
    for ( i = 0; i < 256; i++ )
        total += scaled_counts[ i ];
    if ( total > ( 32767 - 256 ) )
        scale = 4;
    else if ( total > 16383 )
        scale = 2;
    else
        return;

    for ( i = 0; i < 256; i++ ) {
        if ( scaled_counts[ i ] == 0 )
            continue;
        scaled_counts[ i ] = (unsigned char) ( scaled_counts[ i ] / scale );
        /*
         * Keep symbols that are present representable.  Each clamp adds
         * at most one to the total, and only for entries that were
         * already tiny, which is not enough to bring the total back up
         * to 16384.
         */
        if ( scaled_counts[ i ] == 0 )
            scaled_counts[ i ] = 1;
    }
}

/*
 * Fill the global totals[] table from the scaled counts.  Both the
 * encoder and the decoder call this.  totals[c] is the sum of the
 * counts of all symbols below c; the END_OF_STREAM symbol is given a
 * count of exactly one, so the last entry written,
 * totals[END_OF_STREAM + 1], is the grand total.
 */
void build_totals( scaled_counts )
unsigned char scaled_counts[];
{
    int sym;
    short int running;

    running = 0;
    for ( sym = 0; sym < END_OF_STREAM; sym++ ) {
        totals[ sym ] = running;
        running += scaled_counts[ sym ];
    }
    totals[ END_OF_STREAM ] = running;
    totals[ END_OF_STREAM + 1 ] = running + 1;
}

/*
 * Write the scaled counts to the compressed file so the expander can
 * rebuild the same model.  To save space the 256 counts are not all
 * written unconditionally.  The format is a list of runs:
 *
 *   first, last, count[first] .. count[last], first, last, ... , 0
 *
 * Runs are written until every non-zero count has been stored, and the
 * list is terminated by a `first` value of 0.  At least one run is
 * always written, so the reader accepts a first `first` of 0, and even
 * a table with no counts at all produces one run.
 *
 * A run is not ended by just one or two zero counts, because starting
 * a new run costs two bytes.  The inner loop therefore looks for a gap
 * of three or more zeros before closing a run.
 *
 * output:        file that receives the table.
 * scaled_counts: the 256 scaled counts to store.
 *
 * Any write failure is reported through fatal_error().
 */
void output_counts( output, scaled_counts )
FILE *output;
unsigned char scaled_counts[];
{
    int first;
    int last;
    int next;
    int i;

    /* Skip leading zeros; stopping at 255 guarantees at least one run. */
    first = 0;
    while ( first < 255 && scaled_counts[ first ] == 0 )
        first++;

    /*
     * On each pass `first` is the start of a run.  The body finds
     * `last`, the end of that run, and `next`, the start of the
     * following run, which becomes `first` for the next pass.
     */
    for ( ; first < 256; first = next ) {
        last = first + 1;
        for ( ; ; ) {
            /* Move last to the final non-zero count of this stretch. */
            for ( ; last < 256; last++ )
                if ( scaled_counts[ last ] == 0 )
                    break;
            last--;
            /* Find the next non-zero count after the gap. */
            for ( next = last + 1; next < 256; next++ )
                if ( scaled_counts[ next ] != 0 )
                    break;
            if ( next > 255 )
                break;              /* only zeros remain */
            if ( ( next - last ) > 3 )
                break;              /* gap of three or more zeros */
            last = next;            /* short gap: keep it inside the run */
        }

        /* Emit first, last and every count in between. */
        if ( putc( first, output ) != first )
            fatal_error( "Error writing byte counts\n" );
        if ( putc( last, output ) != last )
            fatal_error( "Error writing byte counts\n" );
        for ( i = first; i <= last; i++ ) {
            if ( putc( scaled_counts[ i ], output ) != (int) scaled_counts[ i ] )
                fatal_error( "Error writing byte counts\n" );
        }
    }

    /* Terminator: a start value of 0. */
    if ( putc( 0, output ) != 0 )
        fatal_error( "Error writing byte counts\n" );
}

/*
 * Read the count table written by output_counts() and rebuild the
 * totals table from it.  Reading is simpler than writing because no
 * decisions are needed: read `first` and `last`, read the counts for
 * that run, then read the next `first`; a value of 0 there ends the
 * list.  The very first `first` is accepted even when it is 0.
 *
 * Counts that are not covered by any run stay 0.  Hitting end of file
 * anywhere inside the table is reported through fatal_error().
 */
void input_counts( input )
FILE *input;
{
    int first;
    int last;
    int i;
    int c;
    unsigned char scaled_counts[ 256 ];

    for ( i = 0; i < 256; i++ )
        scaled_counts[ i ] = 0;

    if ( ( first = getc( input ) ) == EOF )
        fatal_error( "Error reading byte counts\n" );
    if ( ( last = getc( input ) ) == EOF )
        fatal_error( "Error reading byte counts\n" );

    for ( ; ; ) {
        for ( i = first; i <= last; i++ ) {
            if ( ( c = getc( input ) ) == EOF )
                fatal_error( "Error reading byte counts\n" );
            else
                scaled_counts[ i ] = (unsigned char) c;
        }
        if ( ( first = getc( input ) ) == EOF )
            fatal_error( "Error reading byte counts\n" );
        if ( first == 0 )
            break;
        if ( ( last = getc( input ) ) == EOF )
            fatal_error( "Error reading byte counts\n" );
    }

    build_totals( scaled_counts );
}

/*
 * Everything from here down defines the arithmetic coder section
 * of the program.
 */

/*
 * State of the arithmetic coder/decoder.  The three registers are
 * assumed to be 16 bits wide; declaring them as unsigned short gives
 * 16 bits on most 80X86 and 680X0 machines, as well as VAXen.
 */

static unsigned short code;     /* The present input code value      */
static unsigned short low;      /* Start of the current code range   */
static unsigned short high;     /* End of the current code range     */
long underflow_bits;            /* Number of underflow bits pending  */

/*
 * Reset the encoder before the first symbol is encoded.  low starts at
 * zero and high at all ones; high is treated as if an endless string
 * of 1 bits followed it, ready to be shifted into its low positions.
 * No underflow bits are pending at the start.
 */

void initialize_arithmetic_encoder()
{
    underflow_bits = 0;
    high = 0xffff;
    low = 0;
}

/*
 * Finish the encoded stream.  Significant bits are still held in the
 * low and high registers when encoding ends, so this writes the second
 * most significant bit of low, followed by its complement once for
 * every pending underflow bit plus one more time.
 *
 * stream: bit-level output file.
 */
void flush_arithmetic_encoder( stream )
BIT_FILE *stream;
{
    OutputBit( stream, low & 0x4000 );
    underflow_bits++;
    /* The loop body is the OutputBit call; it must run once per pending bit. */
    while ( underflow_bits-- > 0 ) {
        OutputBit( stream, ~low & 0x4000 );
    }
}

/*
 * Look up the range for symbol c.  Because totals[] holds cumulative
 * counts, the low count, high count and scale are plain table reads:
 * two neighbouring entries for the symbol and the last entry for the
 * scale.
 */
void convert_int_to_symbol( c, s)
int c;
SYMBOL *s;
{
    short int *entry = &totals[ c ];

    s->low_count = entry[ 0 ];
    s->high_count = entry[ 1 ];
    s->scale = totals[ END_OF_STREAM + 1 ];
}

/*
 * Store the scale of the model in s->scale.  The scale is the grand
 * total kept in the last entry of totals[].
 */
void get_symbol_scale( s )
SYMBOL *s;
{
    unsigned short grand_total = totals[ END_OF_STREAM + 1 ];

    s->scale = grand_total;
}

/*
 * Decoder lookup: find the symbol whose slice of totals[] contains
 * `count`.  The search walks down from END_OF_STREAM until it reaches
 * an entry that is not greater than `count`.  The symbol's low and
 * high counts are stored in *s as well, so that the symbol can be
 * removed from the arithmetic coded input afterwards.
 *
 * Returns the symbol found.
 */

int convert_symbol_to_int( count, s )
int count;
SYMBOL *s;
{
    int sym = END_OF_STREAM;

    while ( totals[ sym ] > count )
        sym--;

    s->low_count = totals[ sym ];
    s->high_count = totals[ sym + 1 ];
    return sym;
}

/*
 * Encode one symbol.  The symbol arrives in a SYMBOL structure as a
 * low count, a high count and a scale rather than as a conventional
 * probability range.  Encoding has two steps.  First, high and low
 * are narrowed to the part of the current range that belongs to the
 * symbol.  Then as many bits as possible are shifted out to the
 * output stream.  The routine returns once high and low are stable
 * again, i.e. no further bit can be shifted out.
 *
 * stream: bit-level output, written with OutputBit().
 * s:      symbol to encode; low_count, high_count and scale are read.
 *
 * Reads and updates the file-scope variables low, high and
 * underflow_bits.
 */

void encode_symbol( stream, s )
BIT_FILE *stream;
SYMBOL *s;
{
 long range;
/*
 * Rescale high and low for the new symbol.  high is assigned first
 * because both expressions use the old value of low.
 */
 range = (long)( high - low ) + 1;
 high = low + (unsigned short int)
     ((range*s->high_count)/s->scale - 1 );
 low = low + (unsigned short int)
     ((range*s->low_count)/s->scale);

/*
 * Shift bits out until high and low are far enough apart that
 * neither of the two cases below applies.
 */
 for (;;)
 {
/*
 * Top bits of high and low match: that bit is settled and is sent to
 * the output, followed by the opposite bit once for every pending
 * underflow bit.
 */
  if ( ( high&0x8000 ) == ( low&0x8000 ) )
  {
   OutputBit( stream, high&0x8000 );
   while( underflow_bits > 0 )
   {
    OutputBit( stream, ~high &0x8000 );
    underflow_bits--;
   }
  }
/*
 * Top bits differ, the second bit of low is 1 and the second bit of
 * high is 0: the numbers are in danger of underflow.  Record one
 * pending underflow bit and adjust the second bit of both registers
 * so that the shift below discards it.
 */
  else if ( (low&0x4000 ) && !(high&0x4000) )
  {
   underflow_bits += 1;
   low &= 0x3fff; 
   high |= 0x4000;
  }
  else return;
/*
 * Common shift for both cases above: a 0 enters low and a 1 enters
 * high.
 */
  low <<= 1;
  high <<= 1;
  high |= 1;
 }
}

/*
 * Decoder step: work out which count the current code value stands
 * for.  The caller must already have stored the model scale in
 * s->scale.  The returned count corresponds to the present code value
 * mapped from the current [low, high] range onto the scale:
 *
 *   code = count / s->scale
 */
short int get_current_count( s )
SYMBOL *s;
{
    long span;
    long offset;

    span = (long) ( high - low ) + 1;
    offset = (long) ( code - low ) + 1;
    return (short int) ( ( offset * s->scale - 1 ) / span );
}

/*
 * Prepare the decoder: low and high get their conventional starting
 * values, and the first 16 bits of the input stream are shifted into
 * the code register, first bit read ending up most significant.
 */
void initialize_arithmetic_decoder( stream )
BIT_FILE *stream;
{
    int bits_left;

    low = 0;
    high = 0xffff;

    code = 0;
    for ( bits_left = 16; bits_left > 0; bits_left-- ) {
        code <<= 1;
        code += InputBit( stream );
    }
}

/*
 * Working out what the present symbol is does not remove it from the
 * input bit stream.  After a symbol has been decoded, this routine
 * must be called to remove it: high and low are rescaled for the
 * symbol with the same arithmetic encode_symbol() uses, and for every
 * bit shifted out of them a new bit is read from the stream into code.
 *
 * stream: bit-level input, read with InputBit().
 * s:      the decoded symbol; low_count, high_count and scale are read.
 *
 * Reads and updates the file-scope variables low, high and code.
 */
void remove_symbol_from_stream( stream, s )
BIT_FILE *stream;
SYMBOL *s;
{
 long range;

/*
 * First, high and low are rescaled to the symbol being removed.  high
 * is assigned first because both expressions use the old low.
 */
 range = (long)(high-low) + 1;
 high = low + (unsigned short int)
   (( range*s->high_count )/s->scale - 1);
 low  = low + (unsigned short int)
   (( range*s->low_count )/s->scale);
/*
 * Next, any possible bits are shifted out.
 */
 for(;;)
 {
/*
 * If the top bits match, nothing special is needed (empty statement);
 * control falls through to the shift below.
 */
  if( ( high&0x8000 ) == ( low&0x8000) ) ;
/*
 * Else, if underflow is threatening, adjust the second bit of code,
 * low and high so that the shift below discards it.
 */
  else if ( ( low&0x4000) == 0x4000 && (high&0x4000) == 0 )
  {
   code ^= 0x4000;
   low &= 0x3fff;
   high |= 0x4000;
  }
  else
/*
 * Otherwise, nothing can be shifted out, so return.
 */
   return ;
/*
 * Shift all three registers: a 0 enters low, a 1 enters high, and the
 * next bit of the input stream enters code.
 */
  low <<= 1;
  high <<= 1;
  high |= 1;
  code <<= 1;
  code += InputBit( stream );

 }
}
/*************************** End of ARITH.C ****************************/
 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
《内存层次结构压缩入门》这本书通过解释内存层次结构中的压缩原理和技术,给读者提供了一个全面的指南。该书介绍了压缩在内存层次结构中的重要性,以及它对计算机系统性能和效率的影响。 第一章从介绍内存层次结构的基本概念开始,这是理解后续内容的基础。然后,书中详细讨论了压缩在内存层次结构中的应用和优化。这些应用包括对内存中的数据和指令进行压缩,以减少内存占用和传输成本,并提高数据的访问时间。 该书还涵盖了各种压缩算法和技术,包括无损压缩和有损压缩。无损压缩算法可以压缩数据而不丢失任何信息,这在一些需要精确数据的应用中非常重要。有损压缩算法可以在一定程度上丢失一些数据信息,但可以显著减少数据的存储空间和传输带宽。 除了介绍压缩算法和技术外,该书还讨论了压缩对计算机系统性能的影响。它涵盖了压缩对内存延迟、带宽、能耗和缓存性能的影响,并提供了一些优化技术和指导原则。此外,书中还提供了一些实际应用案例和实验结果,以帮助读者更好地理解和应用所学知识。 总之,这本书是一本全面介绍内存层次结构压缩的指南,适合那些对计算机系统设计和性能优化感兴趣的读者。无论是从理论还是实践的角度,该书都提供了丰富的内容和实用的指导。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值