Huffman

一、实验原理 
1、本实验中Huffman编码算法 
(1)将文件以ASCII字符流的形式读入,统计每个符号的发生频率; 
(2)将所有文件中出现过的字符按照频率从小到大的顺序排列; 
(3)每一次选出频率最小的两个值,作为二叉树的两个叶子节点,将它们的和作为其父节点;这两个叶子节点不再参与比较,新的父节点参与比较; 
(4)重复步骤(3),直到最后得到频率之和为1的根节点; 
(5)将形成的二叉树的左节点标0,右节点标1,把从最上面的根节点到最下面的叶子节点途中遇到的0、1序列串起来,得到了各个字符的编码表示。 
2、Huffman编码的数据结构设计,在程序实现中使用一种叫做二叉树的数据结构实现Huffman编码。 
(1)哈夫曼节点结构
typedef struct huffman_node_tag
{
   unsigned char isLeaf;// whether this node is a leaf
   unsigned long count;// weighted sum of the symbols this node represents
   struct huffman_node_tag *parent;// pointer to the parent node
   union 
   {
       struct 
       {
        struct huffman_node_tag *zero, *one; // child pointers for the 0 branch and the 1 branch
        };
       unsigned char symbol;// the symbol this node represents
   };
} huffman_node;

(2)哈夫曼码结构

typedef struct huffman_code_tag 
{
unsigned long numbits;// number of bits used by this code
unsigned char *bits; // pointer to the bit string of this code
} huffman_code;


具体代码:

Huffman.c

/*
 *  huffman - Encode/Decode files using Huffman encoding.
 *  http://huffman.sourceforge.net
 *  Copyright (C) 2003  Douglas Ryan Richardson; Gauss Interprise, Inc
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "huffman.h"




#ifdef WIN32
#include <winsock2.h>
#include <malloc.h>
#define alloca _alloca
#else
#include <netinet/in.h>
#endif






/*
 * A node of the Huffman tree. A node is either a leaf, which carries
 * one symbol, or an internal node, which carries two child pointers;
 * the two payloads share storage through the anonymous union.
 */
typedef struct huffman_node_tag
{
/* Set to 1 by new_leaf_node and to 0 by new_nonleaf_node; selects
   which union member is meaningful. */
unsigned char isLeaf;
/* For a leaf: number of occurrences counted for its symbol.
   For an internal node built while encoding: sum of its two
   children's counts. */
unsigned long count;
/* Parent node; 0 for a node that has not been attached to a parent. */
struct huffman_node_tag *parent;


union
{
/* Internal node: children reached by a 0 bit and by a 1 bit. */
struct
{
struct huffman_node_tag *zero, *one;
};
/* Leaf node: the byte value this leaf stands for. */
unsigned char symbol;
};
} huffman_node;


/*
 * The code assigned to one symbol: a bit count plus a heap-allocated
 * byte array holding the bits (released by free_code).
 */
typedef struct huffman_code_tag
{
/* The length of this code in bits. */
unsigned long numbits;


/* The bits that make up this code. The first
  bit is at position 0 in bits[0]. The second
  bit is at position 1 in bits[0]. The eighth
  bit is at position 7 in bits[0]. The ninth
  bit is at position 0 in bits[1]. */
unsigned char *bits;
} huffman_code;


/*
 * Per-symbol statistics used only for the report written by
 * draw_huffmaninfo.
 */
typedef struct huffman_info_tag
{
/* Relative frequency of the symbol: its count divided by the total
   number of input bytes (filled in by cal_freq). */
double freq;
/* Length of the symbol's code in bits (copied from huffman_code.numbits). */
unsigned long len_code;
/* Points at the bits array of the symbol's huffman_code; this is a
   copy of the pointer, not of the data it points to. */
unsigned char *code;
}huffman_info;






/*
 * numbytes_from_numbits returns how many bytes are needed
 * to hold numbits bits, rounding a partial byte up.
 */
static unsigned long
numbytes_from_numbits(unsigned long numbits)
{
    unsigned long whole_bytes = numbits / 8;

    /* A remainder means one more, partially used, byte. */
    if(numbits % 8 != 0)
        ++whole_bytes;

    return whole_bytes;
}


/*
 * get_bit extracts bit number i from the bits array and
 * returns it as 0 or 1. Bit 0 is the least significant
 * bit of bits[0]; bit 8 is the least significant bit of
 * bits[1], and so on.
 */
static unsigned char
get_bit(unsigned char* bits, unsigned long i)
{
    const unsigned char holder = bits[i >> 3];
    const unsigned int offset = (unsigned int)(i & 7u);

    return (unsigned char)((holder >> offset) & 1u);
}


/*
 * reverse_bits reverses, in place, the order of the first
 * numbits bits stored in bits (bit 0 is the least significant
 * bit of bits[0]). Any unused high bits of the last byte are
 * cleared. Nothing is done when numbits is 0.
 *
 * The reversal swaps bits pairwise from both ends, so it needs
 * no temporary buffer and no stack allocation proportional to
 * the code length.
 */
static void
reverse_bits(unsigned char* bits, unsigned long numbits)
{
    unsigned long lo;
    unsigned long hi;

    if(numbits == 0)
        return;

    for(lo = 0, hi = numbits - 1; lo < hi; ++lo, --hi)
    {
        const unsigned char lo_mask = (unsigned char)(1u << (lo % 8));
        const unsigned char hi_mask = (unsigned char)(1u << (hi % 8));
        const int lo_set = (bits[lo / 8] & lo_mask) != 0;
        const int hi_set = (bits[hi / 8] & hi_mask) != 0;

        /* Swapping two single bits only matters when they differ,
         * in which case both are simply flipped. */
        if(lo_set != hi_set)
        {
            bits[lo / 8] ^= lo_mask;
            bits[hi / 8] ^= hi_mask;
        }
    }

    /* Zero the padding bits above the last valid bit. */
    if(numbits % 8 != 0)
        bits[(numbits - 1) / 8] &= (unsigned char)((1u << (numbits % 8)) - 1u);
}


/*
 * new_code builds a huffman_code from a leaf in
 * a Huffman tree.
 *
 * The code is collected while walking from the leaf up to the
 * root (one bit per edge: 1 when the node is its parent's "one"
 * child, 0 otherwise) and is then reversed, because a code is
 * read while walking down the tree.
 *
 * Returns a newly allocated huffman_code, to be released with
 * free_code, or NULL if memory could not be allocated. A node
 * without a parent yields a code with numbits == 0 and
 * bits == NULL.
 */
static huffman_code*
new_code(const huffman_node* leaf)
{
    unsigned long numbits = 0;
    unsigned char *bits = NULL;
    huffman_code *p;

    while(leaf && leaf->parent)
    {
        const huffman_node *parent = leaf->parent;
        unsigned char cur_bit = (unsigned char)(numbits % 8);
        unsigned long cur_byte = numbits / 8;

        /* If we need another byte to hold the code,
         * then allocate it. */
        if(cur_bit == 0)
        {
            size_t newSize = cur_byte + 1;
            unsigned char *grown = (unsigned char*)realloc(bits, newSize);

            /* On failure the old block is still owned by us. */
            if(!grown)
            {
                free(bits);
                return NULL;
            }

            bits = grown;
            bits[newSize - 1] = 0; /* Initialize the new byte. */
        }

        /* If a one must be added then or it in. If a zero
         * must be added then do nothing, since the byte
         * was initialized to zero. */
        if(leaf == parent->one)
            bits[cur_byte] |= (unsigned char)(1u << cur_bit);

        ++numbits;
        leaf = parent;
    }

    if(bits)
        reverse_bits(bits, numbits);

    p = (huffman_code*)malloc(sizeof *p);
    if(!p)
    {
        free(bits);
        return NULL;
    }

    p->numbits = numbits;
    p->bits = bits;
    return p;
}


/* Number of table slots; the tables below are indexed by an
   unsigned char symbol value. */
#define MAX_SYMBOLS 256
/* Table of tree nodes. It is indexed by symbol while frequencies are
   counted; calculate_huffman_codes later sorts and merges it in place. */
typedef huffman_node* SymbolFrequencies[MAX_SYMBOLS];
/* Table of codes indexed by symbol; NULL for symbols without a code. */
typedef huffman_code* SymbolEncoder[MAX_SYMBOLS];
/* Added by cai: table of per-symbol statistics records, indexed by symbol. */
typedef huffman_info* SymbolInfo[MAX_SYMBOLS];


/*
 * new_leaf_node allocates a leaf node for symbol with a count of 0
 * and no parent.
 *
 * Returns the new node, to be released with free_huffman_tree,
 * or NULL if memory could not be allocated.
 */
static huffman_node*
new_leaf_node(unsigned char symbol)
{
    huffman_node *p = (huffman_node*)malloc(sizeof *p);

    if(!p)
        return NULL;

    p->isLeaf = 1;
    p->symbol = symbol;
    p->count = 0;
    p->parent = NULL;
    return p;
}


/*
 * new_nonleaf_node allocates an internal node with the given count
 * and children and no parent. The children's parent pointers are
 * not modified; the caller links them.
 *
 * Returns the new node, to be released with free_huffman_tree,
 * or NULL if memory could not be allocated.
 */
static huffman_node*
new_nonleaf_node(unsigned long count, huffman_node *zero, huffman_node *one)
{
    huffman_node *p = (huffman_node*)malloc(sizeof *p);

    if(!p)
        return NULL;

    p->isLeaf = 0;
    p->count = count;
    p->zero = zero;
    p->one = one;
    p->parent = NULL;
    return p;
}


/*
 * free_huffman_tree releases subtree and every node below it.
 * A NULL subtree is ignored. Children are only followed for
 * internal nodes, because a leaf stores its symbol in the same
 * storage as the child pointers.
 */
static void
free_huffman_tree(huffman_node *subtree)
{
    if(subtree != NULL)
    {
        if(subtree->isLeaf == 0)
        {
            huffman_node *zero_child = subtree->zero;
            huffman_node *one_child = subtree->one;

            free_huffman_tree(zero_child);
            free_huffman_tree(one_child);
        }

        free(subtree);
    }
}


/*
 * free_code releases a huffman_code together with its bit array.
 * p must not be NULL.
 */
static void
free_code(huffman_code* p)
{
    unsigned char *payload = p->bits;

    free(payload);
    free(p);
}


/*
 * free_encoder releases every code stored in the encoder table
 * and then the table itself.
 */
static void
free_encoder(SymbolEncoder *pSE)
{
    huffman_code **slot = *pSE;
    huffman_code **const end = slot + MAX_SYMBOLS;

    for(; slot != end; ++slot)
    {
        if(*slot != NULL)
            free_code(*slot);
    }

    free(pSE);
}


/*
 * init_frequencies clears every slot of the frequency table.
 * Leaf nodes are created later, on first sight of a symbol.
 */
static void
init_frequencies(SymbolFrequencies *pSF)
{
    memset(pSF, 0, sizeof *pSF);
}


/*
 * Staging buffer used when encoding to memory: small writes are
 * collected in cache and appended to the caller's growing output
 * buffer by flush_cache.
 */
typedef struct buf_cache_tag
{
/* Staging storage allocated by init_cache, released by free_cache. */
unsigned char *cache;
/* Capacity of cache in bytes. */
unsigned int cache_len;
/* Number of bytes currently held in cache. */
unsigned int cache_cur;
/* Address of the caller's output buffer pointer; the buffer is
   grown with realloc as data is appended. */
unsigned char **pbufout;
/* Address of the caller's output length, kept in step with *pbufout. */
unsigned int *pbufoutlen;
} buf_cache;


/*
 * init_cache prepares pc for writing: it allocates a staging
 * buffer of cache_size bytes and resets the caller's output
 * buffer (*pbufout = NULL, *pbufoutlen = 0).
 *
 * Returns 0 on success, 1 if an argument is NULL or the staging
 * buffer could not be allocated.
 */
static int init_cache(buf_cache* pc,
 unsigned int cache_size,
 unsigned char **pbufout,
 unsigned int *pbufoutlen)
{
    assert(pc && pbufout && pbufoutlen);

    /* The assert disappears under NDEBUG, so the runtime check has
     * to cover every pointer the assert covers, including pc. */
    if(!pc || !pbufout || !pbufoutlen)
        return 1;

    pc->cache = (unsigned char*)malloc(cache_size);
    pc->cache_len = cache_size;
    pc->cache_cur = 0;
    pc->pbufout = pbufout;
    *pbufout = NULL;
    pc->pbufoutlen = pbufoutlen;
    *pbufoutlen = 0;

    return pc->cache ? 0 : 1;
}


/*
 * free_cache releases the staging buffer of pc and clears the
 * pointer. The caller's output buffer is not touched.
 */
static void free_cache(buf_cache* pc)
{
    assert(pc);

    /* free(NULL) is a no-op, so no separate NULL test is needed. */
    free(pc->cache);
    pc->cache = NULL;
}


/*
 * flush_cache appends the bytes held in the staging buffer to the
 * caller's output buffer, growing that buffer with realloc, and
 * then marks the staging buffer as empty.
 *
 * Returns 0 on success (also when there was nothing to flush) and
 * 1 if the output buffer could not be grown; in that case nothing
 * is modified.
 */
static int flush_cache(buf_cache* pc)
{
    unsigned int pending;
    unsigned int used;
    unsigned int grown_len;
    unsigned char *grown;

    assert(pc);

    pending = pc->cache_cur;
    if(pending == 0)
        return 0;

    used = *pc->pbufoutlen;
    grown_len = pending + used;

    grown = realloc(*pc->pbufout, grown_len);
    if(grown == NULL)
        return 1;

    memcpy(grown + used, pc->cache, pending);

    *pc->pbufout = grown;
    *pc->pbufoutlen = grown_len;
    pc->cache_cur = 0;
    return 0;
}


/*
 * write_cache appends to_write_len bytes from to_write to the
 * output managed by pc.
 *
 * Data that fits in the free part of the staging buffer is copied
 * there. Data that does not fit bypasses the staging buffer: the
 * staged bytes are flushed first, so that output order is kept,
 * and the new data is then appended directly to the output buffer.
 *
 * Returns 0 on success, 1 if the output buffer could not be grown.
 */
static int write_cache(buf_cache* pc,
  const void *to_write,
  unsigned int to_write_len)
{
    assert(pc && to_write);
    assert(pc->cache_len >= pc->cache_cur);

    if(to_write_len > pc->cache_len - pc->cache_cur)
    {
        unsigned int newlen;
        unsigned char *tmp;

        /* If the flush fails the staged bytes are still pending;
         * appending the new data now would put it ahead of them
         * and silently corrupt the output, so stop here. */
        if(flush_cache(pc))
            return 1;

        newlen = *pc->pbufoutlen + to_write_len;
        tmp = realloc(*pc->pbufout, newlen);
        if(!tmp)
            return 1;

        memcpy(tmp + *pc->pbufoutlen, to_write, to_write_len);
        *pc->pbufout = tmp;
        *pc->pbufoutlen = newlen;
    }
    else
    {
        /* Write the data to the cache. */
        memcpy(pc->cache + pc->cache_cur, to_write, to_write_len);
        pc->cache_cur += to_write_len;
    }

    return 0;
}


/*
 * get_symbol_frequencies reads in until EOF and counts how often
 * each byte value occurs. The table is cleared first; a leaf node
 * is created for a symbol the first time it is seen, so symbols
 * that never occur keep a NULL slot.
 *
 * Returns the total number of bytes read.
 */
static unsigned int
get_symbol_frequencies(SymbolFrequencies *pSF, FILE *in)
{
    unsigned int seen = 0;
    int ch;

    init_frequencies(pSF);

    for(ch = fgetc(in); ch != EOF; ch = fgetc(in))
    {
        unsigned char sym = ch;
        huffman_node **slot = &(*pSF)[sym];

        if(*slot == NULL)
            *slot = new_leaf_node(sym);

        (*slot)->count += 1;
        seen += 1;
    }

    return seen;
}


/*
 * get_symbol_frequencies_from_memory counts how often each byte
 * value occurs in the first bufinlen bytes of bufin. The table is
 * cleared first; a leaf node is created for a symbol the first
 * time it is seen, so symbols that never occur keep a NULL slot.
 *
 * Returns the total number of bytes counted.
 */
static unsigned int
get_symbol_frequencies_from_memory(SymbolFrequencies *pSF,
  const unsigned char *bufin,
  unsigned int bufinlen)
{
    unsigned int seen = 0;
    unsigned int pos = 0;

    init_frequencies(pSF);

    while(pos < bufinlen)
    {
        unsigned char sym = bufin[pos];
        huffman_node **slot = &(*pSF)[sym];

        if(*slot == NULL)
            *slot = new_leaf_node(sym);

        (*slot)->count += 1;
        seen += 1;
        ++pos;
    }

    return seen;
}


/*
 * SFComp is the qsort comparison function for a table of
 * huffman_node pointers. Entries are ordered by ascending
 * count; NULL entries compare greater than any node, so
 * they collect at the end of the table.
 */
static int
SFComp(const void *p1, const void *p2)
{
    const huffman_node *lhs = *(const huffman_node**)p1;
    const huffman_node *rhs = *(const huffman_node**)p2;

    /* At least one NULL: the NULL side (if only one) is "greater". */
    if(lhs == NULL || rhs == NULL)
        return (lhs == NULL) - (rhs == NULL);

    return (lhs->count > rhs->count) - (lhs->count < rhs->count);
}


#if 0
/*
 * print_freqs is a debugging aid (compiled out): it prints one
 * line per table slot, "symbol, count" for a node and "NULL"
 * for an empty slot.
 */
static void
print_freqs(SymbolFrequencies * pSF)
{
    size_t i;
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        if((*pSF)[i])
            /* count is unsigned long, so it needs %lu. */
            printf("%d, %lu\n", (*pSF)[i]->symbol, (*pSF)[i]->count);
        else
            printf("NULL\n");
    }
}
#endif


/*
 * build_symbol_encoder visits every leaf below subtree and
 * stores the code of each leaf, as computed by new_code, in
 * the encoder table slot of the leaf's symbol. A NULL subtree
 * is ignored.
 */
static void
build_symbol_encoder(huffman_node *subtree, SymbolEncoder *pSF)
{
    if(!subtree)
        return;

    if(!subtree->isLeaf)
    {
        /* Internal node: descend into both children. */
        build_symbol_encoder(subtree->zero, pSF);
        build_symbol_encoder(subtree->one, pSF);
        return;
    }

    (*pSF)[subtree->symbol] = new_code(subtree);
}


/*
 * calculate_huffman_codes turns pSF into an array
 * with a single entry that is the root of the
 * huffman tree. The return value is a SymbolEncoder,
 * which is an array of huffman codes indexed by symbol value.
 *
 * Special cases:
 *  - No symbols at all (empty input): the tree is empty,
 *    (*pSF)[0] stays NULL and the returned encoder has no codes.
 *  - Exactly one distinct symbol: the leaf is hung below an
 *    internal root as its "zero" child so the symbol gets the
 *    1-bit code "0". Without this the only code would have length
 *    0; a zero-length code emits no bits, and the decoders in this
 *    file advance only by consuming bits, so the data could not
 *    be recovered.
 *
 * Returns NULL if memory could not be allocated. In that case
 * every node reachable from pSF has been freed and all slots of
 * pSF are NULL. The encoder is released with free_encoder and the
 * tree, (*pSF)[0], with free_huffman_tree.
 */
static SymbolEncoder*
calculate_huffman_codes(SymbolFrequencies * pSF)
{
    unsigned int i = 0;
    unsigned int n = 0;
    SymbolEncoder *pSE = NULL;

    /* Sort the symbol frequency array by ascending frequency;
     * empty slots end up at the back. */
    qsort((*pSF), MAX_SYMBOLS, sizeof((*pSF)[0]), SFComp);

    /* Get the number of symbols. */
    for(n = 0; n < MAX_SYMBOLS && (*pSF)[n]; ++n)
        ;

    if(n == 1)
    {
        huffman_node *only = (*pSF)[0];
        huffman_node *top = new_nonleaf_node(only->count, only, NULL);

        if(!top)
            goto fail;

        only->parent = top;
        (*pSF)[0] = top;
    }

    /*
     * Construct a Huffman tree. This code is based
     * on the algorithm given in Managing Gigabytes
     * by Ian Witten et al, 2nd edition, page 34.
     * Note that this implementation uses a simple
     * count instead of probability.
     *
     * The bound is written as i + 1 < n because n is unsigned:
     * n - 1 would wrap around when n is 0.
     */
    for(i = 0; i + 1 < n; ++i)
    {
        /* The two subsets of least count are at the front. */
        huffman_node *m1 = (*pSF)[0];
        huffman_node *m2 = (*pSF)[1];
        huffman_node *merged =
            new_nonleaf_node(m1->count + m2->count, m1, m2);

        /* m1 and m2 are still in the table here, so the failure
         * path below releases them as well. */
        if(!merged)
            goto fail;

        /* Replace m1 and m2 with the set {m1, m2} whose count
         * is the sum of that of m1 and m2. */
        m1->parent = merged;
        m2->parent = merged;
        (*pSF)[0] = merged;
        (*pSF)[1] = NULL;

        /* Put the merged set into its sorted position. */
        qsort((*pSF), n, sizeof((*pSF)[0]), SFComp);
    }

    /* Build the SymbolEncoder array from the tree. */
    pSE = (SymbolEncoder*)malloc(sizeof(SymbolEncoder));
    if(!pSE)
        goto fail;

    memset(pSE, 0, sizeof(SymbolEncoder));
    build_symbol_encoder((*pSF)[0], pSE);
    return pSE;

fail:
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        free_huffman_tree((*pSF)[i]);
        (*pSF)[i] = NULL;
    }
    return NULL;
}


/*
 * Write the huffman code table. The format is:
 * 4 byte code count in network byte order.
 * 4 byte number of bytes encoded
 *   (if you decode the data, you should get this number of bytes)
 * code1
 * ...
 * codeN, where N is the count read at the beginning of the file.
 * Each codeI has the following format:
 * 1 byte symbol, 1 byte code bit length, code bytes.
 * Each entry has numbytes_from_numbits code bytes.
 * The last byte of each code may have extra bits, if the number of
 * bits in the code is not a multiple of 8.
 *
 * Returns 0 on success and 1 if any write to out fails.
 */
static int
write_code_table(FILE* out, SymbolEncoder *se, unsigned int symbol_count)
{
    unsigned long i;
    unsigned int count = 0;
    unsigned int net_value;

    /* Determine the number of entries in se. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        if((*se)[i])
            ++count;
    }

    /* Write the number of entries in network byte order. The value
     * is held in an unsigned int, the same type read_code_table
     * reads it into; an unsigned long may be wider (8 bytes on LP64
     * platforms) and would then shift every following field. */
    net_value = htonl(count);
    if(fwrite(&net_value, sizeof(net_value), 1, out) != 1)
        return 1;

    /* Write the number of bytes that will be encoded. */
    net_value = htonl(symbol_count);
    if(fwrite(&net_value, sizeof(net_value), 1, out) != 1)
        return 1;

    /* Write the entries. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        huffman_code *p = (*se)[i];
        if(p)
        {
            unsigned int numbytes = numbytes_from_numbits(p->numbits);

            /* Write the 1 byte symbol. */
            if(fputc((unsigned char)i, out) == EOF)
                return 1;
            /* Write the 1 byte code bit length. */
            if(fputc((unsigned char)p->numbits, out) == EOF)
                return 1;
            /* Write the code bytes. */
            if(numbytes > 0 &&
               fwrite(p->bits, 1, numbytes, out) != numbytes)
                return 1;
        }
    }

    return 0;
}


/*
 * Writes the code table through the buffer cache pc, using the
 * same layout that write_code_table writes to a file: a 4 byte
 * entry count and a 4 byte encoded-byte count, both in network
 * byte order, followed by one (symbol, bit length, code bytes)
 * record per symbol that has a code.
 *
 * Returns 0 on success and 1 if a write to the cache fails.
 */
static int
write_code_table_to_memory(buf_cache *pc,
  SymbolEncoder *se,
  unsigned int symbol_count)
{
    unsigned long i;
    unsigned int count = 0;
    unsigned int net_value;

    /* Determine the number of entries in se. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        if((*se)[i])
            ++count;
    }

    /* Write the number of entries in network byte order. The value
     * is held in an unsigned int, the same type
     * read_code_table_from_memory reads it into; an unsigned long
     * may be wider (8 bytes on LP64 platforms) and would then shift
     * every following field. */
    net_value = htonl(count);
    if(write_cache(pc, &net_value, sizeof(net_value)))
        return 1;

    /* Write the number of bytes that will be encoded. */
    net_value = htonl(symbol_count);
    if(write_cache(pc, &net_value, sizeof(net_value)))
        return 1;

    /* Write the entries. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        huffman_code *p = (*se)[i];
        if(p)
        {
            unsigned int numbytes = numbytes_from_numbits(p->numbits);
            /* The value of i is < MAX_SYMBOLS (256), so it can
               be stored in an unsigned char. */
            unsigned char uc = (unsigned char)i;

            /* Write the 1 byte symbol. */
            if(write_cache(pc, &uc, sizeof(uc)))
                return 1;
            /* Write the 1 byte code bit length. */
            uc = (unsigned char)p->numbits;
            if(write_cache(pc, &uc, sizeof(uc)))
                return 1;
            /* Write the code bytes (none for a zero-length code). */
            if(numbytes > 0 && write_cache(pc, p->bits, numbytes))
                return 1;
        }
    }

    return 0;
}


/*
 * read_code_table builds a Huffman tree from the code
 * in the in file. This function returns NULL on error.
 * The returned value should be freed with free_huffman_tree.
 */
static huffman_node*
read_code_table(FILE* in, unsigned int *pDataBytes)
{
huffman_node *root = new_nonleaf_node(0, NULL, NULL);
unsigned int count;

/* Read the number of entries.
  (it is stored in network byte order). */
if(fread(&count, sizeof(count), 1, in) != 1)
{
free_huffman_tree(root);
return NULL;
}


count = ntohl(count);


/* Read the number of data bytes this encoding represents. */
if(fread(pDataBytes, sizeof(*pDataBytes), 1, in) != 1)
{
free_huffman_tree(root);
return NULL;
}


*pDataBytes = ntohl(*pDataBytes);




/* Read the entries. */
while(count-- > 0)
{
int c;
unsigned int curbit;
unsigned char symbol;
unsigned char numbits;
unsigned char numbytes;
unsigned char *bytes;
huffman_node *p = root;

if((c = fgetc(in)) == EOF)
{
free_huffman_tree(root);
return NULL;
}
symbol = (unsigned char)c;

if((c = fgetc(in)) == EOF)
{
free_huffman_tree(root);
return NULL;
}

numbits = (unsigned char)c;
numbytes = (unsigned char)numbytes_from_numbits(numbits);
bytes = (unsigned char*)malloc(numbytes);
if(fread(bytes, 1, numbytes, in) != numbytes)
{
free(bytes);
free_huffman_tree(root);
return NULL;
}


/*
* Add the entry to the Huffman tree. The value
* of the current bit is used switch between
* zero and one child nodes in the tree. New nodes
* are added as needed in the tree.
*/
for(curbit = 0; curbit < numbits; ++curbit)
{
if(get_bit(bytes, curbit))
{
if(p->one == NULL)
{
p->one = curbit == (unsigned char)(numbits - 1)
? new_leaf_node(symbol)
: new_nonleaf_node(0, NULL, NULL);
p->one->parent = p;
}
p = p->one;
}
else
{
if(p->zero == NULL)
{
p->zero = curbit == (unsigned char)(numbits - 1)
? new_leaf_node(symbol)
: new_nonleaf_node(0, NULL, NULL);
p->zero->parent = p;
}
p = p->zero;
}
}

free(bytes);
}


return root;
}


/*
 * memread copies readlen bytes from buf, starting at offset
 * *pindex, into bufout and advances *pindex by readlen.
 *
 * buflen is the total size of buf. A read may end exactly at the
 * end of the buffer. The remaining space is computed by
 * subtraction, so the bounds check cannot wrap around.
 *
 * Returns 0 on success and 1 if an argument is NULL, *pindex is
 * beyond the buffer, or fewer than readlen bytes remain; on
 * failure nothing is copied and *pindex is unchanged.
 */
static int
memread(const unsigned char* buf,
unsigned int buflen,
unsigned int *pindex,
void* bufout,
unsigned int readlen)
{
    assert(buf && pindex && bufout);
    assert(buflen >= *pindex);

    /* Repeat the checks at run time: asserts vanish under NDEBUG. */
    if(!buf || !pindex || !bufout)
        return 1;
    if(buflen < *pindex)
        return 1;
    if(readlen > buflen - *pindex)
        return 1;

    memcpy(bufout, buf + *pindex, readlen);
    *pindex += readlen;
    return 0;
}


static huffman_node*
read_code_table_from_memory(const unsigned char* bufin,
unsigned int bufinlen,
unsigned int *pindex,
unsigned int *pDataBytes)
{
huffman_node *root = new_nonleaf_node(0, NULL, NULL);
unsigned int count;

/* Read the number of entries.
  (it is stored in network byte order). */
if(memread(bufin, bufinlen, pindex, &count, sizeof(count)))
{
free_huffman_tree(root);
return NULL;
}


count = ntohl(count);


/* Read the number of data bytes this encoding represents. */
if(memread(bufin, bufinlen, pindex, pDataBytes, sizeof(*pDataBytes)))
{
free_huffman_tree(root);
return NULL;
}


*pDataBytes = ntohl(*pDataBytes);


/* Read the entries. */
while(count-- > 0)
{
unsigned int curbit;
unsigned char symbol;
unsigned char numbits;
unsigned char numbytes;
unsigned char *bytes;
huffman_node *p = root;


if(memread(bufin, bufinlen, pindex, &symbol, sizeof(symbol)))
{
free_huffman_tree(root);
return NULL;
}


if(memread(bufin, bufinlen, pindex, &numbits, sizeof(numbits)))
{
free_huffman_tree(root);
return NULL;
}

numbytes = (unsigned char)numbytes_from_numbits(numbits);
bytes = (unsigned char*)malloc(numbytes);
if(memread(bufin, bufinlen, pindex, bytes, numbytes))
{
free(bytes);
free_huffman_tree(root);
return NULL;
}


/*
* Add the entry to the Huffman tree. The value
* of the current bit is used switch between
* zero and one child nodes in the tree. New nodes
* are added as needed in the tree.
*/
for(curbit = 0; curbit < numbits; ++curbit)
{
if(get_bit(bytes, curbit))
{
if(p->one == NULL)
{
p->one = curbit == (unsigned char)(numbits - 1)
? new_leaf_node(symbol)
: new_nonleaf_node(0, NULL, NULL);
p->one->parent = p;
}
p = p->one;
}
else
{
if(p->zero == NULL)
{
p->zero = curbit == (unsigned char)(numbits - 1)
? new_leaf_node(symbol)
: new_nonleaf_node(0, NULL, NULL);
p->zero->parent = p;
}
p = p->zero;
}
}

free(bytes);
}


return root;
}


/*
 * do_file_encode reads in until EOF and writes the code of every
 * byte to out. Code bits are packed into output bytes starting at
 * the least significant bit; a final, partially filled byte is
 * written with its unused high bits set to zero.
 *
 * Returns 0 on success and 1 if a byte has no code in se or a
 * write to out fails.
 */
static int
do_file_encode(FILE* in, FILE* out, SymbolEncoder *se)
{
    unsigned char curbyte = 0;
    unsigned char curbit = 0;
    int c;

    while((c = fgetc(in)) != EOF)
    {
        const huffman_code *code = (*se)[(unsigned char)c];
        unsigned long i;

        /* The table was built from an earlier pass over the input;
         * a symbol without a code cannot be encoded. */
        if(!code)
            return 1;

        for(i = 0; i < code->numbits; ++i)
        {
            /* Add the current bit to curbyte. */
            curbyte |= (unsigned char)(get_bit(code->bits, i) << curbit);

            /* If this byte is filled up then write it
             * out and reset the curbit and curbyte. */
            if(++curbit == 8)
            {
                if(fputc(curbyte, out) == EOF)
                    return 1;
                curbyte = 0;
                curbit = 0;
            }
        }
    }

    /*
     * If there is data in curbyte that has not been
     * output yet, which means that the last encoded
     * character did not fall on a byte boundary,
     * then output it.
     */
    if(curbit > 0 && fputc(curbyte, out) == EOF)
        return 1;

    return 0;
}


/*
 * do_memory_encode writes the code of every byte in
 * bufin[0..bufinlen) through the buffer cache pc. Code bits are
 * packed into output bytes starting at the least significant bit;
 * a final, partially filled byte is written with its unused high
 * bits set to zero.
 *
 * Returns 0 on success and 1 if a byte has no code in se or a
 * write to the cache fails.
 */
static int
do_memory_encode(buf_cache *pc,
const unsigned char* bufin,
unsigned int bufinlen,
SymbolEncoder *se)
{
    unsigned char curbyte = 0;
    unsigned char curbit = 0;
    unsigned int pos;

    for(pos = 0; pos < bufinlen; ++pos)
    {
        const huffman_code *code = (*se)[bufin[pos]];
        /* A separate name for the bit index: reusing the name of the
         * byte index here would shadow it. */
        unsigned long bit;

        /* A symbol without a code cannot be encoded. */
        if(!code)
            return 1;

        for(bit = 0; bit < code->numbits; ++bit)
        {
            /* Add the current bit to curbyte. */
            curbyte |= (unsigned char)(get_bit(code->bits, bit) << curbit);

            /* If this byte is filled up then write it
             * out and reset the curbit and curbyte. */
            if(++curbit == 8)
            {
                if(write_cache(pc, &curbyte, sizeof(curbyte)))
                    return 1;
                curbyte = 0;
                curbit = 0;
            }
        }
    }

    /*
     * If there is data in curbyte that has not been
     * output yet, which means that the last encoded
     * character did not fall on a byte boundary,
     * then output it.
     */
    return curbit > 0 ? write_cache(pc, &curbyte, sizeof(curbyte)) : 0;
}






/*
 * cal_freq allocates one huffman_info record per table slot and
 * fills in the relative frequency of each symbol: the symbol's
 * count divided by total_count, or 0 for a symbol with no node
 * in sf. len_code and code are initialised to 0 / NULL; they are
 * filled in later by draw_huffmaninfo.
 *
 * sf must still be indexed by symbol value, i.e. this has to run
 * before calculate_huffman_codes reorders the table.
 *
 * The caller owns the records and releases each one with free().
 * If a record cannot be allocated its slot is set to NULL.
 */
void cal_freq(SymbolInfo *si, SymbolFrequencies *sf, unsigned int total_count)
{
    for (int i = 0; i < MAX_SYMBOLS; i++)
    {
        huffman_info *info = (huffman_info*)malloc(sizeof *info);

        (*si)[i] = info;
        if (!info)
            continue;

        info->freq = 0;
        info->len_code = 0;
        info->code = NULL;

        if ((*sf)[i] && total_count > 0)
            info->freq = (double) (*sf)[i]->count / total_count;
    }
}
/*
 * draw_huffmaninfo writes a tab-separated report to file_info:
 * a header line followed by one line per symbol giving the symbol
 * value, its relative frequency, its code length in bits and the
 * code itself as a string of 0/1 digits ("0" for a symbol that
 * does not occur).
 *
 * Before printing, the code length and a pointer to the code bits
 * are copied from se into the matching si record. The pointer
 * refers to memory owned by se, so the report data must not be
 * used after the encoder has been freed.
 *
 * file_info is closed before the function returns. Nothing is
 * done if any argument is NULL; slots of si that are NULL are
 * skipped.
 */
void draw_huffmaninfo(FILE *file_info, SymbolInfo *si, SymbolEncoder *se)
{
    if (!file_info || !si || !se)
        return;

    for (int i = 0; i < MAX_SYMBOLS; i++)
    {
        if ((*se)[i] && (*si)[i])
        {
            (*si)[i]->len_code = (*se)[i]->numbits;
            (*si)[i]->code = (*se)[i]->bits;
        }
    }

    fprintf(file_info, "Symbol\t Freq\t Length\t Code\n");
    for (int i = 0; i < MAX_SYMBOLS; i++)
    {
        const huffman_info *info = (*si)[i];

        if (!info)
            continue;

        fprintf(file_info, "%d\t", i);
        fprintf(file_info, "%f\t", info->freq);
        /* len_code is unsigned long, so it needs %lu. */
        fprintf(file_info, "%lu\t", info->len_code);

        if (info->freq != 0)
        {
            if (info->code)
            {
                for (unsigned long j = 0; j < info->len_code; j++)
                {
                    unsigned char c = get_bit(info->code, j);
                    fprintf(file_info, "%d", c);
                }
            }
        }
        else
        {
            fprintf(file_info, "0");
        }

        fprintf(file_info, "\n"); /* end of this symbol's row */
    }

    fclose(file_info);
}


int
huffman_encode_file(FILE *in, FILE *out,FILE *file)
{


SymbolFrequencies sf;
SymbolEncoder *se;
SymbolInfo si;
huffman_node *root = NULL;
int rc;
unsigned int symbol_count;


/* Get the frequency of each symbol in the input file. */
symbol_count = get_symbol_frequencies(&sf, in);


cal_freq(&si, &sf, symbol_count);


/* Build an optimal table from the symbolCount. */
se = calculate_huffman_codes(&sf);
draw_huffmaninfo(file, &si, se);


root = sf[0];


// draw_huffmaninfo(file, &sf, &se, symbol_count);/*输出表格数据*/
printf("draw excel ok\n");








/* Scan the file again and, using the table
  previously built, encode it into the output file. */
rewind(in);
rc = write_code_table(out, se, symbol_count);
if(rc == 0)
rc = do_file_encode(in, out, se);




/* Free the Huffman tree. */
free_huffman_tree(root);
free_encoder(se);
return rc;
}


/*
 * huffman_decode_file decodes the Huffman encoded stream in
 * (a code table followed by encoded data) and writes the decoded
 * bytes to out.
 *
 * Returns 0 on success. Returns 1 if an argument is NULL, the code
 * table cannot be read, the data follows a branch that does not
 * exist in the tree, the input ends before the announced number
 * of bytes has been decoded, or a write to out fails. Bytes
 * decoded before an error was detected have already been written
 * to out.
 */
int
huffman_decode_file(FILE *in, FILE *out)
{
    huffman_node *root, *p;
    int c;
    unsigned int data_count;

    if(!in || !out)
        return 1;

    /* Read the Huffman code table. */
    root = read_code_table(in, &data_count);
    if(!root)
        return 1;

    /* Decode the file. */
    p = root;
    while(data_count > 0 && (c = fgetc(in)) != EOF)
    {
        unsigned char byte = (unsigned char)c;
        unsigned char mask = 1;

        /* mask walks from bit 0 to bit 7 and becomes 0 after that. */
        while(data_count > 0 && mask)
        {
            p = byte & mask ? p->one : p->zero;
            mask <<= 1;

            /* No such branch: the data does not match the table. */
            if(!p)
            {
                free_huffman_tree(root);
                return 1;
            }

            if(p->isLeaf)
            {
                if(fputc(p->symbol, out) == EOF)
                {
                    free_huffman_tree(root);
                    return 1;
                }
                p = root;
                --data_count;
            }
        }
    }

    free_huffman_tree(root);

    /* Bytes still outstanding means the input ended too early. */
    return data_count > 0 ? 1 : 0;
}


/* Size in bytes of the staging cache that huffman_encode_memory
   passes to init_cache. */
#define CACHE_SIZE 1024


/*
 * huffman_encode_memory huffman encodes the bufinlen bytes at
 * bufin into a newly allocated buffer.
 *
 * On success 0 is returned, *pbufout points to the encoded data
 * (code table followed by the encoded bytes) and *pbufoutlen is
 * its length; the caller releases the buffer with free().
 *
 * On failure a non-zero value is returned. If the failure happens
 * after the argument checks, *pbufout is NULL and *pbufoutlen is
 * 0, so no partial output is left behind.
 */
int huffman_encode_memory(const unsigned char *bufin,
 unsigned int bufinlen,
 unsigned char **pbufout,
 unsigned int *pbufoutlen)
{
    SymbolFrequencies sf;
    SymbolEncoder *se;
    huffman_node *root = NULL;
    int rc = 1;
    unsigned int symbol_count;
    buf_cache cache;

    /* Ensure the arguments are valid. */
    if(!pbufout || !pbufoutlen)
        return 1;
    if(!bufin && bufinlen > 0)
        return 1;

    if(init_cache(&cache, CACHE_SIZE, pbufout, pbufoutlen))
        return 1;

    /* Get the frequency of each symbol in the input memory. */
    symbol_count = get_symbol_frequencies_from_memory(&sf, bufin, bufinlen);

    /* Build an optimal table from the symbolCount. */
    se = calculate_huffman_codes(&sf);
    root = sf[0];

    if(se)
    {
        /* Scan the memory again and, using the table
           previously built, encode it into the output memory. */
        rc = write_code_table_to_memory(&cache, se, symbol_count);
        if(rc == 0)
            rc = do_memory_encode(&cache, bufin, bufinlen, se);

        /* Flush the cache. A failed flush means the output is
         * incomplete, so it has to be reported. */
        if(rc == 0)
            rc = flush_cache(&cache);
    }

    /* Free the Huffman tree. */
    free_huffman_tree(root);
    if(se)
        free_encoder(se);
    free_cache(&cache);

    /* Do not hand a partial result to the caller. */
    if(rc != 0)
    {
        free(*pbufout);
        *pbufout = NULL;
        *pbufoutlen = 0;
    }

    return rc;
}


/*
 * huffman_decode_memory decodes the Huffman encoded data in
 * bufin[0..bufinlen) (a code table followed by encoded data)
 * into a newly allocated buffer.
 *
 * On success 0 is returned, *pbufout points to the decoded bytes
 * and *pbufoutlen is their number; the caller releases the buffer
 * with free().
 *
 * Returns 1, leaving *pbufout and *pbufoutlen untouched, if an
 * argument is NULL, the code table cannot be read, memory cannot
 * be allocated, the data follows a branch that does not exist in
 * the tree, or the input ends before the announced number of
 * bytes has been decoded.
 */
int huffman_decode_memory(const unsigned char *bufin,
 unsigned int bufinlen,
 unsigned char **pbufout,
 unsigned int *pbufoutlen)
{
    huffman_node *root, *p;
    unsigned int data_count;
    unsigned int i = 0;
    unsigned char *buf;
    unsigned int bufcur = 0;

    /* Ensure the arguments are valid. */
    if(!pbufout || !pbufoutlen)
        return 1;
    if(!bufin)
        return 1;

    /* Read the Huffman code table. */
    root = read_code_table_from_memory(bufin, bufinlen, &i, &data_count);
    if(!root)
        return 1;

    /* data_count comes from the input. Ask for at least one byte
     * so that a NULL result always means failure. */
    buf = (unsigned char*)malloc(data_count ? data_count : 1);
    if(!buf)
    {
        free_huffman_tree(root);
        return 1;
    }

    /* Decode the memory. */
    p = root;
    for(; i < bufinlen && data_count > 0; ++i)
    {
        unsigned char byte = bufin[i];
        unsigned char mask = 1;

        /* mask walks from bit 0 to bit 7 and becomes 0 after that. */
        while(data_count > 0 && mask)
        {
            p = byte & mask ? p->one : p->zero;
            mask <<= 1;

            /* No such branch: the data does not match the table. */
            if(!p)
            {
                free(buf);
                free_huffman_tree(root);
                return 1;
            }

            if(p->isLeaf)
            {
                buf[bufcur++] = p->symbol;
                p = root;
                --data_count;
            }
        }
    }

    free_huffman_tree(root);

    /* Bytes still outstanding means the input ended too early. */
    if(data_count > 0)
    {
        free(buf);
        return 1;
    }

    *pbufout = buf;
    *pbufoutlen = bufcur;
    return 0;
}

实验结果:

以表格形式统计实验数据:

文件类型 平均码长 信源熵 源文件大小(kB) 压缩后文件大小(kB) 压缩比
ppt 8.617 6.681 44 38 1.158
avi 8.016 7.989 718 718 1
ico 12.914 2.978 426 160 2.663
jpg 8.055 7.983 5366 5355 1.002
yuv 10.480 2.379 732 219 3.342
dll 8.883 6.948 440 383 1.149
docx 8.818 7.709 11 12 0.917
rar 7.738 7.349 1.03 1.75 0.589
gif 8.012 7.997 15 16 0.938
bmp 4.238 2.547 451 144 3.132
实验结果与预期结果一样,对于分布越是不均匀的样本,Huffman编码的压缩率越高。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值