Huffman编码（二叉树的应用）

最新推荐文章于 2021-11-14 22:03:44 发布

changbaolong

最新推荐文章于 2021-11-14 22:03:44 发布

阅读量3.5k

点赞数 1

分类专栏： C/C++ 文章标签： null file input 算法 delete microsoft

C/C++ 专栏收录该内容

67 篇文章

订阅专栏

本文介绍了一个基于Huffman编码实现文件压缩与解压的程序设计案例。通过构建Huffman树为文件中的字符分配编码，实现去除数据冗余，达到压缩文件的目的。文章详细描述了Huffman树的构建过程和编码解码的实现细节。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

Huffman编码（二叉树的应用）

一、实验号题目：

Huffman编码(二叉树应用)

二、实验目的和要求：

1.要求实现对文件进行Huffman编码的算法,以及对已编码文件进行解码的算法;为简单起见,可以假设文件存放在一个字符向量中;

2.熟练掌握二叉树的应用,具体要求如下:

.最小冗余码/哈夫曼码

● ASCII码/定长码

ab12: 01100001 01100010 00110001 00110010

97 98 49 50

● 哈夫曼码/不定长码

能按字符的使用频度,使文本代码的总长度具有最小值。

3.熟练掌握计算机系统的基本操作方法,了解如何编辑、编译、链接和运行一个C++程序及二叉树上的基本运算;

4.上机调试程序,掌握查错、排错使程序能正确运行。

三、实验的环境:

1.硬件环境:显示器、主机、鼠标、键盘等；

2.软件环境:

操作系统：Windows xp；

编译软件：Microsoft Visual C++ 6.0。

四、算法描述:

1.本实验要求用huffman编码对文件进行压缩，根据huffman树对文件中各字符分配代码，所以可以按照以下几步进行：

1）打开要压缩的文件，对文件中的字符进行统计，统计每个字符在文件中出现的次数；

2）在源文件中建立huffman树，给huffman树分配编码，从而获取每个字符的编码；

3）在用户指定的位置新建并打开一个用户输入文件名的目标文件，将建立的huffman树保存在这个目标文件中，并将源文件中的每个字符的huffman编码写入这个目标文件。

2.在解压阶段，先根据用户输入的文件名新建并打开一个目标文件，然后根据存储的huffman树和给各个字符分配的代码，将源文件中的每一个01串还原成对应的字符，并将还原的每一个字符写入该目标文件中，从而达到解压的目的。

3.建立huffman树的算法

1）根据给定的n个权值 $\{w_1, w_2, \dots, w_n\}$，构造由n棵二叉树构成的森林 $F=\{T_1, T_2, \dots, T_n\}$，其中每棵二叉树 $T_i$ 都只含有一个带权值为 $w_i$ 的根结点，其左、右子树为空；

2）在森林F中选取根结点的权值最小的两棵二叉树（若这样的二叉树不止两棵，则任选其中两棵），分别作为左、右子树构造一棵新的二叉树，并置这棵新的二叉树根结点的权值为其左、右子树根结点的权值之和；

3）从森林F中删去这两棵二叉树，同时将刚生成的新二叉树加入到森林F中。

4）重复步骤2）和3），直至森林F中只含一棵二叉树为止，则得到的二叉树就是huffman树。

五、源程序清单：

//Utility.h

#include<conio.h> //console input and output
#include<ctype.h> //character classification
#include<limits.h> //numeric limits
#include<math.h> //mathematical functions
#include<stdio.h> //standard I/O library
#include<stdlib.h> //standard library
#include<string.h> //standard string operations
#include<time.h> //date and time functions

#include<fstream> //file input and output
#include<iostream> //standard iostream operations
#include<new> //std::nothrow for non-throwing allocation

// Status codes returned by the container operations in this program.
enum Error_code {
    success,      // the operation completed
    fail,         // not used by the code visible in this file
    underflow,    // pop()/top() was called on an empty Stack
    overflow,     // push() could not allocate a new node
    range_error   // not used by the code visible in this file
};

//enum bool{false,true};

//Lk_stack.h

// One link of a singly linked chain: a stored value plus a pointer to the
// link that follows it.
template<typename Node_entry>
struct Node {
    Node_entry entry;          // value stored in this link
    Node<Node_entry> *next;    // following link, or NULL when this is the last one

    // Creates a link with no successor; entry is left default-initialized.
    Node();

    // Creates a link holding item whose successor is add_on (NULL by default).
    Node(Node_entry item, Node<Node_entry> *add_on = NULL);
};

// Linked stack: a last-in, first-out container whose entries live in a chain
// of heap-allocated Node objects headed by top_node.
template<typename Stack_entry>
class Stack {
public:
    // Construction, destruction and copying (each Stack owns its own nodes).
    Stack();
    ~Stack();
    Stack(const Stack<Stack_entry> &original);
    void operator =(const Stack<Stack_entry> &original);

    // Standard stack operations.
    bool empty() const;                          // true when there are no entries
    Error_code push(const Stack_entry &item);    // add item on top
    Error_code pop();                            // remove the top entry
    Error_code top(Stack_entry &item) const;     // copy the top entry into item
    void clear();                                // remove every entry

protected:
    Node<Stack_entry> *top_node;                 // first node of the chain, NULL when empty
};

// Default constructor: the new link has no successor.
template<class Node_entry>
Node<Node_entry>::Node()
    : next(NULL)
{
}

// Builds a link that stores a copy of item and points at add_on.
template<class Node_entry>
Node<Node_entry>::Node(Node_entry item, Node<Node_entry> *add_on)
{
    this->next = add_on;
    this->entry = item;
}

// Default constructor: the Stack starts out empty.
template<class Stack_entry>
Stack<Stack_entry>::Stack()
    : top_node(NULL)
{
}

// Returns true exactly when the Stack holds no entries.
template<class Stack_entry>
bool Stack<Stack_entry>::empty() const
{
    return top_node == NULL;
}

// Pushes a copy of item onto the Stack.
// Post: item is on top of the Stack and success is returned. If dynamic
//       memory is exhausted the Stack is left unchanged and overflow is
//       returned.
template<class Stack_entry>
Error_code Stack<Stack_entry>::push(const Stack_entry &item)
{
    // The nothrow form reports exhaustion as NULL instead of throwing, so the
    // overflow return below is actually reachable.
    Node<Stack_entry> *new_top = new (std::nothrow) Node<Stack_entry>(item, top_node);
    if (new_top == NULL) return overflow;
    top_node = new_top;
    return success;
}

// Removes the top entry.
// Post: if the Stack is empty, underflow is returned and nothing changes;
//       otherwise the top node is unlinked and freed and success is returned.
template<class Stack_entry>
Error_code Stack<Stack_entry>::pop()
{
    Node<Stack_entry> *old_top = top_node;
    if (top_node == NULL) return underflow;
    top_node = old_top->next;   // unlink before freeing
    delete old_top;
    return success;
}

// Copies the top entry into item without removing it.
// Post: returns underflow (item untouched) when the Stack is empty,
//       otherwise success.
template<class Stack_entry>
Error_code Stack<Stack_entry>::top(Stack_entry &item) const
{
    if (empty())
        return underflow;
    else {
        item = top_node->entry;
        return success;
    }
}

// Removes every entry.
// Post: the Stack is empty and all of its nodes have been freed.
template<class Stack_entry>
void Stack<Stack_entry>::clear()
{
    while (!empty())
        pop();
}

// Destructor.
// Post: every node owned by the Stack has been freed.
template<class Stack_entry>
Stack<Stack_entry>::~Stack()
{
    clear();
}

// Copy constructor.
// Post: the Stack is initialized as an independent copy of original: the same
//       entries in the same order, held in freshly allocated nodes.
template<class Stack_entry>
Stack<Stack_entry>::Stack(const Stack<Stack_entry> &original)
{
    Node<Stack_entry> *new_copy, *original_node = original.top_node;
    if (original_node == NULL) top_node = NULL;
    else {
        // Duplicate the linked nodes, walking both chains from the top down.
        top_node = new_copy = new Node<Stack_entry>(original_node->entry);
        while (original_node->next != NULL) {
            original_node = original_node->next;
            new_copy->next = new Node<Stack_entry>(original_node->entry);
            new_copy = new_copy->next;
        }
    }
}

// Overloaded assignment.
// Post: the Stack is reset as an independent copy of original.
// The copy is built completely before the old entries are released, so
// assigning a Stack to itself is safe.
template<class Stack_entry>
void Stack<Stack_entry>::operator = (const Stack<Stack_entry> &original)
{
    Node<Stack_entry> *new_top, *new_copy, *original_node = original.top_node;
    if (original_node == NULL) new_top = NULL;
    else {
        // Duplicate the linked nodes.
        new_copy = new_top = new Node<Stack_entry>(original_node->entry);
        while (original_node->next != NULL) {
            original_node = original_node->next;
            new_copy->next = new Node<Stack_entry>(original_node->entry);
            new_copy = new_copy->next;
        }
    }
    while (!empty())      // clean out the old Stack entries
        pop();
    top_node = new_top;   // and replace them with the new ones
}

//Huffman.h

// Number of distinct byte values, i.e. the largest possible number of leaves.
const unsigned int n = 256;

// Largest possible number of tree nodes: a tree with n leaves has 2*n-1 nodes.
const unsigned int m = 2 * n - 1;

// One node of the Huffman tree used for compression. Nodes are linked by
// index into the node table rather than by pointer; index 0 means "none".
struct HTNode {
    unsigned long weight;   // frequency of the byte (leaf) or sum of the children's weights
    unsigned int parent;    // table index of the parent, 0 for a root
    unsigned int lchild;    // table index of the left child, 0 for a leaf
    unsigned int rchild;    // table index of the right child, 0 for a leaf
};

// One-byte staging area used to read or write a file one bit at a time.
struct Buffer {
    char ch;             // the byte being assembled or consumed
    unsigned int bits;   // how many bits of ch are currently in use
};

class HuffmanTree{ //Huffman树

public:

void Code(); //编码

void UnCode(); //译码

private:

HTNode HT[m+1]; //树结点表(HT[1]到HT[m])

char Leaf[n+1]; //叶结点对应字符(leaf[1]到leaf[n])

char *HuffmanCode[n+1]; //叶结点对应编码(*HuffmanCode[1]到*HuffmanCode[n])

unsigned int count; //频度大于零的字符数

unsigned int char_index[n]; //字符对应在树结点表的下标(char_index[0]到char_index[n-1])

unsigned long size; //被压缩文件长度

FILE *infp,*outfp; //输入/出文件

Buffer buf; //字符缓冲

void Stat(); //统计字符出现频度并过滤掉频度为零的字符

//在HT[0]~HT[k]中选择parent为-1，树值最小的两个结点s1,s2

void Select(unsigned int k, unsigned int &s1, unsigned int &s2);

void Write(unsigned int bit); //向outfp中写入一个比特

void Write(unsigned int num,unsigned int k);//向outfp中写入k个比特

void WriteToOutfp(); //强行写入outfp

void Read(unsigned int &bit); //从infp中读出一个比特

void Read(unsigned int &num,unsigned int k);//从infp中读出k个比特

int NToBits(unsigned int num); //0~num之间的整数用二进位表示所需的最少位数

void CreateFromCodeFile(); //由编码文件中存储的树结构建立Huffman树

//由被压缩文件建立Huffman树,将树结构存入编码文件的文件头部中,并求每个字符的Huffman编码

void CreateFromSourceFile();

};

// Compresses a file.
// Prompts for the source file and the code (output) file, builds the Huffman
// tree from the source, writes the tree description followed by the code of
// every source byte, and pads the final byte with zero bits.
// Terminates the program with exit(1) if a file cannot be opened or the
// source file is empty.
void HuffmanTree::Code()
{
    char infName[256] = "", outfName[256] = "";

    std::cout << "Please input source file name(size less than 4GB):"; // source file may be at most 4 GB
    std::cin.width(sizeof infName);   // bound the extraction so a long name cannot overrun the array
    std::cin >> infName;
    if ((infp = fopen(infName, "rb")) == NULL) {
        std::cout << "Can not open file:" << infName << std::endl;
        exit(1);
    }
    // feof() only becomes true after a read has hit end-of-file, so probe one
    // byte to find out whether the file is empty (Stat() rewinds later).
    if (fgetc(infp) == EOF) {
        std::cout << "Empty source file:" << infName << std::endl;
        fclose(infp);
        exit(1);
    }

    std::cout << "Please input code file name:";
    std::cin.width(sizeof outfName);
    std::cin >> outfName;
    if ((outfp = fopen(outfName, "wb")) == NULL) {
        std::cout << "Can not open file:" << outfName << std::endl;
        exit(1);
    }
    std::cout << "Pocessing..." << std::endl;

    unsigned int i;
    for (i = 0; i <= n; i++) HuffmanCode[i] = NULL;
    CreateFromSourceFile();   // builds the tree, writes the header, fills HuffmanCode[]

    // Second pass over the source: emit the code of every byte, bit by bit.
    // Reading into an int and comparing with EOF also stops on a read error,
    // where feof() would never become true.
    rewind(infp);
    int ch;
    while ((ch = fgetc(infp)) != EOF) {
        for (const char *code = HuffmanCode[char_index[ch]]; *code != '\0'; ++code) {
            if (*code == '0') Write(0);
            else Write(1);
        }
    }
    WriteToOutfp();           // flush the last, partially filled byte

    fclose(infp); fclose(outfp);

    // The code strings are only needed while encoding; release them so that a
    // later call does not leak them when it resets the table.
    for (i = 1; i <= n; i++) {
        delete [] HuffmanCode[i];
        HuffmanCode[i] = NULL;
    }
    std::cout << "Process end." << std::endl << std::endl;
}

// Decompresses a file.
// Prompts for the code file and the target file, rebuilds the Huffman tree
// from the header of the code file and then walks the tree once per original
// byte, writing the byte found at each leaf to the target file.
// Terminates the program with exit(1) if a file cannot be opened or the code
// file is empty; stops early with a message if the bit stream is damaged.
void HuffmanTree::UnCode()
{
    char infName[256] = "", outfName[256] = "";

    std::cout << "Please input code file name:";
    std::cin.width(sizeof infName);   // bound the extraction so a long name cannot overrun the array
    std::cin >> infName;
    if ((infp = fopen(infName, "rb")) == NULL) {
        std::cout << "Can not open file:" << infName << std::endl;
        exit(1);
    }
    // feof() only becomes true after a read has hit end-of-file, so probe one
    // byte to find out whether the file is empty (CreateFromCodeFile() rewinds).
    if (fgetc(infp) == EOF) {
        std::cout << "Empty code file:" << infName << std::endl;
        fclose(infp);
        exit(1);
    }

    std::cout << "Please input target file name:";
    std::cin.width(sizeof outfName);
    std::cin >> outfName;
    if ((outfp = fopen(outfName, "wb")) == NULL) {
        std::cout << "Can not open file:" << outfName << std::endl;
        exit(1);
    }
    std::cout << "Pocessing..." << std::endl;

    CreateFromCodeFile();                     // rebuild the Huffman tree
    const unsigned int root = 2*count - 1;    // the root is the last node created

    unsigned int bit, c;
    Read(bit);                                // the loop always works one bit ahead
    for (unsigned long i = 0; i < size; i++) {
        c = root;
        // Descend until a leaf is reached. The range test keeps a damaged
        // tree description from steering the walk outside the node table.
        while (c >= 1 && c <= root
               && (HT[c].lchild != 0 || HT[c].rchild != 0)
               && feof(infp) == 0) {
            if (bit == 0) c = HT[c].lchild;
            else c = HT[c].rchild;
            Read(bit);
        }
        if (c < 1 || c > count) {             // stopped somewhere that is not a leaf
            std::cout << "Code file is truncated or corrupt:" << infName << std::endl;
            break;
        }
        fputc(Leaf[c], outfp);                // write the decoded byte
    }

    fclose(infp); fclose(outfp);
    std::cout << "Process end." << std::endl << std::endl;
}

// Counts how often each byte value occurs in infp and drops the values that
// never occur.
// On return: size is the number of bytes read, count is the number of
// distinct byte values, and for each occurring value b (in increasing order)
// Leaf[k] == b, HT[k].weight is its frequency and char_index[b] == k, with k
// running from 1 to count.
void HuffmanTree::Stat()
{
    unsigned int i;
    for (i = 1; i <= n; i++) HT[i].weight = 0;
    size = 0;

    // First use HT[b+1].weight as the counter for byte value b.
    // Comparing against EOF (rather than polling feof) also ends the loop on
    // a read error.
    rewind(infp);
    int byte;
    while ((byte = fgetc(infp)) != EOF) {
        HT[byte + 1].weight++;
        size++;
    }

    // Then compact in place. count never exceeds cha+1, so the slot being
    // written has already been consumed.
    count = 0;
    for (unsigned int cha = 0; cha < n; cha++) {
        if (HT[cha + 1].weight > 0) {
            count++;
            Leaf[count] = static_cast<char>(cha);
            HT[count].weight = HT[cha + 1].weight;
            char_index[cha] = count;
        }
    }
}

// Finds the two lightest roots among HT[1]..HT[k].
// A root is a node whose parent field is 0. On return s1 and s2 are the
// indices of the two roots with the smallest weights, and
// HT[s1].weight <= HT[s2].weight. If fewer than two roots exist, the missing
// output(s) are set to 0.
void HuffmanTree::Select(unsigned int k, unsigned int &s1, unsigned int &s2)
{
    unsigned int roots_seen = 0;
    unsigned int tem;
    s1 = s2 = 0;

    for (unsigned int i = 1; i <= k; i++) {
        if (HT[i].parent != 0) continue;      // already merged into a subtree
        roots_seen++;

        if (roots_seen == 1) {                // first root: provisional minimum
            s1 = i;
            continue;
        }
        if (roots_seen == 2)                  // second root: provisional runner-up
            s2 = i;
        else if (HT[i].weight < HT[s2].weight)
            s2 = i;                           // lighter than the current runner-up
        else
            continue;

        if (HT[s1].weight > HT[s2].weight) {  // keep s1 the lighter of the pair
            tem = s1; s1 = s2; s2 = tem;
        }
    }
}

// Appends one bit (0 or 1) to outfp.
// Bits are collected most-significant first in buf; the byte is written to
// the file as soon as eight bits have been gathered.
void HuffmanTree::Write(unsigned int bit)
{
    buf.bits++;
    buf.ch = (buf.ch << 1) + bit;
    if (buf.bits == 8) {      // buffer full: write it out and start a new byte
        fputc(buf.ch, outfp);
        buf.bits = 0;
        buf.ch = 0;
    }
}

// Appends the low k bits of num to outfp, most significant of them first.
// Bit positions beyond the width of unsigned int are written as 0.
void HuffmanTree::Write(unsigned int num, unsigned int k)
{
    const unsigned int width = sizeof(unsigned int) * CHAR_BIT;
    for (unsigned int pos = k; pos > 0; pos--) {
        const unsigned int shift = pos - 1;
        // Shifting by the full width or more is undefined, so treat those
        // positions as the zero bits they logically are.
        const unsigned int bit = (shift < width) ? ((num >> shift) & 1u) : 0u;
        Write(bit);
    }
}

// Flushes a partially filled bit buffer: pads it with zero bits up to a full
// byte, which makes Write() emit it. Does nothing when the buffer is empty.
void HuffmanTree::WriteToOutfp()
{
    const unsigned int pending = buf.bits;
    if (pending == 0) return;

    unsigned int padding = 8 - pending;
    while (padding-- > 0)
        Write(0);
}

// Fetches the next bit from infp into bit (0 or 1).
// A new byte is pulled from the file whenever the buffer runs empty, and bits
// are handed out most-significant first. End-of-file is not detected here:
// the value returned by fgetc is stored as-is, so callers check feof(infp).
void HuffmanTree::Read(unsigned int &bit)
{
    if (buf.bits == 0) {
        buf.ch = fgetc(infp);
        buf.bits = 8;
    }
    // Work on the unsigned representation: shifting a negative (promoted)
    // char to the left is undefined before C++20.
    const unsigned char byte = static_cast<unsigned char>(buf.ch);
    bit = (byte >> 7) & 1u;
    buf.ch = static_cast<char>(static_cast<unsigned char>(byte << 1));
    buf.bits--;
}

// Fetches k bits from infp and assembles them into num, the first bit read
// becoming the most significant one. num is 0 when k is 0.
void HuffmanTree::Read(unsigned int &num, unsigned int k)
{
    unsigned int bit;
    num = 0;
    for (unsigned int i = 0; i < k; i++) {
        Read(bit);
        num = (num << 1) + bit;
    }
}

// Returns the number of binary digits needed to represent every integer in
// 0..num, i.e. the position of the highest set bit of num (0 for num == 0).
int HuffmanTree::NToBits(unsigned int num)
{
    // Shifting num down terminates for every input; doubling a power of two
    // until it exceeds num would wrap to 0 and never finish for large num.
    int bits = 0;
    while (num != 0) {
        bits++;
        num >>= 1;
    }
    return bits;
}

// Rebuilds the Huffman tree from the header of the code file (infp).
// Header layout, as read here: the original file length (a raw unsigned
// long), count-1 in 8 bits, the count leaf byte values, then for every
// internal node its left and right child index, each in l bits, where l is
// the number of bits needed for the largest node index 2*count-1.
void HuffmanTree::CreateFromCodeFile()
{
    buf.bits = 0;                 // start with an empty bit buffer
    buf.ch = 0;

    unsigned int num, l, i;
    rewind(infp);
    // A file too short to hold the length decodes to nothing instead of
    // leaving size indeterminate.
    if (fread(&size, sizeof(unsigned long), 1, infp) != 1) size = 0;

    Read(count, 8);
    count = count + 1;            // stored as count-1 so that 256 fits in 8 bits
    fread(&Leaf[1], sizeof(char), count, infp);

    l = NToBits(2*count - 1);
    for (i = 1; i <= count; i++) {            // leaves have no children
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }
    for (i = count + 1; i <= 2*count - 1; i++) {
        Read(num, l);
        HT[i].lchild = num;
        Read(num, l);
        HT[i].rchild = num;
    }
}

// Builds the Huffman tree from the source file (infp), stores the tree
// description at the start of the code file (outfp) and computes the code of
// every byte value that occurs.
// If the source contains no bytes, nothing is built or written.
void HuffmanTree::CreateFromSourceFile()
{
    Stat();                       // byte frequencies; fills count, Leaf[], HT[1..count].weight
    if (count == 0) return;       // no symbols: 2*count-1 would wrap around below

    // Build the tree: repeatedly merge the two lightest roots.
    unsigned int i, s1, s2;
    for (i = 1; i <= count; i++) HT[i].parent = HT[i].lchild = HT[i].rchild = 0;
    for (i = count + 1; i <= 2*count - 1; i++) {
        Select(i - 1, s1, s2);    // two lightest nodes with parent == 0
        HT[s1].parent = HT[s2].parent = i;
        HT[i].parent = 0; HT[i].lchild = s1; HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }

    // Store the tree description at the start of the code file.
    unsigned int l;
    buf.bits = 0;                 // start with an empty bit buffer
    buf.ch = 0;
    rewind(outfp);
    // Write the whole object: the reader fetches sizeof(unsigned long) bytes,
    // so writing only sizeof(unsigned int) corrupts the header wherever the
    // two types differ in size.
    fwrite(&size, sizeof(size), 1, outfp);
    Write(count - 1, 8);          // count-1 so that 256 symbols fit in 8 bits
    for (i = 1; i <= count; i++)
        fwrite(&Leaf[i], sizeof(char), 1, outfp);
    l = NToBits(2*count - 1);     // bits needed for the largest node index
    for (i = count + 1; i <= 2*count - 1; i++) {
        Write(HT[i].lchild, l);
        Write(HT[i].rchild, l);
    }

    // Compute the code of every symbol.
    unsigned int start, c, f;
    char *cd;                     // scratch buffer, filled from the back
    for (i = 1; i <= n; i++)
        if (HuffmanCode[i] != NULL) {
            delete [] HuffmanCode[i];   // release codes left from an earlier run
            HuffmanCode[i] = NULL;
        }
    cd = new char[count];         // a code has at most count-1 bits, plus the terminator
    cd[count - 1] = '\0';
    for (i = 1; i <= count; i++) {
        start = count - 1;        // position of the terminator
        // Climb from the leaf to the root, prepending one bit per edge.
        for (c = i, f = HT[i].parent; f != 0; c = f, f = HT[c].parent)
            if (HT[f].lchild == c) cd[--start] = '0';
            else cd[--start] = '1';
        HuffmanCode[i] = new char[count - start];
        strcpy(HuffmanCode[i], &cd[start]);
    }
    delete [] cd;
}

//Huffman.cpp

#include"Utility.h"

#include"Lk_stack.h"

#include"Huffman.h"

// Menu loop: lets the user compress or decompress files until '3' is chosen
// or the input stream ends.
int main()
{
    HuffmanTree hf;
    char c = 0;
    while (c != '3')
    {
        std::cout << std::endl << "1.Huffman compress.";
        std::cout << std::endl << "2.Huffman decompress.";
        std::cout << std::endl << "3.Exit.";
        std::cout << std::endl << "Please select:";
        if (!(std::cin >> c)) break;   // input closed or failed: leave instead of looping forever
        switch (c)
        {
        case '1':
            hf.Code();
            break;
        case '2':
            hf.UnCode();
            break;
        }
    }
    return 0;
}

六、实验运行情况分析：

1.算法分析：本程序根据huffman树分配代码的方法，将文件中的数据冗余去掉，从而达到压缩文件的目的。Huffman编码本身作为一种冗余量最小的压缩编码，对于本题的压缩而言，能够达到目的，使文件得到压缩。但是对于比较小的字符串文件而言，数据压缩不了多少，所以没有明显看到文件大小的改变。对于huffman树的构造而言，可以有多种算法，但是本程序运用的这种方法，可能不是效率最高的方法，但应该是最好理解的方法。

2.运行结果分析：本程序对于小文件可以达到压缩和解压的目的，但是对于大文件压缩以后却不能解压，因为在写入文件的时候不是一个字符一个字符的写进去的，而是一个数组一个数组写进去的，这样很容易导致程序的错误，本来想一个数组一个数组地写进去可以提高效率，但是却导致了大文件不能解压的错误，所以本程序还需要进行改进，使它的功能更加完善。

3.运行环境分析：程序在本环境下运行，可以对根目录下的文件进行压缩，还可以对计算机其它盘上的文件进行压缩，这就需要在运行的时候注意，在根目录下的文件可以直接输入文件名，但如果被压缩或解压的文件在计算机其它盘上，输入文件名的时候要带上文件的路径。

================================================================================================================================

转载自：http://kxjinfeng.blog.163.com/blog/static/4837699620100511203328/

备注：因为图片不能是站外的，所以部分涉及图片的内容给删除了~点击上面的转载链接可以查看~

================================================================================================================================