利用哈夫曼树实现文件压缩

最新推荐文章于 2024-02-14 22:41:11 发布

Melody20210917

最新推荐文章于 2024-02-14 22:41:11 发布

阅读量6.2k

点赞数 14

分类专栏：数据结构文章标签：文件压缩数据结构源代码压缩结构

本文链接：https://blog.csdn.net/chenjuan0530/article/details/58007848

版权

数据结构专栏收录该内容

7 篇文章 1 订阅

订阅专栏

实现原理：

1.按照字符分析要压缩的文件得出结果（有哪些字符，每个字符出现的次数）。

2.根据字符出现的次数构建哈夫曼树（得出字符的哈夫曼编码）。

3.根据字符的哈夫曼编码进行转换、压缩，然后创建压缩文件。

4.读取压缩文件，读出哈夫曼编码和字符的对照表。解压缩。

数据结构的设计：

1.保存字符次数和字符的数据结构

struct _symbol{

char character;//字符

unsigned int number;//字符出现的次数

char huffecode[20];//编码

}

2.用一个结构体保存所有字符的信息

struct _filestate{

char symbol_count;//字符种类

struct _symbol symbol_array[128];//字符信息

};

3.哈夫曼树节点

struct node{

struct _symbol symbol;

struct node *left;

struct node *right;

};

4.哈夫曼编码

struct code{

char character;

char codebit[100];

};

源代码实现：

#include "header.h"
#include <string.h>

//单个字符的结构体
struct  _symbol{
	char character;//字符
	unsigned int number;//字符的次数
    	char huffcode[20]; //编码
};
//总结构体
struct _filestate{
    char symbol_count;//多少种字符
    struct _symbol symbol_array[128]; //结构体数组保存各种字符
};
//文件字符总结构体
struct _filestate filestate;

//树节点
struct _node{
    struct _node *parent;
    struct _symbol symbol;
    struct _node *left;
    struct _node *right;
    int idx; //在filestate中的索引
};

typedef struct _node node;


//创建节点
node *creatnode(struct _symbol symbol,int idx)
{
    
    node *temp=malloc(sizeof(node));
    temp->symbol=symbol;
    temp->left=NULL;
    temp->right=NULL;
    temp->idx=idx;
    temp->parent=NULL;
    return temp;
}


//构建一棵哈弗曼树
node *createhaffman(struct _filestate *pfilestate)
{
    //创建一维数组，里面保存指针
    node **array=malloc(pfilestate->symbol_count*sizeof(node *));
    node *temp=NULL;
    int i;
    for(i=0;i<pfilestate->symbol_count;i++)
    {
        array[i]=creatnode(pfilestate->symbol_array[i],i);
    }

    int j;
    int minidx,secminidx;
    for(i=0;i<pfilestate->symbol_count-1;i++)
    {
        j=0;
        //找出第一个非空节点
        while(array[j]==NULL)
        {
            j++;
        }
        minidx=j;

        //拿第一个非空节点和后面的对比
        for(j=0;j<pfilestate->symbol_count;j++)
        {
            if(array[j]&&array[j]->symbol.number<array[minidx]->symbol.number)
            {
                minidx=j;
            }

        }

        j=0;
        while(array[j]==NULL||j==minidx)
        {
            j++;
        }
        secminidx=j;
        for(j=0;j<pfilestate->symbol_count;j++)
        {
            if(array[j]&&j!=minidx&&array[j]->symbol.number<array[secminidx]->symbol.number)
            {
              secminidx=j;  
            }
        }

        struct _symbol symbol={0,array[minidx]->symbol.number+array[secminidx]->symbol.number};
        temp=creatnode(symbol,-1);        
        temp->left=array[minidx];
        temp->right=array[secminidx];
        temp->parent=NULL;

        array[minidx]->parent=temp;
        array[secminidx]->parent=temp;
           
        array[minidx]=temp;
        array[secminidx]=NULL;
    }

    free(array);
    array=NULL;

    return temp;
}

//判断是否叶子节点
int isleaf(node *cur)
{
    if(cur->left==NULL&&cur->right==NULL)
    return 1;
    else
    {
        return 0;
    }
}

//取得哈夫曼编码
void huffmancode(node *leaf,char code[20])
{
    char a[20]={0};
    node *temp=leaf;
    int i=0;
    while(temp->parent)
    {
        if(temp->parent->left==temp)
        {
            a[i]=2;
        }
        else
        {
            a[i]=1;
        }
        temp=temp->parent;
        i++;
    }
    int length=i;
    char tmp=0;
    for(i=0;i<length/2;i++)
    {
        tmp=a[i];
        a[i]=a[length-i-1];
        a[length-i-1]=tmp;
    }
    strcpy(code,a);
}

//取得叶子的哈夫曼码并填充到文件结构体里
void fillhuffmancode(node *root,struct _filestate *pfilestate)
{
    int idx;

    if(root)
    {
        if(isleaf(root))
        {
            idx=root->idx;
            huffmancode(root,pfilestate->symbol_array[idx].huffcode);
        }
        else
        {
            fillhuffmancode(root->left,pfilestate);
            fillhuffmancode(root->right,pfilestate);
        }

    }


}


//根据频率创建树且获取编码
void createandcoding(struct _filestate *pfilestate)
{
    node *root=createhaffman(pfilestate);
    fillhuffmancode(root,pfilestate);
    //haffmancode(root,0,&dictionary);
    
     int i,j;
    for(i=0;i<pfilestate->symbol_count;i++)
    {
        printf("%c ",pfilestate->symbol_array[i].character);
        for(j=0;j<20;j++)
        printf("%d",pfilestate->symbol_array[i].huffcode[j]);
        puts("\n");
    }
}






//创建结构体
void calccount(struct _filestate *pfilestate,char ch)
{
    int i;
    int flagexist=0;
    for(i=0;i<pfilestate->symbol_count;i++)
    {
        if(ch==pfilestate->symbol_array[i].character)
        {
            pfilestate->symbol_array[i].number++;
            flagexist=1;
            break;
        }
    }
    if(!flagexist)
    {
        pfilestate->symbol_array[pfilestate->symbol_count].character=ch;
        pfilestate->symbol_count++;
    }

}

//查字典，获得编码
void lookupdict(char character,struct _filestate *pfilestate,char bitcode[])
{
    int i=0;
    for(i=0;i<pfilestate->symbol_count;i++)
    {
        if(character==pfilestate->symbol_array[i].character)
        {
            strcpy(bitcode,pfilestate->symbol_array[i].huffcode);
            break;
        }
    }
}


//写文件头
void writeheader(int fd,struct _filestate *pfilestate)
{
    write(fd,pfilestate,sizeof(struct _filestate));
}

//读文件头
void readheader(int fd,struct _filestate *pfilestate)
{
    read(fd,pfilestate,sizeof(struct _filestate));
}

//写编码
void writecode(int fd_r,int fd_w,struct _filestate *pfilestate)
{
    int flgend=0;
    //临时变量，用来保存取出来哈夫曼编码
    char bitcode[20];
    //临时变量
    char temp;
    char buffer;
    //写字节的索引
    int buf_idx=0;
    //读到文件尾
    int bitcode_idx=0;
    //从源文件读一个字符
    read(fd_r,&temp,1);
    //从字典里查出字符的哈夫曼编码
    lookupdict(temp,pfilestate,bitcode);
   
   //一直读字符，直到文件的末尾
    while(!flgend)
    {
    //用来写的字符，需要根据哈夫曼编码构建
    buffer=0;
    //现在指向buffer的第0位
    buf_idx=0;
    while(buf_idx<8)
    {
        if(bitcode[bitcode_idx]==2)
        {
            //把对应的位清零
            buffer&=~(1<<buf_idx);
            bitcode_idx++;
            buf_idx++;
        }
        else if(bitcode[bitcode_idx]==1)
        {
            //给对应的位置1
            buffer|=(1<<buf_idx);
            bitcode_idx++;
            buf_idx++;
        }        
        else//如果编码数组里的编码读完了
        {
            //再取一个字符
            if((read(fd_r,&temp,1))==1)
            {
            //根据字符取得哈夫曼码
            lookupdict(temp,pfilestate,bitcode);
            bitcode_idx=0;
            }            
            else

            {
                flgend=1;
                break;
            }
        }
        
    }
    write(fd_w,&buffer,1);
    }
}


//写到文件中
void writetofile(char *filename,struct _filestate *pfilestate)
{
    
    int fd1=open(filename,O_RDONLY);
    if(fd1<0)
    {
        perror("open");
        exit(EXIT_FAILURE);
    }
    char *newfilename=strcat(filename,".aar");
    int fd2=open(newfilename,O_CREAT|O_TRUNC|O_WRONLY,0666);
    if(fd2<0)
    {
        perror("open");
        exit(EXIT_FAILURE);
    }
    //将字典结构体先写到文件里面，方便解压时读取
    writeheader(fd2,pfilestate);
    //写编码
    writecode(fd1,fd2,pfilestate);
    
    close(fd1);
    close(fd2);     
}

//分析文件，得到各字符的次数
void analyse(char *filename)
{
        int fd=open(filename,O_RDONLY);
    if(fd<0)
    {
        return;
    }
    char temp;
    while((read(fd,&temp,1))==1)
    {
        calccount(&filestate,temp);
    }
    close(fd);
    int i;
    for(i=0;i<filestate.symbol_count;i++)
    {
        printf("char is %c ascii %d, number is %d\n",\
        filestate.symbol_array[i].character,\
        filestate.symbol_array[i].character,\
        filestate.symbol_array[i].number);

    }
}

    

//压缩
void compress(char *filename)
{
    analyse(filename);
    createandcoding(&filestate);
    writetofile(filename,&filestate);
    
}

//写解压缩文件
void writedecompressfile(int fd, char *filename,node *root)
{
    char *newfilename=strcat(filename,".un");
    node *tmpnode=root;
    int fd_w=open(newfilename,O_CREAT|O_WRONLY|O_TRUNC,0600);
    if(fd_w<0)
    {
        perror("open");
        return;
    }
    char temp;
    int bit_idx;
    while(read(fd,&temp,1)==1)
    {
        bit_idx=0;
        while(bit_idx<8)
        {
            if(temp&(1<<bit_idx))
            {
                tmpnode=tmpnode->right;
            }
            else
            {
                tmpnode=tmpnode->left;
            }
            if(isleaf(tmpnode))
            {
                write(fd_w,&(tmpnode->symbol.character),1);
                tmpnode=root;
            }
            bit_idx++;
        }


    }
    close(fd_w);
}


//解压
void decompress(char *filename)
{
    
    int fd=open(filename,O_RDWR);
    if(fd<0)
    {
        perror("open");
        return;
    }
    memset(&filestate,0,sizeof(filestate));
    //读文件头
    readheader(fd,&filestate);


    //创建哈弗曼树
    node *root=NULL;
    root=createhaffman(&filestate);

    writedecompressfile(fd,filename,root);

    close(fd);


}

int main(int argc,char const *argv[])
{
    char *filename=(char *)argv[2];
    if(argc<3)
    {
        puts("用法：tar -y[-j] filename");
        exit(EXIT_FAILURE);
    }
    if(strcmp(argv[1],"-y")==0)
    {
        compress(filename);
    }
    else if(strcmp(argv[1],"-j")==0)
    {
        decompress(filename);
    }
    else
    {
        puts("参数错误！");
        exit(EXIT_FAILURE);
    }

}

这样的压缩可以做到压缩50%的效果。

Melody20210917

关注

14
点赞
踩
66

收藏

觉得还不错? 一键收藏
4
评论
利用哈夫曼树实现文件压缩

实现原理：1.按照字符分析要压缩的文件得出结果（有哪些字符，每个字符出现的次数）。2.根据字符出现的次数构建哈夫曼树（得出字符的哈夫曼编码）。3.根据字符的哈夫曼编码进行转换、压缩，然后创建压缩文件。4.读取压缩文件，读出哈夫曼编码和字符的对照表。解压缩。数据结构的设计：1.保存字符次数和字符的数据结构struct _symbol{char charac
复制链接

扫一扫