哈夫曼编码译码器

最新推荐文章于 2024-04-30 21:48:39 发布

想跑步的小弱鸡

最新推荐文章于 2024-04-30 21:48:39 发布

阅读量263

点赞数

文章标签：数据结构 c++ 霍夫曼树 huffman tree

本文链接：https://blog.csdn.net/m0_73710531/article/details/130852627

版权

该文详细介绍了如何使用C语言实现哈夫曼编码的压缩和解压缩过程，包括文件读取、字符频率统计、哈夫曼树的构建、编码表生成、编码文件以及解码文件。同时，文章提到了计算压缩率的方法。

摘要由CSDN通过智能技术生成

要求如下

在这里我们看到，要求要进行编码和译码，并且要能计算压缩率，还要有文件读取的操作，因此我们先进行文件读取操作的学习

前置知识

1.fopen-打开一个文件,在这里踩了个坑是，在把文件路径复制到代码中时，需要在\后面再加上一个\,因为需要进行转义要把特殊字符\转换成普通字符，例子如下

#include <stdio.h>

int main() {
    printf("\\"); // 输出反斜杠字符 "\"
    
    return 0;
}

输出结果是\

2.fscanf 从文件中读入

3.fprintf 向文件输入

接下来我们就要开始编写程序了

首先我们要先定义一些基本的东西

struct character
{
    char ch;
    int number = 0;
};             //文件中出现的字符和其出现次数
struct huffmannode
{
    int weight;
    int parent;
    int left;
    int right;
    char value;
    int i;
};             //哈夫曼节点
struct huffmancode
{
    int bit[MAXBIT];
    int front;
};             //记录每个字符哈夫曼编码

最开始我们要计算不同字符在文本中出现的频率，在这里统一说明，我们的功能应该在函数中分开实现，代码如下

void tongji(character chars[], int& n)    //统计原文本中出现的字符及其使用频率
{
    FILE* file = NULL;
    if ((file = fopen("C:\\huff\\word.txt", "r")) == NULL)
    {
        printf("打开word.txt失败!\n");
        exit(0);
    }//如果没有打开，就输出失败，然后直接结束程序-后路
    char ch;
    int i = 0, j = 0, judge = 0;;
    fscanf(file, "%c", &ch);
    chars[i].ch = ch;
    chars[i].number++;
    i++;
    while (ch != '#')  //从文件读入文本，以#标记结束
    {
        fscanf(file, "%c", &ch);
        judge = 0;
        if (ch != '#')
        {
            for (j = 0; j < i; j++)
            {
                if (ch == chars[j].ch)  //如果读入的字符已被记录，则将出现次数+1
                {
                    chars[j].number++;
                    judge = 1;
                    break;
                }
            }
            if (judge == 0)
            {
                chars[i].ch = ch;
                chars[i].number++;
                i++;
            }//如果没有出现在已知，那么就创建新的
        }
    }
    n = i;//记录文件中出现的字母数目
    fclose(file);
}

在这里借鉴了一下别人的读取思路，总之就是将读入的字符记录，如果新读入的字符不在原本的字符样本里面，就把这个新加进去，思路还是十分明确的，然后我们要实现计算字符频率的事情，代码如下

void printout(character ch[], int n)
{
    int total = 0;
    for (int i = 0; i < n; i++)
    {
        total = total + ch[i].number;
    }
    for (int i = 0; i < n; i++)
    {
        printf("字符%c的频率是%f\n", ch[i].ch,(1.0*ch[i].number) / total);
    }
}

就把每个字符出现的频率加和然后除去自身就好了

接下来我们要实现计算哈夫曼树的构造和编码，相信你已经学过什么是哈夫曼树了，在这里我的程序没有进一步用堆优化，而是直接算出来最小的两个节点，代码如下

void createhuffmantree(huffmannode huffnode[], int n, character chars[])     //构建哈夫曼树
{
    int i, j, x1, x2;
    int maxweight = 50000;
    for (i = 0; i < 2 * n - 1; i++)
    {
        huffnode[i].weight = chars[i].number;
        huffnode[i].parent = TOP;
        huffnode[i].left = null;
        huffnode[i].right = null;
        huffnode[i].value = chars[i].ch;
    }//初始化，2*n-1的原因是合并n-1次会出现n-1个节点
    for (i = 0; i < n - 1; i++)//有n个节点就要进行n-1次合并
    {
        maxweight = MAXWEIGHT;
        x1 = x2 = 0;
        for (j = 0; j < n + i; j++)
        {
            if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight)
            {
                maxweight = huffnode[j].weight;
                x1 = j;
            }
        }//找到第一小节点
        maxweight = MAXWEIGHT;
        for (j = 0; j < n + i; j++)
        {
            if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight && j != x1)
            {
                maxweight = huffnode[j].weight;
                x2 = j;
            }
        }//找到第二小节点，多了一个判定条件
        huffnode[x1].parent = n + i;
        huffnode[x2].parent = n + i;//将新节点和最小两个节点连接
        huffnode[n + i].weight = huffnode[x1].weight + huffnode[x2].weight;
        huffnode[n + i].left = x1;
        huffnode[n + i].right = x2;
    }
}

十分常规的写法，接下来就是计算每个字符的哈夫曼编码，一般都是从叶节点从下向上算，我们用一个循环来做，我们知道，树的根节点是不计入计算的，所以，循环终止的条件就是抵达根节点，代码如下

void Coding(huffmannode huffnode[], huffmancode huffcode[], int n)        //构建字符的哈夫曼编码表
{
    int father;
    int now, i;
    huffmancode temp;
    int p;
    for (i = 0; i < n; i++)
    {
        huffcode[i].front = MAXBIT - 1;
        now = i;
        father = huffnode[i].parent;
        while (father != TOP)//一直到根节点，根节点不存储编码
        {
            if (now == huffnode[father].left) {
                huffcode[i].bit[huffcode[i].front] = 0;//左子树存储0
            }
            else {
                huffcode[i].bit[huffcode[i].front] = 1;//右子树存储1
            }
            now = father;
            father = huffnode[father].parent;
            huffcode[i].front--;
        }
        ++huffcode[i].front;//多减了一位，一定要有自加一次
    }
}

接下来就是把文件进行编码了，有了上面的函数，这一步是十分轻松的，代码如下

void CodingFile(huffmannode huffnode[], huffmancode huffcode[], int n, int& chnum)     //对原文本编码并将编码写入文件
{
    FILE* file1 = NULL;
    FILE* file2 = NULL;
    if ((file1 = fopen("C:\\huff\\word.txt", "r")) == NULL)
    {
        printf("打开word.txt失败!\n");
        exit(0);
    }
    if ((file2 = fopen("C:\\huff\\asc.txt", "w")) == NULL)
    {
        printf("打开asc.txt失败!\n");
        exit(0);
    }
    char ch;
    fscanf(file1, "%c", &ch);
    if (ch != '#')
    {
        for (int i = 0; i < n; i++)
        {
            if (huffnode[i].value == ch)
            {
                for (int j = huffcode[i].front; j < MAXBIT; j++)
                {
                    fprintf(file2, "%d", huffcode[i].bit[j]);
                    chnum++;
                }
            }
        }
    }
    while (ch != '#') {
        fscanf(file1, "%c", &ch);
        if (ch != '#')
        {
            for (int i = 0; i < n; i++)
            {
                if (huffnode[i].value == ch)
                {
                    for (int j = huffcode[i].front; j < MAXBIT; j++)
                    {
                        fprintf(file2, "%d", huffcode[i].bit[j]);
                        chnum++;
                    }
                }
            }
        }
    };
    fclose(file1);
    fclose(file2);
}

解码代码如下

void Decoding(huffmannode huffnode[], int n)
{
    FILE* fc = NULL, * fd = NULL;
    if ((fc = fopen("C:\\huff\\asc.txt", "r")) == NULL)
    {
        printf("打开asc.txt失败!\n");
        exit(0);
    }
    if ((fd = fopen("C:\\huff\\outcome.txt", "w")) == NULL)
    {
        printf("打开outcome.txt失败!\n");
        exit(0);
    }
    char nums[100000];
    fscanf(fc, "%s", nums);//因为译码文件没有空格
    int i = 0, j = 0, temp;
    for (i = 0; nums[i] != '\0'; i++);
    while (j < i)
    {
        temp = 2 * n - 2;//找到根节点
        while (huffnode[temp].left != null && huffnode[temp].right != null)
        {
            if (nums[j] == '0')
                temp = huffnode[temp].left;
            else
                temp = huffnode[temp].right;
            j++;
        }
        fprintf(fd, "%c", huffnode[temp].value);
    }
    fclose(fc);
    fclose(fd);
}

计算压缩率

void Calculate_yasuo(character ch[], int n, int chnum)   //计算压缩率
{
    int total = 0;
    for (int i = 0; i < n; i++)
    {
        total =total+ ch[i].number;
    }
    printf("=>压缩成功，压缩比%f%%", 1.0*chnum /(8*total * 100));//除8是因为在txt文档中，一个字符占8位
}

在这里加上两个%%是因为要进行转义

源代码如下

#include<iostream>
#include<stdio.h>
#include<stdlib.h>
using namespace std;
#define MAXWEIGHT 10000
#define MAXBIT 100
#define TOP -1
#define null -1
struct character
{
    char ch;
    int number = 0;
};             //文件中出现的字符和其出现次数
struct huffmannode
{
    int weight;
    int parent;
    int left;
    int right;
    char value;
    int i;
};             //哈夫曼节点
struct huffmancode
{
    int bit[MAXBIT];
    int front;
};             //记录每个字符哈夫曼编码
void tongji(character chars[], int& n)    //统计原文本中出现的字符及其使用频率
{
    FILE* file = NULL;
    if ((file = fopen("C:\\huff\\word.txt", "r")) == NULL)
    {
        printf("打开word.txt失败!\n");
        exit(0);
    }//如果没有打开，就输出失败，然后直接结束程序-后路
    char ch;
    int i = 0, j = 0, judge = 0;;
    fscanf(file, "%c", &ch);
    chars[i].ch = ch;
    chars[i].number++;
    i++;
    while (ch != '#')  //从文件读入文本，以#标记结束
    {
        fscanf(file, "%c", &ch);
        judge = 0;
        if (ch != '#')
        {
            for (j = 0; j < i; j++)
            {
                if (ch == chars[j].ch)  //如果读入的字符已被记录，则将出现次数+1
                {
                    chars[j].number++;
                    judge = 1;
                    break;
                }
            }
            if (judge == 0)
            {
                chars[i].ch = ch;
                chars[i].number++;
                i++;
            }//如果没有出现在已知，那么就创建新的
        }
    }
    n = i;//记录文件中出现的字母数目
    fclose(file);
}
void createhuffmantree(huffmannode huffnode[], int n, character chars[])     //构建哈夫曼树
{
    int i, j, x1, x2;
    int maxweight = 50000;
    for (i = 0; i < 2 * n - 1; i++)
    {
        huffnode[i].weight = chars[i].number;
        huffnode[i].parent = TOP;
        huffnode[i].left = null;
        huffnode[i].right = null;
        huffnode[i].value = chars[i].ch;
    }//初始化，2*n-1的原因是合并n-1次会出现n-1个节点
    for (i = 0; i < n - 1; i++)//有n个节点就要进行n-1次合并
    {
        maxweight = MAXWEIGHT;
        x1 = x2 = 0;
        for (j = 0; j < n + i; j++)
        {
            if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight)
            {
                maxweight = huffnode[j].weight;
                x1 = j;
            }
        }//找到第一小节点
        maxweight = MAXWEIGHT;
        for (j = 0; j < n + i; j++)
        {
            if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight && j != x1)
            {
                maxweight = huffnode[j].weight;
                x2 = j;
            }
        }//找到第二小节点，多了一个判定条件
        huffnode[x1].parent = n + i;
        huffnode[x2].parent = n + i;//将新节点和最小两个节点连接
        huffnode[n + i].weight = huffnode[x1].weight + huffnode[x2].weight;
        huffnode[n + i].left = x1;
        huffnode[n + i].right = x2;
    }
}
void Coding(huffmannode huffnode[], huffmancode huffcode[], int n)        //构建字符的哈夫曼编码表
{
    int father;
    int now, i;
    huffmancode temp;
    int p;
    for (i = 0; i < n; i++)
    {
        huffcode[i].front = MAXBIT - 1;
        now = i;
        father = huffnode[i].parent;
        while (father != TOP)//一直到根节点，根节点不存储编码
        {
            if (now == huffnode[father].left) {
                huffcode[i].bit[huffcode[i].front] = 0;//左子树存储0
            }
            else {
                huffcode[i].bit[huffcode[i].front] = 1;//右子树存储1
            }
            now = father;
            father = huffnode[father].parent;
            huffcode[i].front--;
        }
        ++huffcode[i].front;//多减了一位，一定要有自加一次
    }
}
void CodingFile(huffmannode huffnode[], huffmancode huffcode[], int n, int& chnum)     //对原文本编码并将编码写入文件
{
    FILE* file1 = NULL;
    FILE* file2 = NULL;
    if ((file1 = fopen("C:\\huff\\word.txt", "r")) == NULL)
    {
        printf("打开word.txt失败!\n");
        exit(0);
    }
    if ((file2 = fopen("C:\\huff\\asc.txt", "w")) == NULL)
    {
        printf("打开asc.txt失败!\n");
        exit(0);
    }
    char ch;
    fscanf(file1, "%c", &ch);
    if (ch != '#')
    {
        for (int i = 0; i < n; i++)
        {
            if (huffnode[i].value == ch)
            {
                for (int j = huffcode[i].front; j < MAXBIT; j++)
                {
                    fprintf(file2, "%d", huffcode[i].bit[j]);
                    chnum++;
                }
            }
        }
    }
    while (ch != '#') {
        fscanf(file1, "%c", &ch);
        if (ch != '#')
        {
            for (int i = 0; i < n; i++)
            {
                if (huffnode[i].value == ch)
                {
                    for (int j = huffcode[i].front; j < MAXBIT; j++)
                    {
                        fprintf(file2, "%d", huffcode[i].bit[j]);
                        chnum++;
                    }
                }
            }
        }
    };
    fclose(file1);
    fclose(file2);
}
void Decoding(huffmannode huffnode[], int n)
{
    FILE* fc = NULL, * fd = NULL;
    if ((fc = fopen("C:\\huff\\asc.txt", "r")) == NULL)
    {
        printf("打开asc.txt失败!\n");
        exit(0);
    }
    if ((fd = fopen("C:\\huff\\outcome.txt", "w")) == NULL)
    {
        printf("打开outcome.txt失败!\n");
        exit(0);
    }
    char nums[100000];
    fscanf(fc, "%s", nums);//因为译码文件没有空格
    int i = 0, j = 0, temp;
    for (i = 0; nums[i] != '\0'; i++);
    while (j < i)
    {
        temp = 2 * n - 2;//找到根节点
        while (huffnode[temp].left != null && huffnode[temp].right != null)
        {
            if (nums[j] == '0')
                temp = huffnode[temp].left;
            else
                temp = huffnode[temp].right;
            j++;
        }
        fprintf(fd, "%c", huffnode[temp].value);
    }
    fclose(fc);
    fclose(fd);
}
void printout(character ch[], int n)
{
    int total = 0;
    for (int i = 0; i < n; i++)
    {
        total = total + ch[i].number;
    }
    for (int i = 0; i < n; i++)
    {
        printf("字符%c的频率是%f\n", ch[i].ch,(1.0*ch[i].number) / total);
    }
}
void Calculate_yasuo(character ch[], int n, int chnum)   //计算压缩率
{
    int total = 0;
    for (int i = 0; i < n; i++)
    {
        total =total+ ch[i].number;
    }
    printf("=>压缩成功，压缩比%f%%", 1.0*chnum /(8*total * 100));//除8是因为在txt文档中，一个字符占8位
}

int main()
{
    huffmannode huffnode[500];
    character chars[500];
    huffmancode huffcode[500];
    int n = 0, chnum = 0;
    cout << "********请选择：1 压缩 2 解压 ********|" << endl;
    cout << "！！！！！注意！！！！！              |" << endl;
    cout << "****系统只支持编码或者解压分开进行****|" << endl;
    cout << "************输入0退出系统*************|" << endl;
    cout << "---------------------------------------" << endl;
    int x; cin >> x;
    while (x != 0) {
        if (x == 1) {
            cout << "系统将自动压缩你保存在本地的word.txt文件" << endl;
            tongji(chars, n);
            printout(chars, n);
            createhuffmantree(huffnode, n, chars);
            Coding(huffnode, huffcode, n);
            CodingFile(huffnode, huffcode, n, chnum);
            Calculate_yasuo(chars, n, chnum);
            cout << "------------------------------------------" << endl;
            cout << "********请选择：1 压缩 2 解压 ********|" << endl;
            cout << "！！！！！注意！！！！！              |" << endl;
            cout << "****系统只支持编码或者解压分开进行****|" << endl;
            cout << "************输入0退出系统*************|" << endl;
            cout << "---------------------------------------" << endl;
        }
        else {
            cout << "系统将自动为您解压您保存在本地的asc.txt文件" << endl;
            tongji(chars, n);
            Decoding(huffnode, n);
            cout << "=>解压成功，内容已保存在本地outcome.txt文件" << endl;
            cout << "------------------------------------------" << endl;
            cout << "********请选择：1 压缩 2 解压 ********|" << endl;
            cout << "！！！！！注意！！！！！              |" << endl;
            cout << "****系统只支持编码或者解压分开进行****|" << endl;
            cout << "************输入0退出系统*************|" << endl;
            cout << "---------------------------------------" << endl;
        }
        cin >> x;
        if (x == 0) {
            cout << "您已成功退出系统!";
            exit(0);
        }
    }//进行循环，保证能编码和译码都能进行
    return 0;
}