哈夫曼树和哈夫曼编码

caka07

已于 2023-05-11 00:44:45 修改

阅读量72

点赞数

分类专栏：数据结构学习　文章标签：c++ 开发语言

于 2023-05-11 00:44:34 首次发布

本文链接：https://blog.csdn.net/caka07/article/details/130612892

版权

数据结构学习专栏收录该内容

17 篇文章 0 订阅

订阅专栏

本文展示了一个C++程序，该程序使用哈夫曼编码对文本文件进行压缩和解压缩。程序首先统计输入文件中字符的频率，然后构建哈夫曼树并生成编码。它将源文件转换为二进制编码，创建编码文件，并能从编码文件恢复原始内容。程序在Clion中使用CMake构建，遇到了文件访问的问题，但作者已识别出问题可能在于编译输出路径。

摘要由CSDN通过智能技术生成

代码写得比较急，在调试时出现不能访问文件的错误。估计是因为 CLion 的 CMake 项目把可执行文件生成在 cmake-build-debug 文件夹里，并以该文件夹作为运行时的工作目录，所以用相对路径打开 poem.txt 时找不到文件。目前处于只能统计权重的状态，后续改进后把运行结果放上来。

//
// Created by A on 2023/5/11.
//
//显示每个字符以一个字节进行二进制编码后的编码文件
/*
 * @Description:Huffman code&decode
 * @Author: Caka
 * @Date: 2023-05-11 00:31:25
 */
#include <iostream>
#include <cstring>
#include <string>
#include <fstream>
#include <vector>
#include <bitset>
#include <Windows.h>

using namespace std;

// Huffman tree node.
// Nodes are stored in an array that is used from index 1 upward; index 0 is
// the "no node" value for parent, l_child and r_child.
// Every field has a zero default so that freshly allocated nodes never carry
// indeterminate values.
struct HTNode {
    int weight = 0;    // occurrence count (leaf) or sum of both children's weights (internal node)
    int parent = 0;    // array index of the parent node, 0 while the node has none
    int l_child = 0;   // array index of the left child, 0 for a leaf
    int r_child = 0;   // array index of the right child, 0 for a leaf
    char data = '\0';  // character represented by a leaf
};

// A Huffman tree is handled as a pointer to its node array.
using HuffmanTree = HTNode*;

// Code table: an array of NUL-terminated '0'/'1' strings.
using HuffmanCode = char**;

// Number of distinct characters found in the input file. frequencyRecord()
// increments it once per character with a non-zero count; the other functions
// use it as the number of leaves stored in HT[1..N].
static int N;

// Forward declarations of the functions defined below.

// Counts the characters of the input text file, updates N and allocates HT
// with one leaf (data + weight) per distinct character in HT[1..N].
void frequencyRecord(HuffmanTree& HT);

// Scans HT[1..num] for nodes whose parent is 0 and reports two of them through
// child1 / child2; intended to be the two with the lowest weights.
void select(HuffmanTree HT, int num, int& child1, int& child2);

// Fills the internal nodes HT[N+1..2N-1], allocates HC, stores each leaf's
// '0'/'1' code string in HC[1..N] and prints the code table.
void HuffmanCoding(HuffmanTree& HT, HuffmanCode& HC);

// Writes the code of every character of poem.txt to codefile.txt and appends
// each written code to `code`.
void zip(HuffmanTree& HT, HuffmanCode& HC, vector<string>& code);

// For each entry of `code`, writes the character whose code string equals it
// to decodefile.txt.
void unzip(HuffmanTree& HT, HuffmanCode& HC, vector<string>& code);

// Writes every character of poem.txt to binaryfile.txt as 8 '0'/'1' characters.
void binaryCode();

// Counts how often each byte value occurs in the input file and creates the
// leaf nodes of the Huffman tree from the result.
//
// On return:
//   - N is the number of distinct byte values that occurred;
//   - HT points to a newly allocated, zero-initialised array of 2 * N nodes;
//     HT[1..N] are the leaves (data + weight) in ascending byte-value order and
//     index 0 is unused. The caller owns the array (release with delete[]).
// If the file cannot be opened a message is printed, N becomes 0 and HT is an
// empty array.
//
// NOTE(review): the input path is absolute while zip() and binaryCode() open
// "poem.txt" relative to the working directory - confirm they are meant to
// refer to the same file.
void frequencyRecord(HuffmanTree& HT) {
    // One counter per possible byte value; a plain char may be negative, so it
    // is always converted to unsigned char before being used as an index.
    const int kByteValues = 256;
    int counts[kByteValues] = {0};

    std::ifstream originFile("D:\\JetBrains\\Clioncpp\\Structure\\BinaryTree\\poem.txt");
    if (!originFile) {
        std::cout << "Can't find the file!" << std::endl;
    }
    else {
        char ch;
        // get() does not skip whitespace; the loop ends on EOF or on any read error.
        while (originFile.get(ch)) {
            ++counts[static_cast<unsigned char>(ch)];
        }
    }

    int distinct = 0;
    for (int value = 0; value < kByteValues; ++value) {
        if (counts[value] != 0) {
            ++distinct;
        }
    }
    // Assign rather than accumulate so a repeated call does not inflate N.
    N = distinct;

    // 2 * N slots: indices 1..N hold the leaves, N+1..2N-1 the internal nodes.
    HT = new HTNode[2 * N]();
    int next = 1;
    for (int value = 0; value < kByteValues; ++value) {
        if (counts[value] != 0) {
            HT[next].weight = counts[value];
            HT[next].data = static_cast<char>(value);
            ++next;
        }
    }
}

// Finds the two lowest-weight nodes among HT[1..num] that have no parent yet.
//
// HT     node array, used from index 1
// num    highest index to inspect
// child1 receives the index of the lightest parentless node
// child2 receives the index of the second lightest parentless node
//
// weight(child1) <= weight(child2); between equal weights the lower index wins.
// An output of 0 means no such node exists (fewer than two parentless nodes).
void select(HuffmanTree HT, int num, int& child1, int& child2) {
    child1 = child2 = 0;
    for (int i = 1; i <= num; ++i) {
        if (HT[i].parent != 0) {
            continue;  // already merged into a subtree
        }
        if (child1 == 0 || HT[i].weight < HT[child1].weight) {
            // New minimum: the previous minimum becomes the runner-up.
            child2 = child1;
            child1 = i;
        }
        else if (child2 == 0 || HT[i].weight < HT[child2].weight) {
            child2 = i;
        }
    }
}

// Builds the Huffman tree over the leaves HT[1..N], derives the code of every
// leaf and prints the resulting code table.
//
// HT  node array with the leaves (data + weight) already stored in HT[1..N]
//     and room for 2 * N nodes; the internal nodes HT[N+1..2N-1] are filled here.
// HC  receives a newly allocated array of N + 1 pointers; HC[i] (1 <= i <= N)
//     is a NUL-terminated '0'/'1' string allocated with new[], HC[0] is nullptr.
//     The caller owns the array and the strings.
//
// A left edge contributes '0' and a right edge '1'. When there is only one
// distinct symbol it gets the code "0" so that encoded output is not empty.
// With N == 0 no tree is built and nothing is printed.
void HuffmanCoding(HuffmanTree& HT, HuffmanCode& HC) {
    HC = new char*[N > 0 ? N + 1 : 1];
    HC[0] = nullptr;
    if (N <= 0) {
        return;
    }

    const int m = 2 * N - 1;  // total number of nodes in the finished tree

    // Reset the links of every node; internal nodes also get a placeholder symbol.
    for (int i = 1; i <= m; ++i) {
        HT[i].parent = 0;
        HT[i].l_child = 0;
        HT[i].r_child = 0;
        if (i > N) {
            HT[i].weight = 0;
            HT[i].data = '#';
        }
    }

    // Repeatedly merge the two lightest parentless nodes into a new node i.
    for (int i = N + 1; i <= m; ++i) {
        int child1 = 0;
        int child2 = 0;
        select(HT, i - 1, child1, child2);
        HT[child1].parent = i;
        HT[child2].parent = i;
        HT[i].l_child = child1;
        HT[i].r_child = child2;
        HT[i].weight = HT[child1].weight + HT[child2].weight;
    }

    // Walk from each leaf up to the root; the bits are collected in reverse order.
    std::string reversed;
    for (int i = 1; i <= N; ++i) {
        reversed.clear();
        for (int c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent) {
            reversed.push_back(HT[f].l_child == c ? '0' : '1');
        }
        std::string bits(reversed.rbegin(), reversed.rend());
        if (bits.empty()) {
            bits = "0";  // the only leaf is also the root
        }
        HC[i] = new char[bits.size() + 1];
        std::strcpy(HC[i], bits.c_str());
    }

    // Print the code table; newline and space are shown by name.
    for (int i = 1; i <= N; ++i) {
        if (HT[i].data == '\n') {
            std::cout << "回车" << " " << HC[i] << std::endl;
        }
        else if (HT[i].data == ' ') {
            std::cout << "空格" << " " << HC[i] << std::endl;
        }
        else {
            std::cout << HT[i].data << " " << HC[i] << std::endl;
        }
    }
}

// Encodes poem.txt with the Huffman code table and writes the result, as a
// sequence of '0'/'1' characters, to codefile.txt.
//
// HT    leaves HT[1..N]; only their data field is read
// HC    code table, HC[i] is the code of HT[i].data
// code  every code written to the file is also appended here, one entry per
//       encoded input character
//
// codefile.txt is created (or truncated) even when the input cannot be opened.
// Characters that have no entry in the table are skipped.
void zip(HuffmanTree& HT, HuffmanCode& HC, vector<string>& code) {
    std::ofstream codeFile("codefile.txt");
    std::ifstream originFile("poem.txt");
    if (!codeFile || !originFile) {
        // Without this check a missing input file made the read loop spin forever.
        std::cout << "Can't find the file!" << std::endl;
        return;
    }

    // Map each byte value to the index of its leaf (0 = no code); built once
    // so that every input character is a single table lookup.
    int leafOf[256] = {0};
    for (int i = 1; i <= N; ++i) {
        int& slot = leafOf[static_cast<unsigned char>(HT[i].data)];
        if (slot == 0) {
            slot = i;
        }
    }

    char ch;
    // Ends on EOF or on any read error.
    while (originFile.get(ch)) {
        const int leaf = leafOf[static_cast<unsigned char>(ch)];
        if (leaf != 0) {
            codeFile << HC[leaf];
            code.push_back(HC[leaf]);
        }
    }
}

// Translates the recorded code sequence back into characters and writes them
// to decodefile.txt.
//
// HT    leaves HT[1..N]; HT[i].data is the character for code HC[i]
// HC    code table with entries HC[1..N]
// code  one code string per character, in output order
//
// For every entry of `code`, each table entry with an equal code string
// contributes its character to the output.
void unzip(HuffmanTree& HT, HuffmanCode& HC, vector<string>& code) {
    std::ofstream decodeFile("decodefile.txt");
    if (!decodeFile) {
        std::cout << "Can't find the file!" << std::endl;
        return;
    }

    for (const std::string& word : code) {
        int leaf = 1;
        while (leaf <= N) {
            if (word == HC[leaf]) {
                decodeFile << HT[leaf].data;
            }
            ++leaf;
        }
    }
    decodeFile.close();
}

// Writes the fixed-width binary form of poem.txt to binaryfile.txt: every input
// byte becomes eight '0'/'1' characters, most significant bit first.
//
// binaryfile.txt is created (or truncated) even when the input cannot be
// opened. If either file cannot be opened a message is printed and nothing
// is written.
void binaryCode() {
    std::ofstream binaryFile("binaryfile.txt");
    std::ifstream originFile("poem.txt");
    if (!originFile || !binaryFile) {
        std::cout << "Can't find the file!" << std::endl;
        return;
    }

    char ch;
    // Ends on EOF or on any read error, so a failed stream cannot spin forever.
    while (originFile.get(ch)) {
        // Go through unsigned char so a negative char maps to its byte value.
        binaryFile << std::bitset<8>(static_cast<unsigned char>(ch));
    }
}

int main() {
    system("color 02");
    cout << "******************" << endl;
    cout << "*哈夫曼编码解码器*" << endl;
    cout << "******************" << endl;
    Sleep(1000);
    HuffmanTree HT;
    HuffmanCode HC;
    vector<string> code;
    cout << "\n\n\n需要进行编码的文件内容为：\n\n";
    system("more poem.txt");
    Sleep(2000);
    cout << "\n\n正在打开poem.txt进行二进制编码......" << endl;
    Sleep(2000);
    binaryCode();
    cout << "\n\n二进制编码内容为：" << endl;
    Sleep(2000);
    system("more binaryfile.txt");
    Sleep(2000);
    cout << "\n\n正在统计字符权重......" << endl;
    Sleep(2000);
    frequencyRecord(HT);
    HuffmanCoding(HT, HC);
    cout << "\n\n写入编码文件......" << endl;
    Sleep(2000);
    zip(HT, HC, code);
    cout << "\n\n编码结果为：" << endl;
    system("more codefile.txt");
    Sleep(2000);
    cout << "\n\n解码编码文件......" << endl;
    Sleep(2000);
    unzip(HT, HC, code);
    cout << "\n\n解码结果为：" << endl;
    Sleep(2000);
    system("more decodefile.txt");
    cout << "\n\n编码前占用字节空间：" << endl;
    Sleep(1000);
    ifstream file_before("binaryfile.txt", ios::binary | ios::ate);
    auto size_before = file_before.tellg();
    cout << size_before << endl;
    file_before.close();
    cout << "\n\n编码后占用字节空间：" << endl;
    Sleep(1000);
    ifstream file_after("codefile.txt", ios::binary | ios::ate);
    auto size_after = file_after.tellg();
    cout << size_after << endl;
    file_after.close();
    Sleep(1000);
    cout << "\n\n压缩率为：" << (static_cast<double>(size_before - size_after)) / size_before * 100.0 << "%" << endl;
    Sleep(100000);
    return 0;
}