哈夫曼树实现数据压缩(数组模拟)

染汐_

已于 2024-06-06 09:30:04 修改

阅读量79

点赞数 1

文章标签：数据结构算法 c++ c语言

于 2023-07-05 23:48:01 首次发布

本文链接：https://blog.csdn.net/weixin_74173457/article/details/131565919

版权

哈夫曼树简介

给定N个权值作为N个叶子节点，构造一棵二叉树，若该树的带权路径长度达到最小，称这样的二叉树为最优二叉树，也称为哈夫曼树。哈夫曼树是带权路径长度最短的树，权值较大的结点离根较近。

数组模拟哈夫曼树的代码实现

#include<algorithm>
#include<cstdio>
#include<cstring>
#include<fstream>
#include<iostream>
#include<string>
using namespace std;
/**
 * Counts how often each byte value occurs in a stream.
 *
 * @param fp    Stream to scan. It is rewound to the beginning first. May be
 *              NULL, in which case nothing is read and every count stays 0.
 * @param name  Output array of at least 256 ints. Receives, in ascending
 *              order, every byte value that occurs at least once.
 * @param count Output array of at least 256 ints. count[b] is set to the
 *              number of occurrences of byte value b (0..255).
 * @return The number of distinct byte values found, i.e. how many entries of
 *         `name` were written.
 */
int readw(FILE *fp, int *name, int *count) {
    std::memset(count, 0, sizeof(int) * 256);

    if (fp != NULL) {
        fseek(fp, 0, SEEK_SET);
        // fgetc returns the byte as an int in 0..255, or EOF. Keeping it in an
        // int (not a char) gives a valid array index for bytes >= 0x80 and
        // lets the EOF sentinel end the loop instead of being counted.
        int c;
        while ((c = fgetc(fp)) != EOF) {
            count[c]++;
        }
    }

    // Collect the byte values that actually occur, in ascending order.
    int j = 0;
    for (int i = 0; i < 256; i++) {
        if (count[i]) {
            name[j] = i;
            j++;
        }
    }
    return j;
}
// One node of the array-backed Huffman tree.
// Links are indexes into the node array rather than pointers; -1 means "none".
struct HNode {
    short parent, lchild, rchild;  // indexes of the parent / left child / right child
    short name;                    // symbol (byte value) stored in a leaf
    int weight;                    // occurrence count of the subtree rooted here
};

// Pointer into a node array; used both for the array base and for the root node.
typedef HNode *HTree;


/**
 * Builds a Huffman tree inside the caller-supplied node array.
 *
 * Leaves are stored in ht[0..n-1] (one per entry of `name`), and the n-1
 * internal nodes are appended in ht[n..2n-2] in the order they are created.
 *
 * @param name Symbols to encode; name[i] is the symbol of leaf i.
 * @param w    Weight table indexed by symbol: the weight of leaf i is w[name[i]].
 * @param n    Number of symbols (leaves).
 * @param ht   Node storage with room for at least 2*n - 1 nodes.
 * @return Pointer to the root node ht[2n-2] when n > 0. When n <= 0 no node
 *         is initialized and `ht` itself is returned.
 */
HTree create_huffman_tree(int *name, int *w, int n, HNode *ht) {
    // Leaves: one per symbol, not yet attached to anything.
    for (int i = 0; i < n; i++) {
        ht[i].weight = w[name[i]];
        ht[i].name = name[i];
        ht[i].lchild = -1;
        ht[i].rchild = -1;
        ht[i].parent = -1;
    }

    // Each round merges the two lightest parentless nodes into a new node.
    for (int i = 0; i < n - 1; i++) {
        const int merged = n + i;  // index of the node created this round
        int min1 = 0x7fffffff, min2 = 0x7fffffff;  // lightest and second-lightest weight
        int x1 = -1, x2 = -1;                      // their indexes
        for (int j = 0; j < merged; j++) {
            if (ht[j].parent != -1) {
                continue;  // already merged into a subtree
            }
            if (ht[j].weight < min1) {
                min2 = min1;
                x2 = x1;
                min1 = ht[j].weight;
                x1 = j;
            } else if (ht[j].weight < min2) {
                min2 = ht[j].weight;
                x2 = j;
            }
        }
        ht[x1].parent = merged;
        ht[x2].parent = merged;
        ht[merged].weight = ht[x1].weight + ht[x2].weight;
        ht[merged].lchild = x1;
        ht[merged].rchild = x2;
        ht[merged].parent = -1;
        // Internal nodes carry no symbol. Give the field a defined value so
        // that copying a node by value never reads an indeterminate short.
        ht[merged].name = -1;
    }
    return n > 0 ? ht + 2 * n - 2 : ht;
}

/**
 * Walks the Huffman tree depth-first and stores the code of every leaf.
 *
 * @param tp         Base of the node array.
 * @param root       Index (into tp) of the node currently being visited.
 * @param level      Depth of that node; also the slot in the path buffer that
 *                   receives `code`.
 * @param code       Label of the edge that led here: '0' for a left edge,
 *                   '1' for a right edge. The initial call passes a
 *                   placeholder that is skipped when the code is copied out.
 * @param dictionary Output table: dictionary[symbol] receives the
 *                   NUL-terminated string of '0'/'1' characters for that leaf.
 */
void create_dictionary(HTree tp, int root, int level, char code, char dictionary[][256]) {
    // Path from the root to the current node, shared across the recursion.
    static char path[256];

    // Record the edge that led to this node before doing anything else.
    path[level] = code;

    const bool hasLeft = tp[root].lchild != -1;
    const bool hasRight = tp[root].rchild != -1;

    if (!hasLeft && !hasRight) {
        // Leaf: terminate the path and copy it out, skipping slot 0, which
        // holds the placeholder passed by the initial call.
        path[level + 1] = '\0';
        std::strcpy(dictionary[tp[root].name], path + 1);
        return;
    }

    if (hasLeft) {
        create_dictionary(tp, tp[root].lchild, level + 1, '0', dictionary);
    }
    if (hasRight) {
        create_dictionary(tp, tp[root].rchild, level + 1, '1', dictionary);
    }
}

/**
 * Encodes a file with the given Huffman dictionary, packing 8 code bits into
 * each output byte (most significant bit first).
 *
 * The bits written are also echoed to standard output, followed by a newline.
 *
 * @param target     Path of the compressed file to create.
 * @param dictionary dictionary[b] is the NUL-terminated '0'/'1' code string
 *                   for byte value b.
 * @param source     Path of the file to compress. Defaults to the path that
 *                   was previously hard-coded.
 * @return Number of zero padding bits appended to the last byte (0..7), or 0
 *         if either file could not be opened.
 */
int writeHuffmanCode(std::string target, char dictionary[256][256],
                     std::string source = "C:\\Users\\86130\\Desktop\\压缩文件\\待压缩的文件.txt") {
    std::ofstream ofs(target, std::ios::binary);
    std::ifstream ifs(source, std::ios::in);
    if (!ifs.is_open() || !ofs.is_open()) {
        std::cerr << "文件打开失败" << std::endl;
        return 0;
    }

    // Echoes the `count` most significant bits of `byte`, highest bit first.
    auto echoBits = [](unsigned char byte, int count) {
        for (int bit = 7; bit > 7 - count; --bit) {
            std::cout << ((byte >> bit) & 1);
        }
    };

    // Unsigned so that shifting never touches a sign bit.
    unsigned char bitBuffer = 0;  // bits collected so far, newest in the low end
    int bitCount = 0;             // number of valid bits in bitBuffer

    char a;
    while (ifs.get(a)) {
        // Index with the unsigned byte value: plain char may be signed, which
        // would give a negative index for bytes >= 0x80.
        const char *code = dictionary[static_cast<unsigned char>(a)];
        for (int i = 0; code[i]; i++) {
            bitBuffer = static_cast<unsigned char>((bitBuffer << 1) | (code[i] == '1' ? 1 : 0));
            bitCount++;

            if (bitCount == 8) {  // buffer full: flush one byte
                ofs.put(static_cast<char>(bitBuffer));
                echoBits(bitBuffer, 8);
                bitBuffer = 0;
                bitCount = 0;
            }
        }
    }

    // Flush a partially filled last byte, left-aligned and zero-padded.
    if (bitCount > 0) {
        bitBuffer = static_cast<unsigned char>(bitBuffer << (8 - bitCount));
        ofs.put(static_cast<char>(bitBuffer));
        // Echo only the real code bits, which now sit in the high end.
        echoBits(bitBuffer, bitCount);
    }
    std::cout << std::endl;

    ifs.close();
    ofs.close();
    return bitCount ? (8 - bitCount) : 0;
}
void decompress(string source, string target, HTree Hroot, HTree tp, int remainingBits) {
    ifstream file(source,std::ios::binary|std::ios::ate);
    std::streampos fileSize = file.tellg();file.close();
    file.close();
    ifstream ifs(source, ios::binary);
    ofstream ofs(target);

    int bitCount = 8;  // 缓冲区中比特位的数量
    char bitBuffer;

    HNode currentNode = *Hroot;  // 当前节点为根节点
    int num = 0;
    while (ifs.read(&bitBuffer, 1)) {
        num++;
        if(num==fileSize)break;
        for (int i = 7; i >= 0; i--) {
            if (bitCount == 0) {
                //bitBuffer = ifs.get();
                bitCount = 8;
            }
            bool bit = (bitBuffer >> (bitCount - 1)) & 1;  // 读取比特位
            bitCount--;
            cout<<bit;
            if (bit) {
                currentNode = tp[currentNode.rchild];  // 右子树
            } else {
                currentNode = tp[currentNode.lchild];  // 左子树
            }
            if (currentNode.lchild == -1 && currentNode.rchild == -1) {
                // 当前节点为叶子节点
                char character = currentNode.name;
                ofs.put(character);

                currentNode = *Hroot;  // 重置当前节点为根节点
            }
        }
    }

    // 处理最后一个字节的未满比特位的情况
    for (int i = 7; i >= remainingBits; i--) {
        bool bit = (bitBuffer >> i) & 1;  // 读取比特位
        cout<<bit;
        if (bit) {
            currentNode = tp[currentNode.rchild];  // 右子树
        } else {
            currentNode = tp[currentNode.lchild];  // 左子树
        }

        if (currentNode.lchild == -1 && currentNode.rchild == -1) {
            // 当前节点为叶子节点
            char character = currentNode.name;
            ofs.put(character);

            currentNode = *Hroot;  // 重置当前节点为根节点
        }
    }

    ifs.close();
    ofs.close();
}
/**
 * Demo driver: counts byte frequencies in the input file, builds the Huffman
 * tree and dictionary, prints each code, compresses the file and then
 * decompresses it again.
 *
 * @return 0 on success, 1 if the input file cannot be opened or is empty.
 */
int main()
{
    FILE *fp = fopen("C:\\Users\\86130\\Desktop\\压缩文件\\待压缩的文件.txt", "r");
    if (fp == NULL) {
        std::cerr << "文件打开失败" << std::endl;
        return 1;
    }

    int name[256], count[256];
    int num = readw(fp, name, count);
    fclose(fp);  // the frequency pass is the only user of this handle
    if (num == 0) {
        // Nothing to encode; building a tree from zero symbols is meaningless.
        std::cerr << "待压缩的文件为空" << std::endl;
        return 1;
    }

    // 256 possible leaves need at most 2 * 256 - 1 = 511 nodes.
    HNode ht[511];
    HTree Hroot = create_huffman_tree(name, count, num, ht);

    // Zero-initialized so that symbols absent from the input map to "".
    char dictionary[256][256] = {};
    create_dictionary(ht, static_cast<int>(Hroot - ht), 0, '#', dictionary);
    for (int i = 0; i < num; i++)
        std::cout << (char)name[i] << "的哈夫曼编码为" << dictionary[name[i]] << std::endl;

    // The value returned is the number of padding bits in the last byte.
    int remainBits = writeHuffmanCode("C:\\Users\\86130\\Desktop\\压缩文件\\压缩后的文件.txt", dictionary);
    std::cout << "余下的字节数" << remainBits << std::endl;
    decompress("C:\\Users\\86130\\Desktop\\压缩文件\\压缩后的文件.txt", "C:\\Users\\86130\\Desktop\\压缩文件\\解压后的文件.txt", Hroot, ht, remainBits);
    return 0;
}

如果把其中的 writeHuffmanCode 和 decompress 这两个函数写成下面这种形式：

/**
 * Text variant of the encoder: writes each symbol's Huffman code as a run of
 * '0'/'1' characters, one character per code bit.
 *
 * @param target     Path of the output file to create.
 * @param dictionary dictionary[b] is the NUL-terminated '0'/'1' code string
 *                   for byte value b.
 * @param source     Path of the file to encode. Defaults to the path that was
 *                   previously hard-coded.
 */
void writeHuffmanCode(std::string target, char dictionary[256][256],
                      std::string source = "C:\\Users\\86130\\Desktop\\压缩文件\\待压缩的文件.txt")
{
    std::ofstream ofs;
    ofs.open(target, std::ios::binary);
    std::ifstream ifs;
    ifs.open(source, std::ios::in);
    if (!ifs.is_open() || !ofs.is_open())
    {
        std::cout << "文件打开失败";
        return;
    }
    char a;
    while (ifs.get(a))
    {
        // Index with the unsigned byte value: plain char may be signed, which
        // would give a negative index for bytes >= 0x80.
        const char *code = dictionary[static_cast<unsigned char>(a)];
        // Write exactly the code characters. `dictionary` is a pointer
        // parameter here, so sizeof(dictionary) is the size of a pointer, not
        // the length of the code.
        ofs.write(code, static_cast<std::streamsize>(std::strlen(code)));
    }
    ifs.close();
    ofs.close();
}
void decompress(string source,string target,HTree Hroot,HTree tp){
    ofstream ofs;
    ofs.open(target,ios::out);
    ifstream ifs(source,ios::binary);
    if(!ifs.is_open()||!ofs.is_open())
    {
        cout<<"文件打开失败";
        return;
    }
    char a;
    HNode p = *Hroot;
    while(ifs.get(a))
    {
        if(a=='0') {
            if(p.lchild!=-1)p = tp[p.lchild];
            else {
                ofs<<(char)p.name;
                p = tp[(*Hroot).lchild];
            }
        }
            else if(a=='1'){
                if(p.rchild!=-1)p = tp[p.rchild];
                else {
                    ofs<<(char)p.name;
                    p = tp[(*Hroot).rchild];
                }
            }

    }
    if(p.lchild==-1&&p.rchild==-1)ofs<<(char)p.name;
    ofs.close();
    ifs.close();
}

写成这种形式会导致压缩后的文件反而变得更大：编码是以 '0'/'1' 字符的形式写入文件的，每个编码位至少要占用一个字节。正确的写法是使用位操作，把 8 个编码位打包进一个字节，从而更充分地利用空间（后续会有 STL 版本，emmm 还是看浏览量 emmm）。