C语言实现哈夫曼编码压缩存储

最新推荐文章于 2024-05-02 00:56:05 发布
Tian Meng
最新推荐文章于 2024-05-02 00:56:05 发布
阅读量256
点赞数 1
文章标签： c语言 数据结构 算法
本文链接：https://blog.csdn.net/m0_73790534/article/details/130350627
版权
数据结构刷题专栏收录该内容
9 篇文章 0 订阅
订阅专栏
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// A node of the Huffman tree.
typedef struct Node {
    char data;           // character stored in this node
    int freq;            // frequency (weight) of the node
    struct Node* left;   // left child, or NULL
    struct Node* right;  // right child, or NULL
} Node;

// Priority queue (binary min-heap ordered by node frequency).
// The heap is 1-based: live entries occupy arr[1] .. arr[size] and arr[0]
// is never used, so the array needs one slot more than the largest number
// of nodes it must hold. A string can contain up to 256 distinct byte
// values, each of which becomes one heap entry, hence 256 + 1 slots.
enum { HEAP_CAPACITY = 256 + 1 };

typedef struct Heap {
    int size;                   // number of nodes currently stored
    Node* arr[HEAP_CAPACITY];   // heap entries (Huffman tree nodes), 1-based
} Heap;

// Create a new, empty heap.
// Returns a heap with size 0, or NULL when the allocation fails.
// The caller owns the returned heap and releases it with free().
Heap* createHeap(void) {
    // calloc zero-fills the block, so every slot starts out as a known value
    // instead of indeterminate memory.
    Heap* h = calloc(1, sizeof *h);
    if (h == NULL) {
        return NULL;
    }
    h->size = 0;
    return h;
}

// Insert a node into the min-heap, keeping the smallest frequency at arr[1].
//   h    - heap to insert into; a NULL heap is ignored
//   node - node to insert; a NULL node is ignored
// The heap is 1-based, so the last usable slot is one less than the array
// length. Inserting into a full heap would write past the array, so that
// case is reported on stderr and the program is aborted.
void insertHeap(Heap* h, Node* node) {
    if (h == NULL || node == NULL) {
        return;
    }

    const int lastSlot = (int)(sizeof h->arr / sizeof h->arr[0]) - 1;
    if (h->size >= lastSlot) {
        fprintf(stderr, "insertHeap: heap is full (%d nodes)\n", h->size);
        abort();
    }

    h->size++;
    int i = h->size;
    // Sift up: move parents with a larger frequency down into the hole
    // until the right position for the new node is found.
    while (i > 1 && node->freq < h->arr[i / 2]->freq) {
        h->arr[i] = h->arr[i / 2];
        i /= 2;
    }
    h->arr[i] = node;
}

// Remove and return the node with the smallest frequency.
//   h - heap to extract from
// Returns the removed node, or NULL when h is NULL or the heap is empty
// (previously an empty heap returned an unset slot and left size at -1).
Node* extractMin(Heap* h) {
    if (h == NULL || h->size < 1) {
        return NULL;
    }

    Node* min = h->arr[1];  // the root holds the minimum
    // Move the last entry to the root and shrink the heap.
    h->arr[1] = h->arr[h->size];
    h->size--;

    // Sift down: swap the moved entry with its smaller child until neither
    // child has a smaller frequency.
    int i = 1;
    while (1) {
        int smallest = i;
        int left = 2 * i;
        int right = 2 * i + 1;

        // Is the left child smaller than the current entry?
        if (left <= h->size && h->arr[left]->freq < h->arr[smallest]->freq) {
            smallest = left;
        }

        // Is the right child smaller than the smaller of the two so far?
        if (right <= h->size && h->arr[right]->freq < h->arr[smallest]->freq) {
            smallest = right;
        }

        // A smaller child exists: swap with it and continue from there.
        if (smallest != i) {
            Node* temp = h->arr[smallest];
            h->arr[smallest] = h->arr[i];
            h->arr[i] = temp;
            i = smallest;
        }
        else {
            // The current entry is already the smallest: heap order restored.
            break;
        }
    }
    return min;
}

// 如果当前节点是最小节点，结束循环
Node* buildHuffmanTree(char* str) {
    // 计算每个字符的频率
    int freq[256] = { 0 };
    for (int i = 0; str[i]; i++) {
        freq[(int)str[i]]++;
    }

    // 创建一个新堆，并将每个具有非零频率的字符作为新节点插入堆中
    Heap* h = createHeap();
    for (int i = 0; i < 256; i++) {
        if (freq[i]) {
            Node* node = (Node*)malloc(sizeof(Node));
            node->data = (char)i;
            node->freq = freq[i];
            node->left = node->right = NULL;
            insertHeap(h, node);
        }
    }

    // 使用贪心算法构建哈夫曼树
    while (h->size > 1) {
        Node* left = extractMin(h); // 提取最小频率的节点
        Node* right = extractMin(h); // 提取次小频率的节点

        // 创建一个新节点，将左右子节点分别连接到新节点上
        Node* newNode = (Node*)malloc(sizeof(Node));
        newNode->data = '\0'; // 父节点不包含字符
        newNode->freq = left->freq + right->freq; // 父节点的频率是左右子节点的频率之和
        newNode->left = left;
        newNode->right = right;
        insertHeap(h, newNode); // 将新节点插入堆中
    }

    // 堆中剩余的最后一个节点就是哈夫曼树的根节点
    Node* root = extractMin(h);
    free(h); // 释放堆的内存
    return root;
}

// Print the code of every character in the tree, one per line, as
// "<char>: <bits>".
//   root - subtree to print
//   arr  - scratch buffer holding the bits on the path from the tree root
//   top  - number of valid bits in arr (the depth of root)
void printHuffmanCodes(Node* root, int arr[], int top) {
    // A node without children is a leaf, i.e. a character: print its path.
    if (root->left == NULL && root->right == NULL) {
        printf("%c: ", root->data);
        int depth = 0;
        while (depth < top) {
            printf("%d", arr[depth]);
            depth++;
        }
        printf("\n");
        return;
    }

    // Internal node: descend left with bit 0, then right with bit 1.
    if (root->left != NULL) {
        arr[top] = 0;
        printHuffmanCodes(root->left, arr, top + 1);
    }
    if (root->right != NULL) {
        arr[top] = 1;
        printHuffmanCodes(root->right, arr, top + 1);
    }
}

// One entry of the code table: a character and its Huffman code.
typedef struct Code {
    char character;   // the character
    char code[256];   // its code as a NUL-terminated string of '0'/'1'
} Code;

// Recursively fill a code table from the tree.
//   root      - subtree to walk
//   arr       - scratch buffer holding the bits on the path from the tree root
//   top       - number of valid bits in arr (the depth of root)
//   codes     - table receiving one entry per leaf
//   codeIndex - index of the next free table entry; advanced for each leaf
void generateCodes(Node* root, int arr[], int top, Code* codes, int* codeIndex) {
    // A node without children is a leaf: record its character and path.
    if (root->left == NULL && root->right == NULL) {
        Code* entry = &codes[*codeIndex];
        entry->character = root->data;
        int depth = 0;
        while (depth < top) {
            entry->code[depth] = '0' + arr[depth];
            depth++;
        }
        entry->code[top] = '\0';  // terminate the bit string
        *codeIndex += 1;          // next leaf goes into the next entry
        return;
    }

    // Internal node: the left branch appends bit 0, the right branch bit 1.
    if (root->left != NULL) {
        arr[top] = 0;
        generateCodes(root->left, arr, top + 1, codes, codeIndex);
    }
    if (root->right != NULL) {
        arr[top] = 1;
        generateCodes(root->right, arr, top + 1, codes, codeIndex);
    }
}

// Encode a string with the codes of the given Huffman tree.
//   root   - root of the Huffman tree
//   input  - NUL-terminated text to encode
//   output - receives a newly allocated, NUL-terminated string of '0'/'1'
//            characters; the caller releases it with free()
// *output is set to NULL when root or input is NULL or when memory cannot
// be allocated. Characters of input that have no code in the tree
// contribute nothing to the result.
void encode(Node* root, char* input, char** output) {
    if (output == NULL) {
        return;
    }
    *output = NULL;
    if (root == NULL || input == NULL) {
        return;
    }

    // Collect the code of every leaf.
    Code codes[256];
    int arr[256];
    int codeIndex = 0;
    generateCodes(root, arr, 0, codes, &codeIndex);

    // Index the codes by byte value so that each input character is looked
    // up in constant time instead of scanning the whole table.
    const char* codeOf[256] = { NULL };
    size_t lengthOf[256] = { 0 };
    for (int j = 0; j < codeIndex; j++) {
        unsigned char symbol = (unsigned char)codes[j].character;
        if (codeOf[symbol] == NULL) {  // keep the first entry for a character
            codeOf[symbol] = codes[j].code;
            lengthOf[symbol] = strlen(codes[j].code);
        }
    }

    // First pass: total length of the encoded text.
    size_t encodedLength = 0;
    for (size_t i = 0; input[i] != '\0'; i++) {
        encodedLength += lengthOf[(unsigned char)input[i]];
    }

    char* encoded = malloc(encodedLength + 1);
    if (encoded == NULL) {
        return;  // *output is already NULL
    }

    // Second pass: append the code of every character. memcpy with the
    // known length is used rather than strcpy_s, which many C libraries do
    // not provide.
    size_t outputIndex = 0;
    for (size_t i = 0; input[i] != '\0'; i++) {
        unsigned char symbol = (unsigned char)input[i];
        if (codeOf[symbol] != NULL) {
            memcpy(encoded + outputIndex, codeOf[symbol], lengthOf[symbol]);
            outputIndex += lengthOf[symbol];
        }
    }
    encoded[outputIndex] = '\0';

    *output = encoded;
}


// Decode a string of '0'/'1' characters with the given Huffman tree.
//   root   - root of the Huffman tree
//   input  - NUL-terminated bit string; '0' follows the left child and
//            '1' the right child
//   output - buffer receiving the decoded, NUL-terminated text; the caller
//            must make it large enough (one character per input bit plus
//            the terminator is always sufficient)
// Decoding stops at the first character that is neither '0' nor '1', and at
// the first bit for which the tree has no matching child; everything decoded
// up to that point is kept. Bits of an incomplete trailing code are dropped.
void decode(Node* root, char* input, char* output) {
    if (output == NULL) {
        return;
    }

    int outputIndex = 0;
    if (root != NULL && input != NULL) {
        Node* current = root;  // walk starts at the root
        for (int i = 0; input[i] != '\0'; i++) {
            Node* next;
            if (input[i] == '0') {
                next = current->left;
            }
            else if (input[i] == '1') {
                next = current->right;
            }
            else {
                break;  // not a bit
            }
            if (next == NULL) {
                break;  // the tree has no branch for this bit
            }
            current = next;

            // A node without children is a leaf, i.e. a decoded character.
            if (current->left == NULL && current->right == NULL) {
                output[outputIndex++] = current->data;
                current = root;  // restart at the root for the next code
            }
        }
    }
    output[outputIndex] = '\0';
}

// Free every node of a Huffman tree; NULL is accepted.
static void releaseTree(Node* node) {
    if (node == NULL) {
        return;
    }
    releaseTree(node->left);
    releaseTree(node->right);
    free(node);
}

// Interactive demo: read a line of text, build and print its Huffman codes,
// print the encoded text, then read a bit string that must occur inside the
// encoded text and print what it decodes to.
// Returns 0 on success and 1 when input cannot be read, the text is empty,
// or the tree or the encoded string cannot be built.
int main(void) {
    // Buffers for the text and for the bit string typed by the user.
    char input[100], binaryInput[100];

    // Read the text; stop on end of input instead of using an unset buffer.
    printf("输入一串字符: ");
    if (fgets(input, sizeof(input), stdin) == NULL) {
        fprintf(stderr, "读取输入失败。\n");
        return 1;
    }
    input[strcspn(input, "\n")] = '\0';  // strip the trailing newline
    if (input[0] == '\0') {
        // An empty text has no characters to build a tree from.
        fprintf(stderr, "输入为空，无法构建哈夫曼树。\n");
        return 1;
    }

    // Build the Huffman tree for the text.
    Node* root = buildHuffmanTree(input);
    if (root == NULL) {
        fprintf(stderr, "构建哈夫曼树失败。\n");
        return 1;
    }

    // Print the code of every character.
    printf("生成的哈夫曼编码: \n");
    int arr[100], top = 0;
    printHuffmanCodes(root, arr, top);

    // Encode the text.
    char* encoded = NULL;
    encode(root, input, &encoded);
    if (encoded == NULL) {
        fprintf(stderr, "编码失败。\n");
        releaseTree(root);
        return 1;
    }
    printf("输入字符串的哈夫曼编码: %s\n", encoded);

    // Ask for a bit string until it is a substring of the encoded text.
    int validInput;
    do {
        validInput = 1;
        printf("输入一串 01 字符串（需为上述编码的一部分或全部）: ");
        if (fgets(binaryInput, sizeof(binaryInput), stdin) == NULL) {
            // End of input: without this check the loop would never end.
            fprintf(stderr, "读取输入失败。\n");
            free(encoded);
            releaseTree(root);
            return 1;
        }
        binaryInput[strcspn(binaryInput, "\n")] = '\0';

        if (strstr(encoded, binaryInput) == NULL) {
            printf("输入不正确，请重新输入。\n");
            validInput = 0;
        }
    } while (!validInput);

    // Decode the bit string and print the result.
    char output[100];
    decode(root, binaryInput, output);
    printf("解码后的字符: %s\n", output);

    // Release everything that was allocated.
    free(encoded);
    releaseTree(root);
    return 0;
}