c++实现霍夫曼编码

c++实现霍夫曼编码,计算信源的熵、平均码长、编码效率、冗余度与压缩比

考虑到指针可能对新手不太友好,这里用的是vector容器(用法类似数组,可以动态扩容)存储树形结构:每个结点用整数下标记录自己的父结点和左右孩子(parent、lChild、rChild),0号下标只作占位,下标为0表示“没有对应结点”。注意这并不是“n号结点的左右孩子固定为2n和2n+1号结点”的完全二叉树存储方式。

HuffmanCode.h

#include <stack>
#include <cmath>
#include "myhead.h"

// One slot of the vector-backed Huffman tree. Links between nodes are
// indices into the node vector; the rest of this file treats index 0 as
// "no node".
struct Node {
    char ch;     // symbol stored in the node
    int weight;  // occurrence count; for a merged node, the sum of its children
    int parent;  // index of the parent node
    int lChild;  // index of the left child
    int rChild;  // index of the right child
};

// 选择最小的权重节点
void findMin(vector<Node> ht, int n, int* s1, int* s2) {
    int min = INT_MAX;
    for (int i = 1; i <= n; i++) {
        if (ht[i].parent == 0) { // 如果没有父节点
            min = i;
            break;
        }
    }

    for (int i = 1; i <= n; i++) {
        if (ht[i].parent == 0) {
            if (ht[i].weight < ht[min].weight) {
                min = i;  // 权重比记录更小,则更新
            }
        }
    }
    *s1 = min;
    //遍历全部结点
    for (int i = 1; i <= n; i++) {
        if (ht[i].parent == 0 && i != (*s1)) {
            min = i;
            break;
        }
    }
    for (int i = 1; i <= n; i++) {
        if (ht[i].parent == 0 && i != (*s1)) {
            if (ht[i].weight < ht[min].weight) {
                min = i;  // 更小则更新
            }
        }
    }
    *s2 = min;
}

// 生成哈夫曼树
void generateTree(vector<Node>& ht, int times[], int n) {
    int m = 2 * n - 1;  // 节点总数
    int s1;
    int s2;
    Node firstNode;  // 填充下标为0
    firstNode.ch = '0';
    firstNode.weight = 0;
    firstNode.lChild = 0;
    firstNode.rChild = 0;
    firstNode.parent = 0;
    ht.push_back(firstNode);
    for (int i = 1; i <= n; i++) {
        Node temp;
        temp.ch = char(96 + i);// a的ASCII码为97
        temp.weight = times[i];
        temp.lChild = 0;
        temp.rChild = 0;
        temp.parent = 0;
        ht.push_back(temp);
    }
    for (int i = n + 1; i <= m; i++) {
        Node temp;
        temp.ch = '0';
        temp.weight = 0;
        temp.lChild = 0;
        temp.rChild = 0;
        temp.parent = 0;
        ht.push_back(temp);
    }
    for (int i = n + 1; i <= m; i++) {
        findMin(ht, i - 1, &s1, &s2);  // 选择最小的两个权重节点
        ht[s1].parent = i;
        ht[s2].parent = i;
        ht[i].lChild = s1;
        ht[i].rChild = s2;
        ht[i].weight = ht[s1].weight + ht[s2].weight;  //更新权重
    }
}

// 从叶节点到根节点找霍夫曼编码
void generateCode(vector<Node>& ht, vector<string>& code, int n) {
    code.push_back("");  // 0下标的占位
    int p;
    int c;
    for (int i = 1; i <= n; i++) {
        stack<char> st;  // 因为是逆向存储,所以使用栈
        int start = n - 1;
        for (c = i, p = ht[i].parent; p != 0; c = p, p = ht[p].parent) {
            if (ht[p].lChild == c) {
                st.push('0');  // 是左孩子则压入0
            }
            else {
                st.push('1');
            }
        }
        int s_len = st.size();
        string hfm_code = "";
        // 从栈中取出
        for (int i = 0; i < s_len; i++) {
            hfm_code += st.top();
            st.pop();
        }
        code.push_back(hfm_code);
    }
    cout << "霍夫曼编码结果如下" << endl;
    for (int i = 1; i <= n; i++)
    {
        cout << ht[i].ch << ":" << code[i] << endl;
    }
}

// Computes the entropy of the source in bits per symbol:
// H = -sum(p_i * log2(p_i)) with p_i = times[i] / n.
//
//   times - occurrence counts; entries 1..len-1 are read (times[0] is not)
//   len   - number of entries in times, including the unused entry 0
//   n     - total number of symbols in the message
// Symbols that never occur are skipped, because 0 * log2(0) would evaluate
// to NaN and poison the whole sum. Returns 0 when n is not positive.
double CalEntropy(int times[], int len, int n) {
    if (n <= 0) {
        return 0.0;
    }
    double sum = 0.0;
    for (int i = 1; i < len; i++) {
        if (times[i] <= 0) {
            continue;  // contributes nothing to the entropy
        }
        const double p = static_cast<double>(times[i]) / n;
        sum += p * std::log2(p);
    }
    return -sum;
}

// Computes the average code length in bits per symbol:
// L = sum(p_i * length(code[i])) with p_i = times[i] / n.
//
//   code  - code table; code[i] is the code word of symbol i (entry 0 is not
//           read). Taken by const reference to avoid copying every string.
//   times - occurrence counts; entries 1..len-1 are read
//   len   - number of entries in times, including the unused entry 0
//   n     - total number of symbols in the message
// Only indices present in both `times` and `code` are used. Returns 0 when n
// is not positive.
double CalAvgLen(const std::vector<std::string>& code, int times[], int len, int n) {
    if (n <= 0) {
        return 0.0;
    }
    int limit = len;
    if (limit > static_cast<int>(code.size())) {
        limit = static_cast<int>(code.size());
    }
    double total = 0.0;
    for (int i = 1; i < limit; i++) {
        const double p = static_cast<double>(times[i]) / n;
        total += static_cast<double>(code[i].length()) * p;
    }
    return total;
}

myhead.h

#include <iostream>
#include <string>
#include <vector>
#include <time.h>
using namespace std;

// Produces `len` pseudo-random lowercase letters ('a'..'z') drawn with rand().
// Only 26 letters are used; the full ASCII range would also work but would
// lengthen every later per-symbol scan. A non-positive `len` yields an empty
// string.
std::string rand_str(int len) {
    std::string letters;
    int remaining = len;
    while (remaining > 0) {
        letters += static_cast<char>('a' + rand() % 26);
        --remaining;
    }
    return letters;
}

// 生成十条随机字符串
vector<string> getChar() {
    vector<string> ten_str;
    srand(unsigned int(time(NULL)));  // 转换为无符号整型避免安全提示
    for (int i = 0; i < 10; i++) {
        int length = 1000 + rand() % 9000;
        ten_str.push_back(rand_str(length));
        // cout << length << endl;
    }
    return ten_str;
}

// Counts how often each letter 'a'..'z' occurs in str and prints the 26
// counts on one line to stdout.
// Characters outside 'a'..'z' are ignored; indexing the table with them
// would write outside the 26-entry array.
void getNum(const std::string& str) {
    int nums[26] = { 0 };
    for (std::string::size_type i = 0; i < str.length(); i++) {
        const char c = str[i];
        if (c >= 'a' && c <= 'z') {
            nums[c - 'a']++;
        }
    }
    std::cout << "a~z的出现次数分别为:";
    for (int i = 0; i < 26; i++) {
        std::cout << nums[i] << "     ";
    }
    std::cout << std::endl;
}
#include <iostream>
#include "HuffmanCode.h"

int main() {
    string a = rand_str(5);
    vector<string> ten_str = getChar();  // 生成十条长度在1000~10000的字符串
    for (int i = 0; i < 10; i++) {
        cout << "第" << i + 1 << "条随机字符串长度为" << ten_str[i].length() << endl;
        // getNum(ten_str[i]);

        vector<Node> ht;
        vector<string> code;
        // 因为字符串非常长,所以默认随机生成的时候全部26个字符都包含了,简化算法复杂程度,如果字符串很短需要遍历查询
        int n = 26;
        int times[27] = { 0 };  // 出现的次数
        for (unsigned int j = 0; j < ten_str[i].length(); j++) {
            times[int(ten_str[i][j]) - 96] ++;  // ASCII码中a为97
        }
        cout << "a~z的出现次数分别为:";
        for (int i = 1; i < 27; i++) {
            cout << times[i] << " ";
        }
        cout << endl;
        generateTree(ht, times, n);
        generateCode(ht, code, n);

        // 计算信源的熵
        double entropy = CalEntropy(times, 27, ten_str[i].length());
        cout << "信源的熵为:" << entropy << endl;

        //计算霍夫曼编码的平均码长
        double avg_len = CalAvgLen(code, times, 27, ten_str[i].length());
        cout << "平均码长为:" << avg_len << endl;

        cout << "霍夫曼编码效率为:" << entropy / avg_len << endl;
        cout << "冗余度为:" << (1 - entropy / avg_len) * 100 << "%" << endl;
        cout << "压缩比为:" << 5 / avg_len << endl;
        cout << endl;
    }
    return 0;
}

  • 2
    点赞
  • 18
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是一个简单的霍夫曼编码的C语言实现:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_TREE_HT 100

struct HuffmanNode {
    char data;
    unsigned freq;
    struct HuffmanNode *left, *right;
};

struct HuffmanMinHeap {
    unsigned size;
    unsigned capacity;
    struct HuffmanNode **array;
};

struct HuffmanNode* newNode(char data, unsigned freq) {
    struct HuffmanNode* node = (struct HuffmanNode*)malloc(sizeof(struct HuffmanNode));
    node->left = node->right = NULL;
    node->data = data;
    node->freq = freq;
    return node;
}

struct HuffmanMinHeap* createMinHeap(unsigned capacity) {
    struct HuffmanMinHeap* minHeap = (struct HuffmanMinHeap*)malloc(sizeof(struct HuffmanMinHeap));
    minHeap->size = 0;
    minHeap->capacity = capacity;
    minHeap->array = (struct HuffmanNode**)malloc(minHeap->capacity * sizeof(struct HuffmanNode*));
    return minHeap;
}

void swap(struct HuffmanNode** a, struct HuffmanNode** b) {
    struct HuffmanNode* temp = *a;
    *a = *b;
    *b = temp;
}

void minHeapify(struct HuffmanMinHeap* minHeap, int idx) {
    int smallest = idx;
    int left = 2 * idx + 1;
    int right = 2 * idx + 2;
    if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
        smallest = left;
    if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
        smallest = right;
    if (smallest != idx) {
        swap(&minHeap->array[smallest], &minHeap->array[idx]);
        minHeapify(minHeap, smallest);
    }
}

int isSizeOne(struct HuffmanMinHeap* minHeap) {
    return (minHeap->size == 1);
}

struct HuffmanNode* extractMin(struct HuffmanMinHeap* minHeap) {
    struct HuffmanNode* temp = minHeap->array[0];
    minHeap->array[0] = minHeap->array[minHeap->size - 1];
    --minHeap->size;
    minHeapify(minHeap, 0);
    return temp;
}

void insertMinHeap(struct HuffmanMinHeap* minHeap, struct HuffmanNode* node) {
    ++minHeap->size;
    int i = minHeap->size - 1;
    while (i && node->freq < minHeap->array[(i - 1) / 2]->freq) {
        minHeap->array[i] = minHeap->array[(i - 1) / 2];
        i = (i - 1) / 2;
    }
    minHeap->array[i] = node;
}

void buildMinHeap(struct HuffmanMinHeap* minHeap) {
    int n = minHeap->size - 1;
    for (int i = (n - 1) / 2; i >= 0; --i)
        minHeapify(minHeap, i);
}

void printArr(int arr[], int n) {
    for (int i = 0; i < n; ++i)
        printf("%d", arr[i]);
    printf("\n");
}

int isLeaf(struct HuffmanNode* root) {
    return !(root->left) && !(root->right);
}

void printCodes(struct HuffmanNode* root, int arr[], int top) {
    if (root->left) {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }
    if (root->right) {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }
    if (isLeaf(root)) {
        printf("%c: ", root->data);
        printArr(arr, top);
    }
}

void HuffmanCodes(char data[], int freq[], int size) {
    struct HuffmanNode *left, *right, *top;
    struct HuffmanMinHeap* minHeap = createMinHeap(size);
    for (int i = 0; i < size; ++i)
        insertMinHeap(minHeap, newNode(data[i], freq[i]));
    buildMinHeap(minHeap);
    while (!isSizeOne(minHeap)) {
        left = extractMin(minHeap);
        right = extractMin(minHeap);
        top = newNode('$', left->freq + right->freq);
        top->left = left;
        top->right = right;
        insertMinHeap(minHeap, top);
    }
    int arr[MAX_TREE_HT], top = 0;
    printCodes(extractMin(minHeap), arr, top);
}

int main() {
    char arr[] = {'a', 'b', 'c', 'd', 'e', 'f'};
    int freq[] = {5, 9, 12, 13, 16, 45};
    int size = sizeof(arr) / sizeof(arr[0]);
    HuffmanCodes(arr, freq, size);
    return 0;
}
```

该实现包含了Huffman编码的所有主要步骤。使用此代码,您可以编码任何字符集中的字符并生成Huffman编码。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值