# c++实现霍夫曼编码

c++实现霍夫曼编码，计算信源的熵、平均码长、编码效率、冗余度与压缩比

HuffmanCode.h

#include <stack>
#include <cmath>

typedef struct {
char ch;  // 存储的字符
int weight;  // 权重
int parent;  // 父节点
int lChild;
int rChild;
} Node;

// 选择最小的权重节点
void findMin(vector<Node> ht, int n, int* s1, int* s2) {
int min = INT_MAX;
for (int i = 1; i <= n; i++) {
if (ht[i].parent == 0) { // 如果没有父节点
min = i;
break;
}
}

for (int i = 1; i <= n; i++) {
if (ht[i].parent == 0) {
if (ht[i].weight < ht[min].weight) {
min = i;  // 权重比记录更小，则更新
}
}
}
*s1 = min;
//遍历全部结点
for (int i = 1; i <= n; i++) {
if (ht[i].parent == 0 && i != (*s1)) {
min = i;
break;
}
}
for (int i = 1; i <= n; i++) {
if (ht[i].parent == 0 && i != (*s1)) {
if (ht[i].weight < ht[min].weight) {
min = i;  // 更小则更新
}
}
}
*s2 = min;
}

// 生成哈夫曼树
void generateTree(vector<Node>& ht, int times[], int n) {
int m = 2 * n - 1;  // 节点总数
int s1;
int s2;
Node firstNode;  // 填充下标为0
firstNode.ch = '0';
firstNode.weight = 0;
firstNode.lChild = 0;
firstNode.rChild = 0;
firstNode.parent = 0;
ht.push_back(firstNode);
for (int i = 1; i <= n; i++) {
Node temp;
temp.ch = char(96 + i);// a的ASCII码为97
temp.weight = times[i];
temp.lChild = 0;
temp.rChild = 0;
temp.parent = 0;
ht.push_back(temp);
}
for (int i = n + 1; i <= m; i++) {
Node temp;
temp.ch = '0';
temp.weight = 0;
temp.lChild = 0;
temp.rChild = 0;
temp.parent = 0;
ht.push_back(temp);
}
for (int i = n + 1; i <= m; i++) {
findMin(ht, i - 1, &s1, &s2);  // 选择最小的两个权重节点
ht[s1].parent = i;
ht[s2].parent = i;
ht[i].lChild = s1;
ht[i].rChild = s2;
ht[i].weight = ht[s1].weight + ht[s2].weight;  //更新权重
}
}

// 从叶节点到根节点找霍夫曼编码
void generateCode(vector<Node>& ht, vector<string>& code, int n) {
code.push_back("");  // 0下标的占位
int p;
int c;
for (int i = 1; i <= n; i++) {
stack<char> st;  // 因为是逆向存储，所以使用栈
int start = n - 1;
for (c = i, p = ht[i].parent; p != 0; c = p, p = ht[p].parent) {
if (ht[p].lChild == c) {
st.push('0');  // 是左孩子则压入0
}
else {
st.push('1');
}
}
int s_len = st.size();
string hfm_code = "";
// 从栈中取出
for (int i = 0; i < s_len; i++) {
hfm_code += st.top();
st.pop();
}
code.push_back(hfm_code);
}
cout << "霍夫曼编码结果如下" << endl;
for (int i = 1; i <= n; i++)
{
cout << ht[i].ch << "：" << code[i] << endl;
}
}

// 计算信源的熵
double CalEntropy(int times[], int len, int n) {
double temp = 0;
for (int i = 1; i < len; i++) {
double p = double(times[i]) / n;
temp += p * log2(p);
}
temp = -temp;
return temp;
}

// 计算霍夫曼编码的平均码长
double CalAvgLen(vector<string> code, int times[], int len, int n) {
double temp = 0;
for (int i = 1; i < len; i++) {
int len = code[i].length();
double p = double(times[i]) / n;
temp += double(len) * p;
}
return temp;
}

#include <iostream>
#include <string>
#include <vector>
#include <time.h>
using namespace std;

// 随机生成字符串，这里只使用a~z，可以使用ASCII码这样就是全部字符，但是会增加遍历时间
string rand_str(int len) {
string str;
char c;
for (int i = 0; i < len; i++) {
c = 'a' + rand() % 26;
str.push_back(c);
}
return str;
}

// 生成十条随机字符串
vector<string> getChar() {
vector<string> ten_str;
srand(unsigned int(time(NULL)));  // 转换为无符号整型避免安全提示
for (int i = 0; i < 10; i++) {
int length = 1000 + rand() % 9000;
ten_str.push_back(rand_str(length));
// cout << length << endl;
}
return ten_str;
}

// 统计每个字符出现的次数
void getNum(string str) {
int nums[26] = { 0 };
for (unsigned int i = 0; i < str.length(); i++) {
nums[int(str[i]) - 97] ++;  // ASCII码中a为97
}
cout << "a~z的出现次数分别为：";
for (int i = 0; i < 26; i++) {
// cout << char(i + 97) << "的次数为：" << nums[i] << endl;
cout << nums[i] << "     ";
}
cout << endl;
}

#include <iostream>
#include "HuffmanCode.h"

int main() {
string a = rand_str(5);
vector<string> ten_str = getChar();  // 生成十条长度在1000~10000的字符串
for (int i = 0; i < 10; i++) {
cout << "第" << i + 1 << "条随机字符串长度为" << ten_str[i].length() << endl;
// getNum(ten_str[i]);

vector<Node> ht;
vector<string> code;
// 因为字符串非常长，所以默认随机生成的时候全部26个字符都包含了,简化算法复杂程度,如果字符串很短需要遍历查询
int n = 26;
int times[27] = { 0 };  // 出现的次数
for (unsigned int j = 0; j < ten_str[i].length(); j++) {
times[int(ten_str[i][j]) - 96] ++;  // ASCII码中a为97
}
cout << "a~z的出现次数分别为：";
for (int i = 1; i < 27; i++) {
cout << times[i] << " ";
}
cout << endl;
generateTree(ht, times, n);
generateCode(ht, code, n);

// 计算信源的熵
double entropy = CalEntropy(times, 27, ten_str[i].length());
cout << "信源的熵为：" << entropy << endl;

//计算霍夫曼编码的平均码长
double avg_len = CalAvgLen(code, times, 27, ten_str[i].length());
cout << "平均码长为：" << avg_len << endl;

cout << "霍夫曼编码效率为：" << entropy / avg_len << endl;
cout << "冗余度为：" << (1 - entropy / avg_len) * 100 << "%" << endl;
cout << "压缩比为：" << 5 / avg_len << endl;
cout << endl;
}
return 0;
}

• 2
点赞
• 18
收藏
觉得还不错? 一键收藏
• 0
评论
05-19 2935

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。