哈夫曼编码
1.编码
给每一个字符标记一个单独的代码。
2.编码分类
等长编码:ASCII码等
不等长编码:Huffman编码
3.思想(不等长编码)
(1)使用频率高的字符,编码长度短;
(2)使用频率低的字符,编码长度长;
(3)利用不等长编码,可以使报文总长度较短,这也是文件压缩技术的核心。
4.不等长编码的关键: 解码时的唯一性
让任意编码都不是其他编码的前缀
5.哈夫曼编码和哈夫曼树的关系
哈夫曼树是一种用来进行哈夫曼编码的方法
(1)建立哈夫曼树
(2)在哈夫曼树的基础上编码
可以保证哈夫曼编码是最优前缀编码
哈夫曼树
Q1:什么是哈夫曼树? 叶子节点带有权值的最优二叉树.
- 叶子结点的权值
一般用来表示字符出现的次数或概率。
- 最优 = 带权路径长度(WPL)最短
带权路径长度(WPL):设二叉树有 n 个带权值的叶子结点,WPL 是从根结点到各个叶子结点的路径长度与相应叶子结点权值的乘积之和,即 $WPL=\sum_{i=1}^{n} w_i l_i$。
- 哈夫曼树
带权路径长度最小的二叉树,也叫做最优二叉树。
(例如:WPL= 7x2+5x2+2x3+4x3+9x2 =60)
Q2:已知一组权值给定的叶子结点{w1,w2 , … , wn},如何构造一棵哈夫曼树?
只要让权值最大的叶子结点离根最近,权值最小的叶子结点离根最远,就能使带权路径长度最小。
- 构造哈夫曼树(从下向上构造)
已知:权值表 {w1, w2, …, wn}
(1) 选择有效权值中最小的两个,构成最小二叉树,标记这两个权值已用。
(2)将这两个权值相加,把它们的和作为新结点的权值并入权值表;返回(1)。
- 结束条件
权值表中只剩下一个未使用的权值(即根结点)时结束。
动态实现视频
哈夫曼树的建立过程
哈夫曼编码的实现
此处只将代码和结果贴出以供参考,暂不作解析与总结
树的打印部分有瑕疵
#include <algorithm>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <math.h>
#include <string>
#include <vector>
using namespace std;
// Capacity, in chars, of the fixed-size text buffers declared as char[A] in this file.
const int A = 1000;
// File-scope counter initialised to zero; no code visible in this file reads or writes it.
int static countkind = 0;
/// One node of the array-based Huffman tree. Links are indices into the
/// node array; -1 means "none".
struct HNode
{
    int weight = 0;   ///< Occurrence count (leaf) or sum of both children (internal node).
    int parent = -1;  ///< Index of the parent node, -1 while the node is still unattached.
    int LChild = -1;  ///< Index of the left child, -1 for a leaf.
    int RChild = -1;  ///< Index of the right child, -1 for a leaf.
    int temp = 0;     ///< Scratch flag used by Huffman::num(): 0 = not yet returned, -1 = returned.
};

/// One row of the code table: a symbol and its bit string of '0'/'1' characters.
struct HCode
{
    char data;
    std::string code;
};

/// Huffman coder for C strings.
///
/// Leaves occupy indices [0, N) of the node array, internal nodes follow, and
/// the root is the last node (index 2*N-2). Call Init() (or CreateHTree()
/// followed by CreatCodeTable()) before Encode(), Decode() or Print().
class Huffman
{
private:
    std::vector<HNode> HTree;       // Huffman tree, 2*N-1 nodes (empty when N == 0)
    std::vector<HCode> HCodeTable;  // code table, one entry per leaf
    int N = 0;                      // number of leaves (distinct symbols)
    void code(int i, std::string newcode);  // assigns codes to the subtree rooted at node i
public:
    void Init(char a[]);
    void CreateHTree(int a[], int n, char name[]);
    void SelectMin(int& x, int& y, int began, int end);
    void CreatCodeTable();
    void Encode(char* s, std::string& d);
    void Calculate(int x, int y);
    void Decode(char* s, char* d);
    void Print(char* s);
    int num(int weight);
    ~Huffman();
};

/// Builds the tree and the code table for the null-terminated text `t`.
///
/// Symbols are stored in ascending `char` order, so the code table is sorted
/// by symbol. A null or empty `t` leaves the coder empty (N == 0).
void Huffman::Init(char t[])
{
    std::string sorted = (t != nullptr) ? std::string(t) : std::string();
    // Sorting puts equal characters next to each other so that each run
    // yields one symbol together with its occurrence count.
    std::sort(sorted.begin(), sorted.end());

    std::vector<char> symbols;
    std::vector<int> weights;
    for (std::size_t j = 0; j < sorted.size(); ++j)
    {
        if (j == 0 || sorted[j] != sorted[j - 1])
        {
            symbols.push_back(sorted[j]);
            weights.push_back(1);
        }
        else
        {
            ++weights.back();
        }
    }

    CreateHTree(weights.data(), static_cast<int>(weights.size()), symbols.data());
    CreatCodeTable();
}

/// Builds the Huffman tree bottom-up from `n` leaf weights.
///
/// @param a     weight of each leaf, `n` entries
/// @param n     number of leaves; values <= 0 reset the coder to empty
/// @param name  symbol of each leaf, `n` entries
///
/// Any previously built tree and code table are discarded.
void Huffman::CreateHTree(int a[], int n, char name[])
{
    N = (n > 0) ? n : 0;
    HTree.clear();
    HCodeTable.clear();
    if (N == 0)
        return;

    HTree.assign(2 * N - 1, HNode());
    HCodeTable.assign(N, HCode());
    for (int i = 0; i < N; i++)
    {
        HTree[i].weight = a[i];
        HCodeTable[i].data = name[i];
    }

    // Each round merges the two lightest parentless nodes into new node i.
    for (int i = N; i < 2 * N - 1; i++)
    {
        int x = -1;
        int y = -1;
        SelectMin(x, y, 0, i);
        HTree[i].weight = HTree[x].weight + HTree[y].weight;
        HTree[i].LChild = x;
        HTree[i].RChild = y;
        HTree[i].parent = -1;
        HTree[x].parent = HTree[y].parent = i;
    }
}

/// Finds the two lightest parentless nodes with index in [began, end).
///
/// @param x      receives the index of the lightest node, or -1 if there is none
/// @param y      receives the index of the second lightest node, or -1 if there is none
/// @param began  first index to examine (values below 0 are treated as 0)
/// @param end    one past the last index to examine (clamped to the node count)
///
/// Ties are resolved in favour of the lower index. The tree is not modified.
void Huffman::SelectMin(int& x, int& y, int began, int end)
{
    x = -1;
    y = -1;
    const int first = std::max(began, 0);
    const int last = std::min(end, static_cast<int>(HTree.size()));

    for (int j = first; j < last; j++)
    {
        if (HTree[j].parent != -1)
            continue;  // already merged into a subtree
        if (x == -1 || HTree[j].weight < HTree[x].weight)
            x = j;
    }
    for (int j = first; j < last; j++)
    {
        if (HTree[j].parent != -1 || j == x)
            continue;
        if (y == -1 || HTree[j].weight < HTree[y].weight)
            y = j;
    }
}

/// Recursively assigns codes: `newcode` is the path from the root to node `i`
/// ('0' for each left branch, '1' for each right branch).
void Huffman::code(int i, std::string newcode)
{
    if (HTree[i].LChild == -1)
    {
        // A tree consisting of a single leaf has an empty path; give that
        // symbol the one-bit code "0" so encoded text is never empty.
        HCodeTable[i].code = newcode.empty() ? std::string("0") : newcode;
        return;
    }
    code(HTree[i].LChild, newcode + "0");
    code(HTree[i].RChild, newcode + "1");
}

/// Fills the code table by walking the tree from the root. Does nothing when
/// no tree has been built.
void Huffman::CreatCodeTable()
{
    if (N <= 0)
        return;
    code(2 * N - 2, "");  // the root is the last of the 2*N-1 nodes
}

/// Encodes the null-terminated text `s`.
///
/// @param s  text to encode; characters without a code table entry are skipped
/// @param d  receives the bit string ('0'/'1' characters); previous content is
///           discarded
///
/// The bit string and the size statistics are also written to std::cout.
void Huffman::Encode(char* s, std::string& d)
{
    d.clear();
    std::cout << "Huffman code is:" << std::endl;
    const std::size_t length = (s != nullptr) ? std::strlen(s) : 0;
    for (std::size_t i = 0; i < length; i++)
    {
        for (int j = 0; j < N; j++)
        {
            if (s[i] == HCodeTable[j].data)
            {
                d += HCodeTable[j].code;
                std::cout << HCodeTable[j].code;
                break;  // symbols are unique, no further match is possible
            }
        }
    }
    std::cout << std::endl;
    std::cout << std::endl;
    Calculate(static_cast<int>(length), static_cast<int>(d.size()));
    std::cout << std::endl;
}

/// Prints the size before and after encoding and the compression ratio.
///
/// @param x  size of the plain text in bytes
/// @param y  size of the encoded text in bits
void Huffman::Calculate(int x, int y)
{
    std::cout << "The string length before encoding is:" << x << " Byte" << std::endl;
    std::cout << "The size of the encoded string is:" << y << " bit" << std::endl;
    // Divide in floating point so that partial bytes are not truncated away,
    // and report 0 instead of dividing by zero for empty text.
    const double ratio = (x > 0) ? (y / 8.0) / x * 100.0 : 0.0;
    std::cout << "The compression ratio is:" << ratio << "%" << std::endl;
}

/// Decodes the bit string `s` into `d` and prints the decoded text.
///
/// @param s  null-terminated bit string; '0' selects the left branch and any
///           other character selects the right branch
/// @param d  output buffer; must have room for the decoded text plus the
///           terminating '\0' that this function writes
///
/// A trailing incomplete code is ignored.
void Huffman::Decode(char* s, char* d)
{
    std::cout << "Decode is:" << std::endl;
    char* const decoded = d;  // start of the output, kept for printing
    if (s != nullptr && d != nullptr && N > 0)
    {
        const int root = 2 * N - 2;
        while (*s != '\0')
        {
            int node = root;
            if (HTree[node].LChild == -1)
            {
                // Single-symbol tree: every bit stands for that symbol.
                s++;
            }
            else
            {
                while (HTree[node].LChild != -1 && *s != '\0')
                {
                    node = (*s == '0') ? HTree[node].LChild : HTree[node].RChild;
                    s++;
                }
                if (HTree[node].LChild != -1)
                    break;  // input ended in the middle of a code
            }
            *d = HCodeTable[node].data;
            d++;
        }
    }
    if (d != nullptr)
    {
        *d = '\0';
        std::cout << decoded;
    }
    std::cout << std::endl;
    std::cout << std::endl;
}

/// Prints the code table followed by a level-by-level dump of node weights.
///
/// The dump pads missing children with 0 and stops after the number of slots
/// of a complete binary tree whose depth is derived from N. Nodes are looked
/// up by weight through num(), so when several nodes share a weight the
/// children printed below them may belong to another node of the same weight.
///
/// @param s  unused
void Huffman::Print(char* s)
{
    (void)s;
    std::cout << "Huffman code table is:" << std::endl;
    std::cout << std::setiosflags(std::ios::left) << std::setw(4) << "n" << std::setw(8) << "char" << std::setw(8) << "code" << std::setw(8) << "weight" << std::endl;
    for (int i = 0; i < N; i++)
    {
        std::cout << std::setiosflags(std::ios::left) << std::setw(4) << i << std::setw(8) << HCodeTable[i].data << std::setw(8) << HCodeTable[i].code << std::setw(8) << HTree[i].weight << std::endl;
    }
    std::cout << std::endl;
    std::cout << std::endl;
    std::cout << "Huffman tree is:" << std::endl;
    if (N <= 0)
        return;

    // num() marks the nodes it hands out; clear the marks so that Print()
    // can be called more than once.
    for (HNode& node : HTree)
        node.temp = 0;

    // k = floor(log2(2*N)) + 1, computed with integers to avoid rounding.
    int k = 0;
    for (int v = 2 * N; v > 0; v >>= 1)
        k++;
    const int limit = (1 << k) - 1;  // maximum number of slots printed

    // Every printed slot enqueues at most two entries; index 0 is unused.
    std::vector<int> queue(2 * limit + 4, 0);
    int f = 0, r = 0;  // empty queue: f = last dequeued slot, r = last enqueued slot
    if (HTree[2 * N - 2].weight != 0)
        queue[++r] = HTree[2 * N - 2].weight;  // enqueue the root
    int i = 2 * N - 2;  // node whose children are enqueued next
    int a = 0;          // entries printed on the current row
    int h = 0;          // entries printed in total
    int n = 0;          // current row number; a full row has 2^n entries
    while (r != f && h < limit)
    {
        h++;
        std::cout << queue[++f] << " ";  // dequeue and print
        queue[f] = 0;
        a++;
        if (HTree[i].LChild != -1)
            queue[++r] = HTree[HTree[i].LChild].weight;  // enqueue left child
        else
            queue[++r] = 0;
        if (HTree[i].RChild != -1)
            queue[++r] = HTree[HTree[i].RChild].weight;  // enqueue right child
        else
            queue[++r] = 0;
        int m = f + 1;
        // Flush placeholder zeros; each one enqueues a single placeholder.
        while (queue[m] == 0 && h < limit)
        {
            if (a % (1 << n) == 0)
            {
                n++;
                a = 0;
                std::cout << std::endl;
            }
            h++;
            queue[++r] = 0;
            std::cout << queue[++f] << " ";  // dequeue and print
            queue[f] = 0;
            a++;
            m = f + 1;
        }
        i = num(queue[m]);
        if (a % (1 << n) == 0)
        {
            n++;
            a = 0;
            std::cout << std::endl;
        }
        if (i < 0)
            break;  // no node matches the next entry; nothing left to expand
    }
}

/// Returns the index of the first node with the given weight that has not
/// been returned before (since the marks were last cleared) and marks it as
/// returned. Returns -1 when there is no such node.
int Huffman::num(int weight)
{
    for (std::size_t i = 0; i < HTree.size(); i++)
    {
        if (HTree[i].weight == weight && HTree[i].temp == 0)
        {
            HTree[i].temp = -1;
            return static_cast<int>(i);
        }
    }
    return -1;
}

/// The containers release their own storage.
Huffman::~Huffman() = default;
void main()
{
char iOption;
bool isRight=false;
do
{
cout << "Choose your option:" << endl;
cout << "1.Use'I love data Structure, I love Computer.I will try my best to study data Structure.'as an example to test this Huffman Tree." << endl;
cout << "2.Test this Huffman Tree by yourself." << endl;
cin >> iOption;
if (iOption != '1' && iOption != '2')
cout << "Input Error! Please choose your option again!" << endl;
else
isRight = true;
} while (isRight == false);
switch (iOption)
{
case '1':
{
cout << "Your sentence is:" << endl;
cout << "I love data Structure, I love Computer.I will try my best to study data Structure." << endl;
cout << endl;
char str1[A] = { "I love data Structure, I love Computer.I will try my best to study data Structure." };
char *s1 = str1;
string strInit1 = "";
Huffman hCode;
hCode.Init(str1);
hCode.Encode(s1, strInit1);
char *p1 = (char*)strInit1.data(); //加const 或用 char *p = (char*)str.data(); 的形式
hCode.Decode(p1, s1);
hCode.Print(s1);
break;
}
case '2':
{
char g;
cin.get(g);
cout << "Please input your code:"<<endl;
char c;
char str[A] = { '\0' };
int i = 0;
while (cin.get(c))
{
if (c == '\n')
{
if (i == 0 || i == 1)
{
cout << "Error! Please input it again!" << endl;
i = 0;
continue;
}
break;
}
str[i++] = c;
}
char *s = str;
string strInit = "";
Huffman hCode;
hCode.Init(str);
int iOption2;
bool isRight2 = false;
do
{
do
{
cout << "Choose your option:" << endl;
cout << "1.Print the Huffman Tree." << endl;
cout << "2.Encode&Decode" << endl;
cout << "3.Exit program." << endl;
cin >> iOption2;
if (iOption2 != 1 && iOption2 != 2 && iOption2 != 3)
cout << "Input Error! Please choose your option again!" << endl;
else
isRight = true;
} while (isRight == false);
cout << endl;
switch (iOption2)
{
case 1:
{
hCode.Print(s);
break;
}
case 2:
{
hCode.Encode(s, strInit);
char *p = (char*)strInit.data();
hCode.Decode(p, s);
break;
}
case 3:
return;
}
} while (iOption2 != 3);
}
#ifdef h
hCode.Encode(s, strInit);
char *p = (char*)strInit.data(); //加const 或用 char *p = (char*)str.data(); 的形式
hCode.Decode(p, s);
hCode.Print(s);
#endif
}
#ifdef h
cout << "Please input your code:";
char c;
char str[A] = { '\0' };
int i = 0;
while (cin.get(c))
{
if (c == '\n')
{
if (i == 0 || i == 1)
{
cout << "Error! Please input it again!" << endl;
i = 0;
continue;
}
break;
}
str[i++] = c;
}
char *s = str;
string strInit = "";
Huffman hCode;
hCode.Init(str);
hCode.Encode(s, strInit);
int x=strlen(str);
int y=strlen=(strInlt);
char *p = (char*)strInit.data(); //加const 或用 char *p = (char*)str.data(); 的形式
hCode.Decode(p,s );
hCode.Print(s);
#endif
}
1.哈夫曼树的存储和建立
哈夫曼树的特点
只有度为2的结点和叶子结点,
所以具有n个叶子结点的哈夫曼树的结点总数 =2*n-1。
顺序存储结构
设置一个大小为 2*n-1 的数组 huffTree[2*n-1],