一、代码块
读入数据
/**
 * Reads the first line of "in.txt" into a caller-supplied buffer and echoes
 * it to standard output.
 *
 * @param source  Destination buffer of at least MaxSize chars; it receives at
 *                most MaxSize-1 characters plus the terminating '\0'.
 * @return OK when the file could be opened, ERROR otherwise (source is then
 *         an empty string).
 */
Status ReadData(char *source){
    // Start from an empty string so the buffer is valid on every path
    source[0] = '\0';
    ifstream infile;
    infile.open("in.txt");
    if(!infile.is_open()){
        cerr<<"cannot open in.txt"<<endl;
        return ERROR;
    }
    cout<<"Reading..."<<endl;
    cout<<"the input file is:"<<endl;
    infile.getline(source,MaxSize);
    cout<<source<<endl;
    infile.close();
    cout<<endl;
    return OK;
}
统计次数
/**
 * Tallies how often each distinct character occurs in a C string.
 *
 * @param data     NUL-terminated text to scan.
 * @param paraCnt  Count table to update: a character already present has its
 *                 counter incremented, a new character is appended.
 * @return Always OK.
 */
Status WordCount(char *data,NumCount *paraCnt){
    const int total = strlen(data);
    for(int pos = 0; pos < total; ++pos){
        const char current = data[pos];
        // Linear search for the entry that already tracks this character
        int slot = 0;
        while(slot < paraCnt->length && paraCnt->count[slot].ch != current){
            ++slot;
        }
        if(slot == paraCnt->length){
            // First sighting: claim the next free entry for this character
            paraCnt->count[slot].ch = current;
            ++paraCnt->length;
        }
        ++paraCnt->count[slot].cnt;
    }
    return OK;
}
显示每个字符出现的对应次数
/**
 * Prints the number of distinct characters followed by one line per
 * character with its occurrence count.
 *
 * @param paraCnt  Count table to print.
 * @return Always OK.
 */
Status Show(NumCount *paraCnt){
    const int total = paraCnt->length;
    cout<<"the length is "<<total<<endl;
    int idx = 0;
    while(idx < total){
        const auto &entry = paraCnt->count[idx];
        cout<<"The character "<<entry.ch<<" appears "<<entry.cnt<<endl;
        ++idx;
    }
    cout<<endl;
    return OK;
}
建树
/**
 * Builds a Huffman tree in a freshly allocated node array.
 *
 * Nodes are 1-based: slots 1..length are the leaves (one per counted
 * character), slots length+1..2*length-1 are internal nodes, and the last
 * slot is the root. Slot 0 is unused so that index 0 can mean "no node".
 *
 * @param HT        Receives the new[]-allocated array, or nullptr on failure.
 *                  The caller owns the array.
 * @param length    Number of distinct characters (leaves).
 * @param cntarray  Character/count table that supplies data and weight.
 * @return OK on success, ERROR when length <= 1 (no tree can be built).
 */
Status CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray){
    // Never hand back an indeterminate pointer, even when we fail
    HT = nullptr;
    if(length <= 1) return ERROR;
    // No node has degree 1, so the tree has 2*leaves-1 nodes in total
    const int m = length*2-1;
    // Value-initialisation zeroes every field: parent/lchild/rchild == 0
    HT = new HTNode[m+1]();
    for(int i = 1;i <= length; i++){
        HT[i].data = cntarray.count[i-1].ch;
        HT[i].weight = cntarray.count[i-1].cnt;
    }
    for(int i = length + 1;i <= m; i++){
        int s1 = 0,s2 = 0;
        // Pick the two lightest parentless nodes among those built so far
        select(HT,i-1,&s1,&s2);
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        // The merged node weighs as much as both children together
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }
    return OK;
}
选择权重最小的两个节点
/**
 * Finds the two lightest nodes that do not have a parent yet.
 *
 * Ties are resolved in favour of the lower index. Index 0 is never a valid
 * node, so it is used as the "not found" marker.
 *
 * @param HT   Node array (1-based).
 * @param top  Highest index to examine (inclusive).
 * @param s1   Receives the index of the lightest parentless node, or 0.
 * @param s2   Receives the index of the second lightest one, or 0.
 * @return OK when two nodes were found, ERROR otherwise.
 */
Status select(HuffmanTree HT,int top,int *s1,int *s2){
    *s1 = 0;
    *s2 = 0;
    // First pass: lightest parentless node
    for(int i = 1;i <= top; i++){
        if(HT[i].parent != 0) continue;
        if(*s1 == 0 || HT[i].weight < HT[*s1].weight){
            *s1 = i;
        }
    }
    // Second pass: lightest parentless node other than *s1
    for(int i = 1;i <= top; i++){
        if(HT[i].parent != 0 || i == *s1) continue;
        if(*s2 == 0 || HT[i].weight < HT[*s2].weight){
            *s2 = i;
        }
    }
    return (*s1 != 0 && *s2 != 0) ? OK : ERROR;
}
创建编码
/**
 * Derives the Huffman code of every leaf by walking from the leaf up to the
 * root ('0' for a left branch, '1' for a right branch).
 *
 * @param HT      Huffman tree as built by CreateHuffmanTree (1-based).
 * @param HC      Receives a new[]-allocated table; entries 1..length hold a
 *                character and its new[]-allocated, NUL-terminated code.
 *                Set to nullptr on failure. The caller owns the memory.
 * @param length  Number of leaves.
 * @return OK on success, ERROR when there is no usable tree.
 */
Status CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length){
    HC = nullptr;
    if(HT == nullptr || length <= 1) return ERROR;
    // Entry 0 is unused; value-initialisation leaves its str as nullptr
    HC = new HCode[length+1]();
    // Scratch buffer: the longest code has length-1 symbols plus '\0'
    char *cd = new char[length];
    cd[length-1] = '\0';
    for(int i = 1;i <= length; i++){
        // The code is produced back to front, so fill the buffer from its end
        int start = length-1;
        int c = i;
        int f = HT[c].parent;
        while(f != 0){
            --start;
            cd[start] = (HT[f].lchild == c) ? '0' : '1';
            c = f;
            f = HT[c].parent;
        }
        // length-start chars cover the code itself plus its terminator
        HC[i].str = new char[length-start];
        HC[i].data = HT[i].data;
        strcpy(HC[i].str,&cd[start]);
    }
    // cd came from new[], so it must be released with delete[]
    delete[] cd;
    return OK;
}
生成编码文件
/**
 * Writes the Huffman encoding of a text to "code.txt" as a string of '0'
 * and '1' characters.
 *
 * @param data    NUL-terminated text to encode.
 * @param HC      Code table (entries 1..length).
 * @param length  Number of entries in the code table.
 * @return OK on success, ERROR when the table is missing or the output file
 *         cannot be opened.
 */
Status Encode(char *data,HuffmanCode HC,int length){
    if(HC == nullptr) return ERROR;
    ofstream outfile;
    outfile.open("code.txt");
    if(!outfile.is_open()){
        cerr<<"cannot open code.txt for writing"<<endl;
        return ERROR;
    }
    // Measure the text once instead of on every loop iteration
    const size_t dataLen = strlen(data);
    for(size_t i = 0;i < dataLen; i++){
        for(int j = 1;j <= length;++j){
            if(data[i] == HC[j].data){
                outfile<<HC[j].str;
                // Each character has exactly one entry; stop searching
                break;
            }
        }
    }
    outfile.close();
    cout<<"the code txt has been written"<<endl;
    cout<<endl;
    return OK;
}
解码
/**
 * Reads the bit string stored in "code.txt", decodes it with the Huffman
 * tree and writes the recovered text to "out.txt".
 *
 * @param HT      Huffman tree (1-based; the root is node 2*length-1).
 * @param length  Number of leaves in the tree.
 * @return OK on success, ERROR when there is no usable tree or a file cannot
 *         be opened.
 */
Status Decode(HuffmanTree HT,int length){
    if(HT == nullptr || length <= 1) return ERROR;
    ifstream infile;
    infile.open("code.txt");
    if(!infile.is_open()){
        cerr<<"cannot open code.txt"<<endl;
        return ERROR;
    }
    // A std::string grows as needed, so no fixed-size stack buffer is required
    std::string codetxt;
    std::getline(infile,codetxt);
    infile.close();
    ofstream outfile;
    outfile.open("out.txt");
    if(!outfile.is_open()){
        cerr<<"cannot open out.txt for writing"<<endl;
        return ERROR;
    }
    const int rootIndex = 2*length-1;
    int node = rootIndex;
    for(char bit : codetxt){
        // '0' descends to the left child, '1' to the right child
        if(bit == '0'){
            node = HT[node].lchild;
        }
        else if(bit == '1'){
            node = HT[node].rchild;
        }
        // A node without children is a leaf: emit it and restart at the root
        if(HT[node].lchild == 0 && HT[node].rchild == 0){
            outfile<<HT[node].data;
            node = rootIndex;
        }
    }
    outfile.close();
    cout<<"the output txt has been written"<<endl;
    cout<<endl;
    return OK;
}
二、全部代码
//
// main.c
//
// Created by ABC on 2022/5/26.
//
#include <string.h>
#include <climits>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
// Upper bound on the size of the buffer the input file is read into
#define MaxSize 1024
// Values handed back by the Status-returning functions
#define OK 1
#define ERROR 0
typedef int Status;
// One tally entry: a character and the number of times it has been seen
struct wordcnt{
    char ch;        // the character being counted
    int cnt = 0;    // occurrences recorded so far
};
typedef wordcnt Count;
// Wrapper around the tally entries: the table plus how many slots are in use
struct NumCount{
    Count count[MaxSize];   // one entry per distinct character
    int length = 0;         // number of entries currently used
};
// Huffman tree node; nodes refer to each other by array index, 0 meaning "none"
struct HTree{
    char data;      // character stored in a leaf
    int weight;     // occurrence count, or sum of the children's weights
    int parent;
    int lchild;
    int rchild;
};
typedef HTree HTNode;
typedef HTree *HuffmanTree;
// Code table entry: a character and its code as a NUL-terminated bit string
struct HCode{
    char data;
    char* str;
};
typedef HCode *HuffmanCode;
// Read the first line of in.txt into source
Status ReadData(char *source);
// Count how many times each character occurs in data
Status WordCount(char *data,NumCount *paraCnt);
// Print every recorded character together with its count
Status Show(NumCount *paraCnt);
// Build the Huffman tree from the character counts
Status CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray);
// Pick the two parentless nodes with the smallest weights
Status select(HuffmanTree HT,int top,int *s1,int *s2);
// Derive the Huffman code of every leaf
Status CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length);
// Encode the input text and write the result to code.txt
Status Encode(char *data,HuffmanCode HC,int length);
// Read code.txt back and decode it into out.txt
Status Decode(HuffmanTree HT,int length);
int main(int argc, char** argv){
char data[MaxSize];
NumCount Cntarray;
//读入数据
ReadData(data);
//统计次数
WordCount(data,&Cntarray);
//可以查看每个单词出现的对应次数
Show(&Cntarray);
HuffmanTree tree;
//建树
CreateHuffmanTree(tree,Cntarray.length,Cntarray);
HuffmanCode code;
//创建编码
CreateHuffmanCode(tree,code,Cntarray.length);
//生成编码文件
Encode(data,code,Cntarray.length);
//解码
Decode(tree,Cntarray.length);
cout<<"Please view the generated TXT file to check the result"<<endl;
return 0;
}
/**
 * Reads the first line of "in.txt" into a caller-supplied buffer and echoes
 * it to standard output.
 *
 * @param source  Destination buffer of at least MaxSize chars; it receives at
 *                most MaxSize-1 characters plus the terminating '\0'.
 * @return OK when the file could be opened, ERROR otherwise (source is then
 *         an empty string).
 */
Status ReadData(char *source){
    // Start from an empty string so the buffer is valid on every path
    source[0] = '\0';
    ifstream infile;
    infile.open("in.txt");
    if(!infile.is_open()){
        cerr<<"cannot open in.txt"<<endl;
        return ERROR;
    }
    cout<<"Reading..."<<endl;
    cout<<"the input file is:"<<endl;
    infile.getline(source,MaxSize);
    cout<<source<<endl;
    infile.close();
    cout<<endl;
    return OK;
}
/**
 * Tallies how often each distinct character occurs in a C string.
 *
 * @param data     NUL-terminated text to scan.
 * @param paraCnt  Count table to update: a character already present has its
 *                 counter incremented, a new character is appended.
 * @return Always OK.
 */
Status WordCount(char *data,NumCount *paraCnt){
    const int total = strlen(data);
    for(int pos = 0; pos < total; ++pos){
        const char current = data[pos];
        // Linear search for the entry that already tracks this character
        int slot = 0;
        while(slot < paraCnt->length && paraCnt->count[slot].ch != current){
            ++slot;
        }
        if(slot == paraCnt->length){
            // First sighting: claim the next free entry for this character
            paraCnt->count[slot].ch = current;
            ++paraCnt->length;
        }
        ++paraCnt->count[slot].cnt;
    }
    return OK;
}
/**
 * Prints the number of distinct characters followed by one line per
 * character with its occurrence count.
 *
 * @param paraCnt  Count table to print.
 * @return Always OK.
 */
Status Show(NumCount *paraCnt){
    const int total = paraCnt->length;
    cout<<"the length is "<<total<<endl;
    int idx = 0;
    while(idx < total){
        const auto &entry = paraCnt->count[idx];
        cout<<"The character "<<entry.ch<<" appears "<<entry.cnt<<endl;
        ++idx;
    }
    cout<<endl;
    return OK;
}
/**
 * Builds a Huffman tree in a freshly allocated node array.
 *
 * Nodes are 1-based: slots 1..length are the leaves (one per counted
 * character), slots length+1..2*length-1 are internal nodes, and the last
 * slot is the root. Slot 0 is unused so that index 0 can mean "no node".
 *
 * @param HT        Receives the new[]-allocated array, or nullptr on failure.
 *                  The caller owns the array.
 * @param length    Number of distinct characters (leaves).
 * @param cntarray  Character/count table that supplies data and weight.
 * @return OK on success, ERROR when length <= 1 (no tree can be built).
 */
Status CreateHuffmanTree(HuffmanTree &HT,int length,NumCount cntarray){
    // Never hand back an indeterminate pointer, even when we fail
    HT = nullptr;
    if(length <= 1) return ERROR;
    // No node has degree 1, so the tree has 2*leaves-1 nodes in total
    const int m = length*2-1;
    // Value-initialisation zeroes every field: parent/lchild/rchild == 0
    HT = new HTNode[m+1]();
    for(int i = 1;i <= length; i++){
        HT[i].data = cntarray.count[i-1].ch;
        HT[i].weight = cntarray.count[i-1].cnt;
    }
    for(int i = length + 1;i <= m; i++){
        int s1 = 0,s2 = 0;
        // Pick the two lightest parentless nodes among those built so far
        select(HT,i-1,&s1,&s2);
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        // The merged node weighs as much as both children together
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }
    return OK;
}
/**
 * Finds the two lightest nodes that do not have a parent yet.
 *
 * Ties are resolved in favour of the lower index. Index 0 is never a valid
 * node, so it is used as the "not found" marker.
 *
 * @param HT   Node array (1-based).
 * @param top  Highest index to examine (inclusive).
 * @param s1   Receives the index of the lightest parentless node, or 0.
 * @param s2   Receives the index of the second lightest one, or 0.
 * @return OK when two nodes were found, ERROR otherwise.
 */
Status select(HuffmanTree HT,int top,int *s1,int *s2){
    *s1 = 0;
    *s2 = 0;
    // First pass: lightest parentless node
    for(int i = 1;i <= top; i++){
        if(HT[i].parent != 0) continue;
        if(*s1 == 0 || HT[i].weight < HT[*s1].weight){
            *s1 = i;
        }
    }
    // Second pass: lightest parentless node other than *s1
    for(int i = 1;i <= top; i++){
        if(HT[i].parent != 0 || i == *s1) continue;
        if(*s2 == 0 || HT[i].weight < HT[*s2].weight){
            *s2 = i;
        }
    }
    return (*s1 != 0 && *s2 != 0) ? OK : ERROR;
}
/**
 * Derives the Huffman code of every leaf by walking from the leaf up to the
 * root ('0' for a left branch, '1' for a right branch).
 *
 * @param HT      Huffman tree as built by CreateHuffmanTree (1-based).
 * @param HC      Receives a new[]-allocated table; entries 1..length hold a
 *                character and its new[]-allocated, NUL-terminated code.
 *                Set to nullptr on failure. The caller owns the memory.
 * @param length  Number of leaves.
 * @return OK on success, ERROR when there is no usable tree.
 */
Status CreateHuffmanCode(HuffmanTree HT,HuffmanCode &HC,int length){
    HC = nullptr;
    if(HT == nullptr || length <= 1) return ERROR;
    // Entry 0 is unused; value-initialisation leaves its str as nullptr
    HC = new HCode[length+1]();
    // Scratch buffer: the longest code has length-1 symbols plus '\0'
    char *cd = new char[length];
    cd[length-1] = '\0';
    for(int i = 1;i <= length; i++){
        // The code is produced back to front, so fill the buffer from its end
        int start = length-1;
        int c = i;
        int f = HT[c].parent;
        while(f != 0){
            --start;
            cd[start] = (HT[f].lchild == c) ? '0' : '1';
            c = f;
            f = HT[c].parent;
        }
        // length-start chars cover the code itself plus its terminator
        HC[i].str = new char[length-start];
        HC[i].data = HT[i].data;
        strcpy(HC[i].str,&cd[start]);
    }
    // cd came from new[], so it must be released with delete[]
    delete[] cd;
    return OK;
}
/**
 * Writes the Huffman encoding of a text to "code.txt" as a string of '0'
 * and '1' characters.
 *
 * @param data    NUL-terminated text to encode.
 * @param HC      Code table (entries 1..length).
 * @param length  Number of entries in the code table.
 * @return OK on success, ERROR when the table is missing or the output file
 *         cannot be opened.
 */
Status Encode(char *data,HuffmanCode HC,int length){
    if(HC == nullptr) return ERROR;
    ofstream outfile;
    outfile.open("code.txt");
    if(!outfile.is_open()){
        cerr<<"cannot open code.txt for writing"<<endl;
        return ERROR;
    }
    // Measure the text once instead of on every loop iteration
    const size_t dataLen = strlen(data);
    for(size_t i = 0;i < dataLen; i++){
        for(int j = 1;j <= length;++j){
            if(data[i] == HC[j].data){
                outfile<<HC[j].str;
                // Each character has exactly one entry; stop searching
                break;
            }
        }
    }
    outfile.close();
    cout<<"the code txt has been written"<<endl;
    cout<<endl;
    return OK;
}
/**
 * Reads the bit string stored in "code.txt", decodes it with the Huffman
 * tree and writes the recovered text to "out.txt".
 *
 * @param HT      Huffman tree (1-based; the root is node 2*length-1).
 * @param length  Number of leaves in the tree.
 * @return OK on success, ERROR when there is no usable tree or a file cannot
 *         be opened.
 */
Status Decode(HuffmanTree HT,int length){
    if(HT == nullptr || length <= 1) return ERROR;
    ifstream infile;
    infile.open("code.txt");
    if(!infile.is_open()){
        cerr<<"cannot open code.txt"<<endl;
        return ERROR;
    }
    // A std::string grows as needed, so no fixed-size stack buffer is required
    std::string codetxt;
    std::getline(infile,codetxt);
    infile.close();
    ofstream outfile;
    outfile.open("out.txt");
    if(!outfile.is_open()){
        cerr<<"cannot open out.txt for writing"<<endl;
        return ERROR;
    }
    const int rootIndex = 2*length-1;
    int node = rootIndex;
    for(char bit : codetxt){
        // '0' descends to the left child, '1' to the right child
        if(bit == '0'){
            node = HT[node].lchild;
        }
        else if(bit == '1'){
            node = HT[node].rchild;
        }
        // A node without children is a leaf: emit it and restart at the root
        if(HT[node].lchild == 0 && HT[node].rchild == 0){
            outfile<<HT[node].data;
            node = rootIndex;
        }
    }
    outfile.close();
    cout<<"the output txt has been written"<<endl;
    cout<<endl;
    return OK;
}
三、小结
构建哈夫曼树的过程
对于给定的有各自权值的 n 个结点,构建哈夫曼树有一个行之有效的办法:
- 在 n 个权值中选出两个最小的权值,对应的两个结点组成一个新的二叉树,且新二叉树的根结点的权值为左右孩子权值的和;
- 在原有的 n 个权值中删除那两个最小的权值,同时将新的权值加入到 n–2 个权值的行列中,以此类推;
- 重复步骤 1 和 2,直到所有的结点构建成了一棵二叉树为止,这棵树就是哈夫曼树。
- 为了获得传送报文的最短长度,可将每个字符的出现频率作为权值赋予对应的字符结点。显然字符使用频率越小权值越小,权值越小叶子就越靠下,于是频率小编码长,频率高编码短,这样就保证了此树的带权路径长度最小,效果上就是传送报文的长度最短。