文章目录
Huffman
1. C语言下的赫夫曼编码
1.1 unsigned int 与 int
- int 型的范围:-2^31 ~ 2^31 - 1,
而 unsigned int 的范围:0 ~ 2^32 - 1。平常用 int 一般就够了,代码尽量好理解一点。
typedef struct
{
unsigned int weight; // a plain "int weight;" would be sufficient here
}
1.2 Huffman版本一:自动统计文本,进行赫夫曼编码/译码/校验
1.2.1 C语言代码
- 赫夫曼树节点的结构
// Huffman tree node. The `data` field is only meaningful for leaf nodes.
typedef struct HTNode
{
char data;
int weight;
int parent, lchild, rchild;
} *HuffmanTree;
- 每个字符的赫夫曼编码的结构
// Huffman code assigned to one character.
typedef struct HCNode
{
char data;
char *code; // a code is a whole string of '0'/'1' characters rather than a single bit, so it is held through a pointer
} *HuffmanCode;
- 统计文本字符的个数的结构体
// One character of the text together with its occurrence count.
typedef struct LeafNode
{
char data;
int weight;
} LeafNode;
- 用到的函数声明
/*
ReadCount:
Returns: the number of leaf nodes of the Huffman tree
Parameter: an array of LeafNode
*/
int ReadCount(LeafNode character[]);
/*
Select:
Outputs: s1, s2 (effectively return values, because "int &s1" is a reference)
s1 and s2 are the positions of the nodes with the smallest weights
*/
void Select(HuffmanTree HT, int n, int &s1, int &s2);
/*
HuffmanCoding:
Purpose: encoding
Outputs: HT, HC
Consists of: building the Huffman tree and the Huffman codes
*/
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, LeafNode character[], int n);
/*
WriteToFile:
Purpose: writes the HC codes into the file code.txt
*/
void WriteToFile(HuffmanCode HC, int n);
/*
HuffmanDecoding:
Purpose: decoding
Translates the 0/1 string in code.txt back into text and stores it in translation.txt
*/
void HuffmanDecoding(HuffmanTree HT, int n);
/*
Compare:
Verifies the text by comparing text.txt with translation.txt
*/
void Compare(int n);
- 主函数
// Entry point: count the characters of text.txt, build the Huffman tree and
// codes, write the encoded text, decode it again and verify the round trip.
int main()
{
    // Distinct characters and their weights; entries are used 1-based.
    LeafNode tempArray[128];
    int n = ReadCount(tempArray);

    // Encoding. Both handles start as NULL because HuffmanCoding may return
    // without producing a tree (e.g. when there is nothing to encode); the
    // later stages must not run on unset pointers.
    HuffmanTree HT = NULL;
    HuffmanCode HC = NULL;
    HuffmanCoding(HT, HC, tempArray, n);

    if(HT != NULL && HC != NULL)
    {
        // Encode the text to code.txt, then decode it to translation.txt.
        WriteToFile(HC, n);
        HuffmanDecoding(HT, n);

        // Verify that the decoded text matches the original.
        Compare(n);

        // Each code string was allocated separately by HuffmanCoding.
        for(int i=1; i<=n; i++)
        {
            free(HC[i].code);
        }
    }
    else
    {
        printf("\n没有可编码的字符,已跳过编码/译码。\n");
    }

    free(HC);
    free(HT);

    printf("\n按Enter键,结束...\n");
    getchar();
    return 0;
}
- 源码:
#include<stdio.h>
#include<malloc.h> // 分配空间需要的头文件
#include<string.h> // strcpy函数的头文件
// Huffman tree node. The `data` field is only meaningful for leaf nodes.
typedef struct HTNode
{
char data;
int weight;
int parent, lchild, rchild;
} *HuffmanTree;
// Huffman code assigned to one character.
typedef struct HCNode
{
char data;
char *code; // a code is a whole string of '0'/'1' characters rather than a single bit, so it is held through a pointer
} *HuffmanCode;
// One character of the text together with its occurrence count.
typedef struct LeafNode
{
char data;
int weight;
} LeafNode;
// Declarations of the functions used below
/*
ReadCount:
Returns: the number of leaf nodes of the Huffman tree
Parameter: an array of LeafNode
*/
int ReadCount(LeafNode character[]);
/*
Select:
Outputs: s1, s2 (effectively return values, because "int &s1" is a reference)
s1 and s2 are the positions of the nodes with the smallest weights
*/
void Select(HuffmanTree HT, int n, int &s1, int &s2);
/*
HuffmanCoding:
Purpose: encoding
Outputs: HT, HC
Consists of: building the Huffman tree and the Huffman codes
*/
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, LeafNode character[], int n);
/*
WriteToFile:
Purpose: writes the HC codes into the file code.txt
*/
void WriteToFile(HuffmanCode HC, int n);
/*
HuffmanDecoding:
Purpose: decoding
Translates the 0/1 string in code.txt back into text and stores it in translation.txt
*/
void HuffmanDecoding(HuffmanTree HT, int n);
/*
Compare:
Verifies the text by comparing text.txt with translation.txt
*/
void Compare(int n);
// Entry point: count the characters of text.txt, build the Huffman tree and
// codes, write the encoded text, decode it again and verify the round trip.
int main()
{
    // Distinct characters and their weights; entries are used 1-based.
    LeafNode tempArray[128];
    int n = ReadCount(tempArray);

    // Encoding. Both handles start as NULL because HuffmanCoding may return
    // without producing a tree (e.g. when there is nothing to encode); the
    // later stages must not run on unset pointers.
    HuffmanTree HT = NULL;
    HuffmanCode HC = NULL;
    HuffmanCoding(HT, HC, tempArray, n);

    if(HT != NULL && HC != NULL)
    {
        // Encode the text to code.txt, then decode it to translation.txt.
        WriteToFile(HC, n);
        HuffmanDecoding(HT, n);

        // Verify that the decoded text matches the original.
        Compare(n);

        // Each code string was allocated separately by HuffmanCoding.
        for(int i=1; i<=n; i++)
        {
            free(HC[i].code);
        }
    }
    else
    {
        printf("\n没有可编码的字符,已跳过编码/译码。\n");
    }

    free(HC);
    free(HT);

    printf("\n按Enter键,结束...\n");
    getchar();
    return 0;
}
// ReadCount: read text.txt, echo it to stdout and tally how often each
// character occurs.
//
// character: output array, filled 1-based with one entry per distinct
//            character, in ascending character-code order. It must have room
//            for at least 128 elements (index 0 is cleared but not used).
// Returns:   the number of distinct characters stored, or 0 when the file is
//            empty or cannot be opened.
//
// Only character codes 1..127 are tallied. Other bytes are echoed but not
// counted, because the tally table is indexed by character code and has
// exactly 128 slots.
int ReadCount(LeafNode character[])
{
    enum { CHARSET = 128 };
    int count[CHARSET] = {0};   // occurrences, indexed by character code
    FILE *fp;
    int ch;                     // int, so that EOF is distinguishable from every byte value
    int i, k;

    for(i=0; i<CHARSET; i++)
    {
        character[i].data = 0;
        character[i].weight = 0;
    }

    fp = fopen("text.txt", "r");
    if(fp == NULL)
    {
        // The file does not exist yet: create it empty so the user can fill
        // it in, then reopen it for reading instead of reading a write stream.
        fp = fopen("text.txt", "w");
        if(fp != NULL) fclose(fp);
        fp = fopen("text.txt", "r");
    }
    if(fp == NULL)
    {
        printf("\n库文件不存在!\n");
        return 0;
    }

    printf("\n读取文本为:\n");
    while((ch = fgetc(fp)) != EOF)
    {
        printf("%c", ch);
        if(ch > 0 && ch < CHARSET) count[ch]++;
    }
    fclose(fp);

    // Compact the non-zero tallies into character[1..k].
    k = 0;
    for(i=1; i<CHARSET; i++)
    {
        if(count[i] != 0)
        {
            k++;
            character[k].data = (char)i;
            character[k].weight = count[i];
        }
    }

    printf("\n\n统计结果或字符频率或权值为 ( < 字符 权值 > ) :\n");
    for(i=1; i<=k; i++)
    {
        printf(" <%2c %2d> ", character[i].data, character[i].weight);
        if(i%5 == 0) printf("\n");   // five entries per output line
    }
    printf("\n(共 %2d 种字符)\n", k);
    return k;
}
// Select: among the nodes HT[1..n] that have no parent yet, find the two with
// the smallest weights.
//
// HT: node array, used 1-based
// n : last index to examine
// s1: receives the index of the lightest parentless node (0 if there is none)
// s2: receives the index of the second lightest one (0 if fewer than two exist)
//
// Candidates are compared against the best nodes found so far rather than
// against fixed sentinel weights, so arbitrarily large weights work and the
// results are never left uninitialised.
// Tie rule: a node whose weight equals the current minimum becomes s1 and the
// previous minimum moves to s2; a node that merely equals the current second
// minimum does not replace it.
void Select(HuffmanTree HT, int n, int &s1, int &s2)
{
    int first = 0;    // index of the lightest node so far, 0 = none yet
    int second = 0;   // index of the runner-up, 0 = none yet

    for(int i=1; i<=n; i++)
    {
        if(HT[i].parent != 0) continue;   // already merged into a subtree

        if(first == 0 || HT[i].weight <= HT[first].weight)
        {
            second = first;
            first = i;
        }
        else if(second == 0 || HT[i].weight < HT[second].weight)
        {
            second = i;
        }
    }

    s1 = first;
    s2 = second;
}
// HuffmanCoding: build the Huffman tree and the code of every character.
//
// HT       : output, malloc'ed array of 2n nodes. Leaves are 1..n, internal
//            nodes n+1..2n-1, the root is node 2n-1.
// HC       : output, malloc'ed array with entries 1..n; every HC[i].code is a
//            separately malloc'ed '0'/'1' string.
// character: input, entries 1..n hold the characters and their weights
// n        : number of distinct characters (leaves)
//
// When n <= 0 or an allocation fails, HT and HC are both set to NULL.
// The caller owns the returned memory (HC[i].code, HC and HT).
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, LeafNode character[], int n)
{
    // Give the outputs a defined value on every path.
    HT = NULL;
    HC = NULL;
    if(n <= 0) return;

    int m = 2*n-1;
    int s1 = 0, s2 = 0;
    int i;

    HT = (HuffmanTree)malloc((m+1)*sizeof(*HT));
    HC = (HuffmanCode)malloc((n+1)*sizeof(*HC));
    char *code = (char *)malloc((n+1)*sizeof(char));   // scratch buffer, filled from the back
    if(HT == NULL || HC == NULL || code == NULL)
    {
        free(code); free(HC); free(HT);
        HT = NULL; HC = NULL;
        return;
    }

    // Slot 0 is not part of the tree; give it defined contents anyway.
    HT[0].data = 0; HT[0].weight = 0;
    HT[0].parent = 0; HT[0].lchild = 0; HT[0].rchild = 0;
    HC[0].data = 0; HC[0].code = NULL;

    // Leaves carry the input characters; internal nodes start out empty.
    for(i=1; i<=m; i++)
    {
        HT[i].data = (i<=n) ? character[i].data : '0';
        HT[i].weight = (i<=n) ? character[i].weight : 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }

    // A lone symbol gets the one-bit code "0" below. Its only node is both
    // root and leaf with lchild == 0, so mirroring it into slot 0 lets a
    // decoder that follows lchild from the root land on the same character.
    if(n == 1) HT[0] = HT[1];

    // Build the internal nodes by repeatedly merging the two lightest subtrees.
    for(i=n+1; i<=m; i++)
    {
        Select(HT,i-1,s1,s2);
        HT[s1].parent=i; HT[s2].parent=i;
        HT[i].lchild=s1; HT[i].rchild=s2;
        HT[i].weight=HT[s1].weight+HT[s2].weight;
    }

    // Derive each code by walking from the leaf up to the root.
    code[n] = '\0';
    for(i=1; i<=n; i++)
    {
        int start = n;
        for(int c=i,f=HT[i].parent; f!=0; c=f,f=HT[f].parent)
        {
            code[--start] = (HT[f].lchild == c) ? '0' : '1';
        }
        if(start == n) code[--start] = '0';   // leaf without parent: the lone-symbol case

        HC[i].data = HT[i].data;
        HC[i].code = (char *)malloc((n-start+1)*sizeof(char));   // exact length plus terminator
        if(HC[i].code == NULL)
        {
            for(int k=1; k<i; k++) free(HC[k].code);
            free(code); free(HC); free(HT);
            HT = NULL; HC = NULL;
            return;
        }
        strcpy(HC[i].code, &code[start]);
    }
    free(code);

    printf("\n每个字符对应的编码为 ( < 字符编号 字符 赫夫曼编码 > ) :\n");
    for(i=1; i<=n; i++)
    {
        printf(" <%2d %2c %8s> ",i, HC[i].data, HC[i].code);
        if(i%5 == 0) printf("\n");   // five entries per output line
    }
}
// WriteToFile: encode text.txt character by character and write the
// resulting '0'/'1' string to code.txt, echoing it to stdout.
//
// HC: code table, entries 1..n
// n : number of entries in HC
//
// Characters that have no entry in HC are skipped. If text.txt cannot be
// opened, code.txt is still created (empty) so no stale code is left behind.
void WriteToFile(HuffmanCode HC, int n)
{
    FILE *fp1 = fopen("text.txt", "r");
    FILE *fp2 = fopen("code.txt", "w");

    if(fp1 == NULL) printf("\ntext文件不存在!\n");
    if(fp2 == NULL) printf("\n打开code.txt文件失败!\n");
    if(fp1 == NULL || fp2 == NULL)
    {
        if(fp1 != NULL) fclose(fp1);
        if(fp2 != NULL) fclose(fp2);
        return;
    }

    printf("\n\n赫夫曼报文:\n");
    int ch;   // int, so that EOF is distinguishable from every byte value
    while((ch = fgetc(fp1)) != EOF)
    {
        for(int i=1; i<=n; i++)
        {
            if((char)ch == HC[i].data)
            {
                fputs(HC[i].code, fp2);
                printf("%s", HC[i].code);
                break;   // the first matching entry is used
            }
        }
    }

    fclose(fp1);
    fclose(fp2);
    printf("\n");
}
// HuffmanDecoding: translate the '0'/'1' string in code.txt back into text,
// printing it and storing it in translation.txt.
//
// HT: Huffman tree; the root is node 2n-1, and a node whose lchild and rchild
//     are both 0 is a leaf
// n : number of leaves
//
// Characters other than '0' and '1' in code.txt are ignored. If the input
// ends in the middle of a code, an error message is printed.
void HuffmanDecoding(HuffmanTree HT, int n)
{
    const int root = 2*n-1;
    FILE *fp1 = fopen("code.txt", "r");
    FILE *fp2 = fopen("translation.txt","w");

    if(fp1 == NULL) printf("\ncode文件不存在!\n");
    if(fp2 == NULL) printf("\n打开translation.txt文件失败!\n");
    if(fp1 == NULL || fp2 == NULL)
    {
        if(fp1 != NULL) fclose(fp1);
        if(fp2 != NULL) fclose(fp2);
        return;
    }

    printf("\n译码为:\n");
    int i = root;
    int ch;   // int, so that EOF is distinguishable from every byte value
    // With n < 1 there is no tree to walk; translation.txt is left empty.
    while(n >= 1 && (ch = fgetc(fp1)) != EOF)
    {
        if(ch == '0') i = HT[i].lchild;
        else if(ch == '1') i = HT[i].rchild;
        else continue;

        if((HT[i].lchild==0)&&(HT[i].rchild==0))
        {
            printf("%c",HT[i].data);
            fputc(HT[i].data, fp2);
            i = root;   // restart at the root for the next code
        }
    }
    printf("\n");

    // Not being back at the root means the last code was incomplete.
    if(i != root) printf("\n报文有错!!!\n");

    fclose(fp1);
    fclose(fp2);
}
// Compare: verify the round trip by checking that text.txt and
// translation.txt have identical contents, and print the verdict.
//
// n: unused; kept so existing callers stay valid
//
// The files are compared as streams, so their size is not limited, and a
// translation that is only a prefix of the text counts as a mismatch.
void Compare(int n)
{
    (void)n;

    printf("\n比较结果:\n");

    FILE *fp1 = fopen("text.txt", "r");
    if(fp1 == NULL)
    {
        printf("\n打开text.txt文件失败!\n");
        return;
    }
    FILE *fp2 = fopen("translation.txt", "r");
    if(fp2 == NULL)
    {
        printf("\n打开translation.txt文件失败!\n");
        fclose(fp1);
        return;
    }

    int same = 1;
    int c1, c2;   // int, so that EOF is distinguishable from every byte value
    do
    {
        c1 = fgetc(fp1);
        c2 = fgetc(fp2);
        if(c1 != c2) same = 0;   // also catches one file ending before the other
    } while(same && c1 != EOF);

    fclose(fp1);
    fclose(fp2);

    if(same) printf("\n前后数据一致,编/译码成功!\n");
    else printf("\n编/译码失败!\n");
}
- 效果:

1.2.2 使用方法
1. 复制源码,(最好放到文件夹里面)。
2. 编译运行两遍。(程序自动创建:text.txt, code.txt, translation.txt)
3. 把你想编码的英文段,写入到text.txt里面,编译运行就行了。
4. 赫夫曼编码在code.txt里面
5. 译文在translation.txt里面
1.3 Huffman版本二:自己构造权值,进行赫夫曼编码/译码
1.3.1 C语言代码
#include<stdio.h>
#include<malloc.h>
#include<string.h>
/*
Huffman coding requires a Huffman tree (optimal binary tree),
so a Huffman tree node type is needed.
Every node is built upwards from the weights of the leaf nodes, so the fields are of type int
*/
typedef struct
{
char data;
int weight;
int parent, lchild, rchild;
}HTNode, *HuffmanTree; // the Huffman tree is stored in a dynamically allocated array
/*
Each character corresponds to one (prefix) code.
A code is a string of characters; here it is kept in dynamically
allocated storage reached through a pointer
*/
// Huffman code assigned to one character
typedef struct HCNode
{
char data;
char *code; // a code is a whole string of '0'/'1' characters rather than a single bit, so it is held through a pointer
} *HuffmanCode;
// A character paired with its weight
typedef struct LeafNode
{
char data;
int weight;
} LeafNode;
/*
Functions used by this program
*/
void Select(HuffmanTree HT, int n, int &s1, int &s2);
int SetWeight(LeafNode w[]);
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, LeafNode w[], int n);
void HuffmanDecoding(HuffmanTree HT, int n);
/*
Entry point: read the characters and weights from the user, build the
Huffman tree and codes, then decode a bit string typed by the user.
*/
int main()
{
    LeafNode w[128];   // entries are used 1-based
    int n = SetWeight(w);

    // Both handles start as NULL because HuffmanCoding may return without
    // producing a tree; decoding must not run on unset pointers.
    HuffmanTree HT = NULL;
    HuffmanCode HC = NULL;

    // Encoding
    HuffmanCoding(HT, HC, w, n);

    if(HT != NULL && HC != NULL)
    {
        // Decoding
        HuffmanDecoding(HT, n);

        // Each code string was allocated separately by HuffmanCoding.
        for(int i=1; i<=n; i++)
        {
            free(HC[i].code);
        }
    }
    else
    {
        printf("\n没有可编码的字符,已跳过编码/译码。\n");
    }

    free(HC);
    free(HT);

    getchar();
    return 0;
}
// SetWeight: interactively read the leaf characters and their weights.
//
// w: output array, filled 1-based; it must have room for at least 128
//    elements (main passes a LeafNode[128])
// Returns: the number of leaves read, or 0 when the input is invalid or ends
//          prematurely.
int SetWeight(LeafNode w[])
{
    enum { MAX_LEAVES = 127 };   // highest usable 1-based index of a 128-element array
    int i, n = 0;

    // Input
    printf("\n请输入叶子节点个数:");
    if(scanf("%d",&n) != 1 || n < 1 || n > MAX_LEAVES)
    {
        printf("\n叶子节点个数无效(应为 1 ~ %d)!\n", MAX_LEAVES);
        return 0;
    }
    getchar();   // consume the newline left behind by scanf

    for(i=1; i<=n; i++)
    {
        printf("\n请输入字符:");
        if(scanf("%c",&w[i].data) != 1)
        {
            printf("\n输入无效!\n");
            return 0;
        }
        getchar();
        printf("\n请输入%c的权值:",w[i].data);
        if(scanf("%d",&w[i].weight) != 1)
        {
            printf("\n输入无效!\n");
            return 0;
        }
        getchar();
    }

    // Echo what was entered
    printf("\n< 字符 权值 >\n");
    for(i=1; i<=n; i++)
    {
        printf("\n< %2c %2d >\n",w[i].data,w[i].weight);
    }
    return n;
}
/*
Select: among the nodes HT[1..n] that have no parent yet, repeatedly used to
pick the two with the smallest weights while the Huffman tree is built.

HT: node array, used 1-based
n : last index to examine
s1: receives the index of the lightest parentless node (0 if there is none)
s2: receives the index of the second lightest one (0 if fewer than two exist)

Notes:
1. s1 and s2 are references, so the indices are handed back to the caller.
2. Candidates are compared against the best nodes found so far rather than
   against fixed sentinel weights, so arbitrarily large weights work and the
   results are never left uninitialised.
3. Tie rule: a node whose weight equals the current minimum becomes s1 and the
   previous minimum moves to s2; a node that merely equals the current second
   minimum does not replace it.
*/
void Select(HuffmanTree HT, int n, int &s1, int &s2)
{
    int first = 0;    // index of the lightest node so far, 0 = none yet
    int second = 0;   // index of the runner-up, 0 = none yet

    for(int i=1; i<=n; i++)
    {
        if(HT[i].parent != 0) continue;   // already merged into a subtree

        if(first == 0 || HT[i].weight <= HT[first].weight)
        {
            second = first;
            first = i;
        }
        else if(second == 0 || HT[i].weight < HT[second].weight)
        {
            second = i;
        }
    }

    s1 = first;
    s2 = second;
}
// HuffmanCoding: build the Huffman tree and the code of every character,
// printing the tree after every construction step.
//
// HT: output, malloc'ed array of 2n nodes. Leaves are 1..n, internal nodes
//     n+1..2n-1, the root is node 2n-1.
// HC: output, malloc'ed array with entries 1..n; every HC[i].code is a
//     separately malloc'ed '0'/'1' string.
// w : input, entries 1..n hold the characters and their weights
// n : number of leaves
//
// When n <= 0 or an allocation fails, HT and HC are both set to NULL.
// The caller owns the returned memory (HC[i].code, HC and HT).
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, LeafNode w[], int n)
{
    // Give the outputs a defined value on every path.
    HT = NULL;
    HC = NULL;
    if(n <= 0) return;

    int i, j;
    int s1 = 0, s2 = 0;
    int m = 2*n-1;   // total number of nodes

    HT = (HuffmanTree)malloc((m+1)*sizeof(*HT));
    HC = (HuffmanCode)malloc((n+1)*sizeof(*HC));
    char *code = (char *)malloc((n+1)*sizeof(char));   // scratch buffer, filled from the back
    if(HT == NULL || HC == NULL || code == NULL)
    {
        free(code); free(HC); free(HT);
        HT = NULL; HC = NULL;
        return;
    }

    // Slot 0 is not part of the tree; give it defined contents anyway.
    HT[0].data = 0; HT[0].weight = 0;
    HT[0].parent = 0; HT[0].lchild = 0; HT[0].rchild = 0;
    HC[0].data = 0; HC[0].code = NULL;

    // Leaves carry the input characters; internal nodes start out empty.
    for(i=1; i<=m; i++)
    {
        HT[i].data = (i<=n) ? w[i].data : '0';
        HT[i].weight = (i<=n) ? w[i].weight : 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }

    // A lone symbol gets the one-bit code "0" below. Its only node is both
    // root and leaf with lchild == 0, so mirroring it into slot 0 lets a
    // decoder that follows lchild from the root land on the same character.
    if(n == 1) HT[0] = HT[1];

    /* Build the Huffman tree */
    // Print the initial state
    printf("\n赫夫曼树的构造过程如下所示:\n");
    printf("\nHT初态:\n");
    printf("\n node weight parent lchild rchild\n");
    for(i=1; i<=m; i++)
    {
        printf("\n %c %d %d %d %d\n",HT[i].data,HT[i].weight,HT[i].parent,HT[i].lchild,HT[i].rchild);
    }

    // Create the internal nodes by repeatedly merging the two lightest subtrees.
    for(i=n+1; i<=m; i++)
    {
        Select(HT,i-1,s1,s2);
        HT[s1].parent=i; HT[s2].parent=i;
        HT[i].lchild=s1; HT[i].rchild=s2;
        HT[i].weight=HT[s1].weight + HT[s2].weight;
        printf("\nselect: s1=%d s2=%d\n",s1, s2);
        printf("\n node weight parent lchild rchild\n");
        // Print rows 1..i of the tree (row j, not row i repeated).
        for(j=1; j<=i; j++)
        {
            printf("\n %c %d %d %d %d\n",HT[j].data,HT[j].weight,HT[j].parent,HT[j].lchild,HT[j].rchild);
        }
    }

    /* Encoding: walk from every leaf up to the root */
    code[n] = '\0';
    for(i=1; i<=n; i++)
    {
        int start = n;
        for(int c=i,f=HT[i].parent; f!=0; c=f,f=HT[f].parent)
        {
            code[--start] = (HT[f].lchild == c) ? '0' : '1';
        }
        if(start == n) code[--start] = '0';   // leaf without parent: the lone-symbol case

        HC[i].data = HT[i].data;
        HC[i].code = (char *)malloc((n-start+1)*sizeof(char));   // exact length plus terminator
        if(HC[i].code == NULL)
        {
            for(int k=1; k<i; k++) free(HC[k].code);
            free(code); free(HC); free(HT);
            HT = NULL; HC = NULL;
            return;
        }
        strcpy(HC[i].code, &code[start]);
    }
    free(code);

    printf("\n每个字符对应的编码为:\n");
    printf("\n< 字符编号 字符 赫夫曼编码 >\n");
    for(i=1; i<=n; i++)
    {
        printf("\n< %2d %c %8s >\n",i, HC[i].data, HC[i].code);
    }
}
// HuffmanDecoding: decode a '0'/'1' string typed by the user and print the
// resulting characters.
//
// HT: Huffman tree; the root is node 2n-1, and a node whose lchild and rchild
//     are both 0 is a leaf
// n : number of leaves
//
// Input is read character by character until '#' or end of input. Characters
// other than '0' and '1' (spaces, newlines) are ignored. If the input stops
// in the middle of a code, an error message is printed.
void HuffmanDecoding(HuffmanTree HT, int n)
{
    if(n < 1 || HT == NULL) return;   // no tree to walk

    const int root = 2*n-1;
    int i = root;
    char bit;

    // The prompt names '#', which is the terminator the loop actually tests for.
    printf("\n请输入二进制串,以#结束(可用空格间断),译码结果为:\n");

    // Checking scanf's result stops the loop at end of input instead of
    // spinning forever on the last character read.
    while(scanf("%c",&bit) == 1 && bit != '#')
    {
        if(bit == '0') i = HT[i].lchild;
        else if(bit == '1') i = HT[i].rchild;
        else continue;

        if((HT[i].lchild==0)&&(HT[i].rchild==0))
        {
            printf("%c",HT[i].data);
            i = root;   // restart at the root for the next code
        }
    }
    printf("\n");

    // Not being back at the root means the last code was incomplete.
    if(i != root) printf("\n报文有错!!!\n");
}
1.3.2 输出样例



1184

被折叠的 条评论
为什么被折叠?



