哈夫曼编码压缩原理:每个字符在内存中以 ASCII 码存储,固定占用 8 个二进制位(bit)。哈夫曼编码利用哈夫曼树为每个字符重新分配一个 01 编码,并根据字符在文章中出现的频率调整 01 串的长度:出现频率高的字符在哈夫曼树中的权重大,编码后的 01 串短;出现频率低的字符编码后的 01 串较长。这样最终计算出的平均编码长度小于 8 位。在本代码中,平均编码长度约为 4.72 位,压缩率(压缩后大小与原始大小之比)约为 4.72 / 8 ≈ 59%,从而达到压缩文本的目的。
#include "stdafx.h"
#include<string>
/* Number of bytes read by the most recent Char_Probability_Fromfile() call. */
int length;

/* One frequency-table entry: a byte value and how often it was seen. */
struct Char_Frequency
{
    char c;        /* the byte value; '\0' marks an unused slot */
    int Frequency; /* number of occurrences of c */
};

/*
 * Frequency table filled by Char_Probability_Fromfile().
 * Used entries are packed from index 0 in order of first appearance;
 * the first entry whose c is '\0' terminates the table.
 */
struct Char_Frequency a[10000];

/*
 * Count how often each byte occurs in the stream fp.
 *
 * The whole table `a` and the counter `length` are reset first, so the
 * function can be called repeatedly without results from an earlier call
 * leaking into the next one. The stream is then read from its current
 * position to end-of-file.
 *
 * fp: stream opened for reading. If it is NULL the table is left empty
 *     and length is 0.
 *
 * On return, `length` holds the number of bytes read and `a` holds one
 * entry per distinct non-NUL byte. NUL bytes are counted in `length` but
 * get no table entry, because '\0' is the end-of-table marker.
 */
void Char_Probability_Fromfile(FILE *fp)
{
    const int capacity = (int)(sizeof a / sizeof a[0]);
    int ch;

    /* Clear every slot, not just a prefix, so no stale entry survives. */
    length = 0;
    for (int i = 0; i < capacity; i++)
    {
        a[i].c = '\0';
        a[i].Frequency = 0;
    }

    if (fp == NULL)
    {
        return;
    }

    while ((ch = fgetc(fp)) != EOF)
    {
        const char byte = (char)ch;
        int i = 0;

        length++;

        /* A NUL byte would overwrite the terminator slot; do not tally it. */
        if (byte == '\0')
        {
            continue;
        }

        /*
         * Stop at the matching entry or at the first free slot. A char has
         * far fewer distinct values than the table has slots, so a free
         * slot always exists.
         */
        while (a[i].c != '\0' && a[i].c != byte)
        {
            i++;
        }

        if (a[i].c == '\0')
        {
            a[i].c = byte; /* first occurrence: claim the free slot */
        }
        a[i].Frequency++;
    }
}
typedef struct {
int weight;
char c;
int lchild;