实验环境:Windows11 + Visual Studio
实验任务:
以下的代码完成了前五个要求。
代码可以直接粘贴到IDE中运行
#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <math.h>
// Table and buffer sizes shared by the whole program.
#define ASCIInum 128 // number of ASCII characters: codes 0 through 127 inclusive
#define Maxsize (2 * ASCIInum - 1) // a Huffman tree with n leaves has 2n-1 nodes
#define MaxCodelen (ASCIInum - 1) // a leaf sits at most n-1 levels below the root
#define MaxArticlelen 5000 // size of the text buffers used while encoding/decoding
// One entry of the character-frequency table filled in by Readfile.
typedef struct CharacterNode
{
char ch; // the character this entry stands for
int freq; // how many times that character occurred in the input file
}CharacterNode; // used to count the frequency of each character read from the file
// One node of the Huffman tree, stored in a flat array and linked by array index.
// CreateHuffTree initializes lchild, rchild and parent to -1, meaning "no link".
typedef struct HTNode
{
char ch; // character carried by the node (only read for leaf nodes)
int weight; // weight of the Huffman node
int lchild; // index of the left child, or -1
int rchild; // index of the right child, or -1
int parent; // index of the parent, or -1 while the node has not been merged
}HTNode; // Huffman tree node
// Huffman code assigned to one character.
typedef struct CodeNode
{
char ch; // the character being encoded
char bits[MaxCodelen + 1]; // its code as a NUL-terminated string of '0'/'1' characters
}CodeNode; // Huffman code table entry
// Forward declarations of the functions defined after main.
// --- character frequency statistics ---
int Readfile(char* file,CharacterNode *asc,int *sum);
void PrintFreq(CharacterNode* asc,int n);
// --- Huffman tree construction and display ---
void CreateHuffTree(CharacterNode*asc,int n,HTNode* T);
void SelectMin(HTNode* T, int n, int* p1, int* p2);
void DisplayHuffTree(HTNode* T,int n);
// --- code table generation and text-form ('0'/'1' characters) encoding ---
void CharsetEncoding(HTNode* T,CodeNode* Huffcode,int n);
void PrintEncoding(CodeNode* Huffcode, int n);
int FileEncoding(CodeNode* Huffcode, int n, char* infile,char* outfile);
char* FindChar(CodeNode* Huffcode, int n, char ch);
// --- compression-rate reports ---
float ComputeRate_1(int chnum,char*outfile);
float ComputeRate_2(int chnum, char* outfile, int n);
// --- decoding and printing of the original text ---
int Decoding(char* Codingfile, char*textfile,HTNode* T,int valid,int chnum);
void PrintText(char* file);
// --- bit-packed (binary file) encoding and decoding ---
void FileEncodingBinary(CodeNode* Huffcode, int n, char* infile, char* outfilebinary);
int DecodingBinary(char* CodingfileBinary, char* textfile, HTNode* T, int valid, int chnum);
// Program entry point.
// Reads data.txt, prints the character statistics, builds the Huffman tree and
// code table, writes the encoding both as '0'/'1' text and as packed bits,
// reports two compression rates, and finally decodes both encodings again.
// Returns 0 on success and EXIT_FAILURE when the input cannot be processed.
int main(void)
{
    CharacterNode asc[ASCIInum];
    char* filename = "data.txt";
    int chnum = 0; // number of characters in the file

    // number of distinct characters that actually occur in the file
    int valid = Readfile(filename, asc, &chnum);
    if (valid < 0)
    {
        return EXIT_FAILURE; // Readfile has already reported the failure
    }
    if (valid == 0)
    {
        // No symbol means no tree: 2*valid-1 would be negative further down.
        printf("input file is empty, nothing to encode\n");
        return EXIT_FAILURE;
    }
    if (2 * valid - 1 > Maxsize)
    {
        // A tree with `valid` leaves needs 2*valid-1 nodes; refuse to overrun huff[].
        printf("too many distinct characters for the Huffman table\n");
        return EXIT_FAILURE;
    }
    PrintFreq(asc, valid);

    HTNode huff[Maxsize];
    CreateHuffTree(asc, valid, huff);
    DisplayHuffTree(huff, valid);

    CodeNode Huffcode[ASCIInum];
    CharsetEncoding(huff, Huffcode, valid);
    PrintEncoding(Huffcode, valid);

    char* outfile = "HuffEncoding.txt";
    char* outfilebinary = "HuffEncodingBinary.dat";
    if (FileEncoding(Huffcode, valid, filename, outfile) != 0)
    {
        return EXIT_FAILURE; // without the encoded file the later steps are meaningless
    }
    FileEncodingBinary(Huffcode, valid, filename, outfilebinary);

    ComputeRate_1(chnum, outfile);
    ComputeRate_2(chnum, outfile, valid);

    char* textfile = "Decode.txt";
    char* textfileFromB = "DecodeBinary.txt";
    PrintText(filename);

    int status = 0;
    if (Decoding(outfile, textfile, huff, valid, chnum) != 0)
    {
        status = EXIT_FAILURE;
    }
    if (DecodingBinary(outfilebinary, textfileFromB, huff, valid, chnum) != 0)
    {
        status = EXIT_FAILURE;
    }
    return status;
}
// Read a text file and count how often each ASCII character occurs.
//   file : path of the file to read
//   asc  : table with room for ASCIInum entries; it is (re)initialized here and
//          left sorted by descending frequency
//   sum  : incremented once for every character that was counted
// Returns the number of distinct characters with a non-zero count, or -1 when
// the file cannot be opened. Bytes that do not index the table (>= ASCIInum)
// are skipped and not added to *sum.
int Readfile(char* file,CharacterNode* asc,int*sum)
{
    FILE* fp = fopen(file, "r");
    if (fp == NULL)
    {
        printf("open failure\n");
        return -1;
    }

    for (int i = 0; i < ASCIInum; ++i) // initialize the frequency table
    {
        asc[i].ch = (char)i;
        asc[i].freq = 0;
    }

    // fgetc returns an int: either an unsigned char value or EOF. Keeping it in
    // an int is what lets byte 0xFF be told apart from end of file.
    int c;
    int skipped = 0;
    while ((c = fgetc(fp)) != EOF)
    {
        if (c >= ASCIInum) // not a table index; would write out of bounds
        {
            ++skipped;
            continue;
        }
        asc[c].freq++;
        (*sum)++;
    }
    fclose(fp);
    if (skipped > 0)
    {
        printf("skipped %d non-ASCII byte(s)\n", skipped);
    }

    // Selection sort by descending frequency. Kept as a selection sort so the
    // order of equally frequent characters stays the same as before.
    for (int i = 0; i < ASCIInum; ++i)
    {
        int best = i;
        for (int j = i + 1; j < ASCIInum; ++j)
        {
            if (asc[j].freq > asc[best].freq)
            {
                best = j;
            }
        }
        if (best != i)
        {
            CharacterNode temp = asc[i];
            asc[i] = asc[best];
            asc[best] = temp;
        }
    }

    int count = 0;
    for (int i = 0; i < ASCIInum; ++i) // count the characters that actually occur
    {
        if (asc[i].freq)
        {
            count++;
        }
    }
    return count;
}
// Print the first n rows of the frequency table, one "character<TAB>count" row
// each. Space and newline are shown by name so the row stays readable.
void PrintFreq(CharacterNode* asc, int n)
{
    printf("\n-----------------\n");
    printf("文本的字符种类有%d个\n", n);
    printf("字符\t频度\t\n");

    int row = 0;
    while (row < n)
    {
        const CharacterNode* entry = &asc[row];
        switch (entry->ch)
        {
        case ' ':
            printf("空格\t%d\n", entry->freq);
            break;
        case '\n':
            printf("\\n\t%d\n", entry->freq);
            break;
        default:
            printf("%c\t%d\n", entry->ch, entry->freq);
            break;
        }
        ++row;
    }

    printf("-----------------\n");
}
// Build the Huffman tree from the frequency table.
//   asc : frequency table; only its first n entries are read
//   n   : number of leaf characters
//   T   : node array with room for 2*n-1 nodes; entries 0..n-1 become the
//         leaves, entries n..2n-2 the merged nodes, and the last one the root
void CreateHuffTree(CharacterNode* asc, int n, HTNode* T)
{
    const int total = 2 * n - 1;

    for (int i = 0; i < total; ++i) // initialize every node
    {
        if (i < n)
        {
            T[i].ch = asc[i].ch;
            T[i].weight = asc[i].freq;
        }
        else
        {
            // Internal nodes carry no character. Reading asc[i] here would run
            // past the end of the table as soon as n exceeds half its size.
            T[i].ch = '\0';
            T[i].weight = 0;
        }
        T[i].lchild = -1;
        T[i].rchild = -1;
        T[i].parent = -1;
    }

    for (int i = n; i < total; ++i) // n-1 merges build the tree
    {
        int p1 = -1; // indices of the two nodes picked for this merge
        int p2 = -1;
        SelectMin(T, i, &p1, &p2);
        T[p1].parent = i;
        T[p2].parent = i;
        T[i].lchild = p1;
        T[i].rchild = p2;
        T[i].weight = T[p1].weight + T[p2].weight;
    }
}
// Pick two parentless nodes among T[0..n-1] to be merged next.
// The result is written to *p1 and *p2. The search runs in four passes:
// seed *p1 and *p2 with the first two parentless nodes, then lower *p1 to the
// lightest parentless node other than *p2, then lower *p2 to the lightest
// parentless node other than *p1.
void SelectMin(HTNode* T, int n, int* p1, int* p2)
{
    // Pass 1: first node that has no parent yet.
    int first = 0;
    while (first < n && T[first].parent != -1)
    {
        ++first;
    }
    if (first < n)
    {
        *p1 = first;
    }

    // Pass 2: next parentless node after it.
    int second = first + 1;
    while (second < n && T[second].parent != -1)
    {
        ++second;
    }
    if (second < n)
    {
        *p2 = second;
    }

    // Pass 3: lightest candidate for *p1, never the node held in *p2.
    for (int idx = 0; idx < n; ++idx)
    {
        if (T[idx].parent != -1 || idx == *p2)
        {
            continue;
        }
        if (T[idx].weight < T[*p1].weight)
        {
            *p1 = idx;
        }
    }

    // Pass 4: lightest candidate for *p2, never the node held in *p1.
    for (int idx = 0; idx < n; ++idx)
    {
        if (T[idx].parent != -1 || idx == *p1)
        {
            continue;
        }
        if (T[idx].weight < T[*p2].weight)
        {
            *p2 = idx;
        }
    }
}
// Print the Huffman tree as its array (static storage) table.
// The n leaves come first, with their character; the merged nodes n..2n-2
// follow in a second table without a character column.
void DisplayHuffTree(HTNode* T,int n)
{
    printf("构造的哈夫曼树的静态存储表如下\n");
    printf("序号\t字符\t权值\t左孩子\t右孩子\t双亲\n");

    int idx = 0;
    while (idx < n)
    {
        const HTNode* node = &T[idx];
        if (node->ch != '\n')
        {
            printf("%d\t%c\t%d\t%d\t%d\t%d\t\n", idx, node->ch, node->weight, node->lchild, node->rchild, node->parent);
        }
        else
        {
            // a raw newline would break the row, so show it escaped
            printf("%d\t\\n\t%d\t%d\t%d\t%d\t\n", idx, node->weight, node->lchild, node->rchild, node->parent);
        }
        ++idx;
    }

    printf("序号\t权值\t左孩子\t右孩子\t双亲\n");
    const int total = 2 * n - 1;
    for (idx = n; idx < total; ++idx)
    {
        const HTNode* node = &T[idx];
        printf("%d\t%d\t%d\t%d\t%d\t\n", idx, node->weight, node->lchild, node->rchild, node->parent);
    }
}
// Generate the Huffman code of every leaf.
// A left branch contributes '0' and a right branch '1'; the code is the branch
// sequence on the path from the root down to the leaf.
//   T        : Huffman tree whose first n nodes are the leaves
//   Huffcode : output table, one entry per leaf
//   n        : number of leaves
// A code longer than MaxCodelen does not fit CodeNode.bits; such a character is
// reported and given an empty code instead of writing outside the buffer.
void CharsetEncoding(HTNode* T, CodeNode* Huffcode,int n)
{
    char cd[MaxCodelen + 1]; // scratch buffer, filled from the end backwards
    cd[MaxCodelen] = '\0';

    // The walk goes from leaf to root, so the bits are produced last-first.
    for (int i = 0; i < n; ++i)
    {
        Huffcode[i].ch = T[i].ch;

        int start = MaxCodelen; // index of the first valid bit in cd
        int too_long = 0;
        int child = i;
        int parent;
        while ((parent = T[child].parent) >= 0) // the root has parent -1
        {
            if (start == 0) // no room left: cd[--start] would underflow
            {
                too_long = 1;
                break;
            }
            cd[--start] = (T[parent].lchild == child) ? '0' : '1';
            child = parent; // keep climbing
        }

        if (too_long)
        {
            printf("code of character %d is longer than %d bits\n", (int)T[i].ch, MaxCodelen);
            Huffcode[i].bits[0] = '\0';
        }
        else
        {
            strcpy(Huffcode[i].bits, &cd[start]);
        }
    }
}
// Print the code table: one "character<TAB>code" row for each of the n entries.
// The newline character is printed as the two characters \n.
void PrintEncoding(CodeNode* Huffcode, int n)
{
    printf("字符\t编码\n");

    int row = 0;
    while (row < n)
    {
        const CodeNode* entry = &Huffcode[row];
        if (entry->ch != '\n')
        {
            printf("%c\t%s\n", entry->ch, entry->bits);
        }
        else
        {
            printf("\\n\t%s\n", entry->bits);
        }
        ++row;
    }
}
// Encode a text file with the Huffman code table.
// The code of every input character is written to outfile as '0'/'1'
// characters, so the output is itself a text file.
//   Huffcode, n : code table and its number of entries
//   infile      : text to encode
//   outfile     : destination; truncated first so a re-run does not append to
//                 the previous result
// Returns 0 on success, -1 when a file cannot be opened or written.
int FileEncoding(CodeNode* Huffcode, int n, char* infile,char* outfile)
{
    FILE* in = fopen(infile, "r");
    if (in == NULL)
    {
        printf("open failure\n");
        return -1;
    }
    FILE* out = fopen(outfile, "w");
    if (out == NULL)
    {
        printf("open failure\n");
        fclose(in); // do not leak the handle that did open
        return -1;
    }

    int status = 0;
    int c; // int, not char: fgetc reports EOF out of band
    while ((c = fgetc(in)) != EOF)
    {
        const char* bits = FindChar(Huffcode, n, (char)c);
        if (fputs(bits, out) == EOF)
        {
            status = -1;
            break;
        }
    }

    fclose(in);
    if (fclose(out) != 0) // fclose flushes, so a late write error shows up here
    {
        status = -1;
    }
    if (status != 0)
    {
        printf("write failure\n");
    }
    return status;
}
// Look up the code string of a character in the code table.
//   Huffcode, n : code table and its number of entries
//   ch          : character to look up
// Returns the '0'/'1' string stored in the table. When the character has no
// entry, a message is printed and an empty string is returned, so callers can
// always treat the result as a valid NUL-terminated string.
char* FindChar(CodeNode* Huffcode, int n, char ch)
{
    static char no_code[] = ""; // valid, empty result for unknown characters

    for (int i = 0; i < n; ++i)
    {
        if (ch == Huffcode[i].ch)
        {
            return Huffcode[i].bits;
        }
    }
    printf("没找到字符\n");
    return no_code;
}
// Compression rate relative to 8-bit ASCII.
// outfile holds one '0'/'1' character per encoded bit, so its length in bytes
// is the number of Huffman bits; the rate is that length / (chnum * 8).
//   chnum   : number of characters in the original text
//   outfile : path of the '0'/'1' encoded file
// Prints the file length and the rate. Returns the rate, or -1 when the file
// cannot be opened or measured, or when chnum is not positive.
float ComputeRate_1(int chnum, char* outfile)
{
    FILE* fp = fopen(outfile, "rb"); // binary mode: ftell is then a plain byte count
    if (fp == NULL)
    {
        printf("open failure\n");
        return -1;
    }

    long filelen = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
    {
        filelen = ftell(fp);
    }
    fclose(fp);
    if (filelen < 0)
    {
        printf("cannot determine file length\n");
        return -1;
    }
    printf("\n哈夫曼编码的文件长度为%ld\n", filelen);

    if (chnum <= 0) // nothing was encoded; the ratio would divide by zero
    {
        printf("no source characters, compression rate undefined\n");
        return -1;
    }

    float rate = (float)filelen / ((float)chnum * 8);
    printf("ascii压缩率 = %f\n", rate);
    return rate;
}
// Compression rate relative to a fixed-length code.
// A fixed-length code for n symbols needs ceil(log2(n)) bits per character, and
// at least one bit. The rate is the Huffman bit count (the length of the
// '0'/'1' file) divided by chnum times that length.
//   chnum   : number of characters in the original text
//   outfile : path of the '0'/'1' encoded file
//   n       : number of distinct characters
// Prints the fixed code length, the file length and the rate. Returns the rate,
// or -1 when the file cannot be opened or measured, or when chnum or n is not
// positive.
float ComputeRate_2(int chnum, char* outfile,int n)
{
    FILE* fp = fopen(outfile, "rb"); // binary mode: ftell is then a plain byte count
    if (fp == NULL)
    {
        printf("open failure\n");
        return -1;
    }

    long filelen = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
    {
        filelen = ftell(fp);
    }
    fclose(fp);
    if (filelen < 0)
    {
        printf("cannot determine file length\n");
        return -1;
    }

    if (chnum <= 0 || n <= 0) // no text or no alphabet: the ratio is undefined
    {
        printf("no source characters, compression rate undefined\n");
        return -1;
    }

    // Smallest bit count whose 2^bits covers n symbols, i.e. ceil(log2(n)),
    // computed on integers so no floating-point rounding is involved.
    int averagelen = 0;
    while ((1u << averagelen) < (unsigned)n)
    {
        ++averagelen;
    }
    if (averagelen == 0) // n == 1: still charge one bit, which avoids dividing by zero
    {
        averagelen = 1;
    }

    printf("\n%d个字符的等长编码的单个码长为%d\n", n,averagelen);
    printf("哈夫曼编码的文件长度为%ld\n", filelen);
    float rate = (float)filelen / ((float)chnum * (float)averagelen);
    printf("等长码压缩率 = %f\n", rate);
    return rate;
}
// Decode a '0'/'1' text file back into the original text.
//   Codingfile : file of '0'/'1' characters to decode
//   Textfile   : destination of the decoded text; truncated first so a re-run
//                does not append to the previous result
//   T          : Huffman tree; the root is node 2*valid-2
//   valid      : number of leaves in the tree
//   chnum      : number of characters to decode
// The decoded text is also echoed to stdout. The input is read one bit
// character at a time, so its length is not limited by any buffer.
// Returns 0 on success; -1 when a file cannot be opened, the tree is empty,
// the input ends early, or it contains something other than '0' and '1'.
int Decoding(char* Codingfile, char* Textfile, HTNode* T,int valid,int chnum)
{
    if (valid < 1)
    {
        printf("empty Huffman tree\n");
        return -1;
    }

    FILE* in = fopen(Codingfile, "r");
    if (in == NULL)
    {
        printf("Open failure\n");
        return -1;
    }
    FILE* out = fopen(Textfile, "w");
    if (out == NULL)
    {
        printf("Open failure\n");
        fclose(in); // do not leak the handle that did open
        return -1;
    }

    const int root = 2 * valid - 2;
    int status = 0;

    printf("\n解码文件的内容如下\n");
    for (int decoded = 0; decoded < chnum && status == 0; ++decoded)
    {
        int node = root; // every character starts again at the root

        // Walk down until a leaf is reached. With a single-symbol tree the
        // root is already a leaf and no bit is consumed.
        while (T[node].lchild != -1 || T[node].rchild != -1)
        {
            int bit = fgetc(in);
            if (bit == '0')
            {
                node = T[node].lchild;
            }
            else if (bit == '1')
            {
                node = T[node].rchild;
            }
            else
            {
                // EOF or a stray character: stop instead of looping forever
                status = -1;
                break;
            }
        }

        if (status == 0)
        {
            putchar(T[node].ch);
            fputc(T[node].ch, out);
        }
    }
    printf("\n");
    if (status != 0)
    {
        printf("encoded data is truncated or corrupt\n");
    }

    fclose(in);
    if (fclose(out) != 0)
    {
        status = -1;
    }
    return status;
}
// Print the content of a text file to stdout, preceded by a heading line and
// followed by a newline. Prints "Open failure" when the file cannot be opened.
void PrintText(char* file)
{
    printf("\n原文如下\n");
    FILE* fp = fopen(file, "r");
    if (fp == NULL)
    {
        printf("Open failure\n");
        return;
    }

    int c; // int, not char: fgetc reports EOF out of band
    while ((c = fgetc(fp)) != EOF)
    {
        putchar(c);
    }
    printf("\n");
    fclose(fp); // release the handle; it used to be leaked on every call
}
// Encode a text file and write the Huffman bits packed eight to a byte.
// Bits are packed most-significant first; a final partial byte is padded with
// zero bits on the right.
//   Huffcode, n   : code table and its number of entries
//   infile        : text to encode
//   outfilebinary : destination; truncated first so a re-run does not append
//                   to the previous result
// Prints "open failure" and returns when a file cannot be opened.
void FileEncodingBinary(CodeNode* Huffcode, int n, char* infile, char* outfilebinary)
{
    FILE* in = fopen(infile, "r");
    if (in == NULL)
    {
        printf("open failure\n");
        return;
    }
    FILE* out = fopen(outfilebinary, "wb");
    if (out == NULL)
    {
        printf("open failure\n");
        fclose(in); // do not leak the handle that did open
        return;
    }

    unsigned char packed = 0; // bits collected for the byte being built
    int nbits = 0; // how many bits of `packed` are filled
    int c; // int, not char: fgetc reports EOF out of band

    while ((c = fgetc(in)) != EOF)
    {
        const char* bits = FindChar(Huffcode, n, (char)c);
        for (; *bits != '\0'; ++bits)
        {
            if (*bits != '0' && *bits != '1')
            {
                continue; // only bit characters contribute
            }
            packed = (unsigned char)((packed << 1) | (*bits == '1'));
            if (++nbits == 8) // byte complete: write it and start a new one
            {
                fwrite(&packed, sizeof packed, 1, out);
                packed = 0;
                nbits = 0;
            }
        }
    }

    if (nbits > 0) // left-align the leftover bits; the low bits stay zero
    {
        packed = (unsigned char)(packed << (8 - nbits));
        fwrite(&packed, sizeof packed, 1, out);
    }

    fclose(in);
    fclose(out);
}
// Decode a bit-packed Huffman file back into text.
//   CodingfileBinary : packed file, bits stored most-significant first
//   textfile         : destination of the decoded text; truncated first so a
//                      re-run does not append to the previous result
//   T                : Huffman tree; the root is node 2*valid-2
//   valid            : number of leaves in the tree
//   chnum            : number of characters to decode; padding bits after the
//                      last character are ignored
// The decoded text is also echoed to stdout.
// Returns 0 on success; -1 when a file cannot be opened, the tree is empty, or
// the input ends before chnum characters were decoded.
int DecodingBinary(char* CodingfileBinary, char* textfile, HTNode* T, int valid, int chnum)
{
    if (valid < 1)
    {
        printf("empty Huffman tree\n");
        return -1;
    }

    FILE* in = fopen(CodingfileBinary, "rb");
    if (in == NULL)
    {
        printf("Open failure\n");
        return -1;
    }
    FILE* out = fopen(textfile, "w");
    if (out == NULL)
    {
        printf("Open failure\n");
        fclose(in); // do not leak the handle that did open
        return -1;
    }

    const int root = 2 * valid - 2;
    int status = 0;
    int count = 0; // characters decoded so far

    printf("\n从二进制文件中解码出的文件内容如下\n");

    // Single-symbol tree: the root is a leaf, every code is empty and the
    // packed file holds no bits. Following a child link would index T[-1].
    if (T[root].lchild == -1 && T[root].rchild == -1)
    {
        for (; count < chnum; ++count)
        {
            putchar(T[root].ch);
            fputc(T[root].ch, out);
        }
    }

    int child = root;
    while (count < chnum)
    {
        int byte = fgetc(in); // 0..255, or EOF
        if (byte == EOF)
        {
            status = -1; // ran out of data: stop instead of reusing the old byte
            break;
        }
        for (int bit = 0; bit < 8 && count < chnum; ++bit)
        {
            // the top bit selects the branch: 1 goes right, 0 goes left
            child = (byte & 0x80) ? T[child].rchild : T[child].lchild;
            byte <<= 1;

            if (T[child].lchild == -1 && T[child].rchild == -1) // reached a leaf
            {
                putchar(T[child].ch);
                fputc(T[child].ch, out);
                count++;
                child = root; // the next character starts at the root again
            }
        }
    }
    printf("\n");
    if (status != 0)
    {
        printf("encoded data ended early\n");
    }

    fclose(in);
    if (fclose(out) != 0)
    {
        status = -1;
    }
    return status;
}