要求如下
在这里我们看到,要求要进行编码和译码,并且要能计算压缩率,还要有文件读取的操作,因此我们先进行文件读取操作的学习
前置知识
1.fopen-打开一个文件,在这里踩了个坑是,在把文件路径复制到代码中时,需要在\后面再加上一个\,因为需要进行转义要把特殊字符\转换成普通字符,例子如下
#include <stdio.h>
int main() {
printf("\\"); // 输出反斜杠字符 "\"
return 0;
}
输出结果是\
2.fscanf 从文件中读入
3.fprintf 向文件输入
接下来我们就要开始编写程序了
首先我们要先定义一些基本的东西
struct character
{
char ch;
int number = 0;
}; //文件中出现的字符和其出现次数
struct huffmannode
{
int weight;
int parent;
int left;
int right;
char value;
int i;
}; //哈夫曼节点
struct huffmancode
{
int bit[MAXBIT];
int front;
}; //记录每个字符哈夫曼编码
最开始我们要计算不同字符在文本中出现的频率,在这里统一说明,我们的功能应该在函数中分开实现,代码如下
void tongji(character chars[], int& n) //统计原文本中出现的字符及其使用频率
{
FILE* file = NULL;
if ((file = fopen("C:\\huff\\word.txt", "r")) == NULL)
{
printf("打开word.txt失败!\n");
exit(0);
}//如果没有打开,就输出失败,然后直接结束程序-后路
char ch;
int i = 0, j = 0, judge = 0;;
fscanf(file, "%c", &ch);
chars[i].ch = ch;
chars[i].number++;
i++;
while (ch != '#') //从文件读入文本,以#标记结束
{
fscanf(file, "%c", &ch);
judge = 0;
if (ch != '#')
{
for (j = 0; j < i; j++)
{
if (ch == chars[j].ch) //如果读入的字符已被记录,则将出现次数+1
{
chars[j].number++;
judge = 1;
break;
}
}
if (judge == 0)
{
chars[i].ch = ch;
chars[i].number++;
i++;
}//如果没有出现在已知,那么就创建新的
}
}
n = i;//记录文件中出现的字母数目
fclose(file);
}
在这里借鉴了一下别人的读取思路,总之就是将读入的字符记录,如果新读入的字符不在原本的字符样本里面,就把这个新加进去,思路还是十分明确的,然后我们要实现计算字符频率的事情,代码如下
void printout(character ch[], int n)
{
int total = 0;
for (int i = 0; i < n; i++)
{
total = total + ch[i].number;
}
for (int i = 0; i < n; i++)
{
printf("字符%c的频率是%f\n", ch[i].ch,(1.0*ch[i].number) / total);
}
}
就把每个字符出现的频率加和然后除去自身就好了
接下来我们要实现计算哈夫曼树的构造和编码,相信你已经学过什么是哈夫曼树了,在这里我的程序没有进一步用堆优化,而是直接算出来最小的两个节点,代码如下
void createhuffmantree(huffmannode huffnode[], int n, character chars[]) //构建哈夫曼树
{
int i, j, x1, x2;
int maxweight = 50000;
for (i = 0; i < 2 * n - 1; i++)
{
huffnode[i].weight = chars[i].number;
huffnode[i].parent = TOP;
huffnode[i].left = null;
huffnode[i].right = null;
huffnode[i].value = chars[i].ch;
}//初始化,2*n-1的原因是合并n-1次会出现n-1个节点
for (i = 0; i < n - 1; i++)//有n个节点就要进行n-1次合并
{
maxweight = MAXWEIGHT;
x1 = x2 = 0;
for (j = 0; j < n + i; j++)
{
if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight)
{
maxweight = huffnode[j].weight;
x1 = j;
}
}//找到第一小节点
maxweight = MAXWEIGHT;
for (j = 0; j < n + i; j++)
{
if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight && j != x1)
{
maxweight = huffnode[j].weight;
x2 = j;
}
}//找到第二小节点,多了一个判定条件
huffnode[x1].parent = n + i;
huffnode[x2].parent = n + i;//将新节点和最小两个节点连接
huffnode[n + i].weight = huffnode[x1].weight + huffnode[x2].weight;
huffnode[n + i].left = x1;
huffnode[n + i].right = x2;
}
}
十分常规的写法,接下来就是计算每个字符的哈夫曼编码,一般都是从叶节点从下向上算,我们用一个循环来做,我们知道,树的根节点是不计入计算的,所以,循环终止的条件就是抵达根节点,代码如下
void Coding(huffmannode huffnode[], huffmancode huffcode[], int n) //构建字符的哈夫曼编码表
{
int father;
int now, i;
huffmancode temp;
int p;
for (i = 0; i < n; i++)
{
huffcode[i].front = MAXBIT - 1;
now = i;
father = huffnode[i].parent;
while (father != TOP)//一直到根节点,根节点不存储编码
{
if (now == huffnode[father].left) {
huffcode[i].bit[huffcode[i].front] = 0;//左子树存储0
}
else {
huffcode[i].bit[huffcode[i].front] = 1;//右子树存储1
}
now = father;
father = huffnode[father].parent;
huffcode[i].front--;
}
++huffcode[i].front;//多减了一位,一定要有自加一次
}
}
接下来就是把文件进行编码了,有了上面的函数,这一步是十分轻松的,代码如下
void CodingFile(huffmannode huffnode[], huffmancode huffcode[], int n, int& chnum) //对原文本编码并将编码写入文件
{
FILE* file1 = NULL;
FILE* file2 = NULL;
if ((file1 = fopen("C:\\huff\\word.txt", "r")) == NULL)
{
printf("打开word.txt失败!\n");
exit(0);
}
if ((file2 = fopen("C:\\huff\\asc.txt", "w")) == NULL)
{
printf("打开asc.txt失败!\n");
exit(0);
}
char ch;
fscanf(file1, "%c", &ch);
if (ch != '#')
{
for (int i = 0; i < n; i++)
{
if (huffnode[i].value == ch)
{
for (int j = huffcode[i].front; j < MAXBIT; j++)
{
fprintf(file2, "%d", huffcode[i].bit[j]);
chnum++;
}
}
}
}
while (ch != '#') {
fscanf(file1, "%c", &ch);
if (ch != '#')
{
for (int i = 0; i < n; i++)
{
if (huffnode[i].value == ch)
{
for (int j = huffcode[i].front; j < MAXBIT; j++)
{
fprintf(file2, "%d", huffcode[i].bit[j]);
chnum++;
}
}
}
}
};
fclose(file1);
fclose(file2);
}
解码代码如下
void Decoding(huffmannode huffnode[], int n)
{
FILE* fc = NULL, * fd = NULL;
if ((fc = fopen("C:\\huff\\asc.txt", "r")) == NULL)
{
printf("打开asc.txt失败!\n");
exit(0);
}
if ((fd = fopen("C:\\huff\\outcome.txt", "w")) == NULL)
{
printf("打开outcome.txt失败!\n");
exit(0);
}
char nums[100000];
fscanf(fc, "%s", nums);//因为译码文件没有空格
int i = 0, j = 0, temp;
for (i = 0; nums[i] != '\0'; i++);
while (j < i)
{
temp = 2 * n - 2;//找到根节点
while (huffnode[temp].left != null && huffnode[temp].right != null)
{
if (nums[j] == '0')
temp = huffnode[temp].left;
else
temp = huffnode[temp].right;
j++;
}
fprintf(fd, "%c", huffnode[temp].value);
}
fclose(fc);
fclose(fd);
}
计算压缩率
void Calculate_yasuo(character ch[], int n, int chnum) //计算压缩率
{
int total = 0;
for (int i = 0; i < n; i++)
{
total =total+ ch[i].number;
}
printf("=>压缩成功,压缩比%f%%", 1.0*chnum /(8*total * 100));//除8是因为在txt文档中,一个字符占8位
}
在这里加上两个%%是因为要进行转义
源代码如下
#include<iostream>
#include<stdio.h>
#include<stdlib.h>
using namespace std;
#define MAXWEIGHT 10000
#define MAXBIT 100
#define TOP -1
#define null -1
struct character
{
char ch;
int number = 0;
}; //文件中出现的字符和其出现次数
struct huffmannode
{
int weight;
int parent;
int left;
int right;
char value;
int i;
}; //哈夫曼节点
struct huffmancode
{
int bit[MAXBIT];
int front;
}; //记录每个字符哈夫曼编码
void tongji(character chars[], int& n) //统计原文本中出现的字符及其使用频率
{
FILE* file = NULL;
if ((file = fopen("C:\\huff\\word.txt", "r")) == NULL)
{
printf("打开word.txt失败!\n");
exit(0);
}//如果没有打开,就输出失败,然后直接结束程序-后路
char ch;
int i = 0, j = 0, judge = 0;;
fscanf(file, "%c", &ch);
chars[i].ch = ch;
chars[i].number++;
i++;
while (ch != '#') //从文件读入文本,以#标记结束
{
fscanf(file, "%c", &ch);
judge = 0;
if (ch != '#')
{
for (j = 0; j < i; j++)
{
if (ch == chars[j].ch) //如果读入的字符已被记录,则将出现次数+1
{
chars[j].number++;
judge = 1;
break;
}
}
if (judge == 0)
{
chars[i].ch = ch;
chars[i].number++;
i++;
}//如果没有出现在已知,那么就创建新的
}
}
n = i;//记录文件中出现的字母数目
fclose(file);
}
void createhuffmantree(huffmannode huffnode[], int n, character chars[]) //构建哈夫曼树
{
int i, j, x1, x2;
int maxweight = 50000;
for (i = 0; i < 2 * n - 1; i++)
{
huffnode[i].weight = chars[i].number;
huffnode[i].parent = TOP;
huffnode[i].left = null;
huffnode[i].right = null;
huffnode[i].value = chars[i].ch;
}//初始化,2*n-1的原因是合并n-1次会出现n-1个节点
for (i = 0; i < n - 1; i++)//有n个节点就要进行n-1次合并
{
maxweight = MAXWEIGHT;
x1 = x2 = 0;
for (j = 0; j < n + i; j++)
{
if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight)
{
maxweight = huffnode[j].weight;
x1 = j;
}
}//找到第一小节点
maxweight = MAXWEIGHT;
for (j = 0; j < n + i; j++)
{
if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight && j != x1)
{
maxweight = huffnode[j].weight;
x2 = j;
}
}//找到第二小节点,多了一个判定条件
huffnode[x1].parent = n + i;
huffnode[x2].parent = n + i;//将新节点和最小两个节点连接
huffnode[n + i].weight = huffnode[x1].weight + huffnode[x2].weight;
huffnode[n + i].left = x1;
huffnode[n + i].right = x2;
}
}
void Coding(huffmannode huffnode[], huffmancode huffcode[], int n) //构建字符的哈夫曼编码表
{
int father;
int now, i;
huffmancode temp;
int p;
for (i = 0; i < n; i++)
{
huffcode[i].front = MAXBIT - 1;
now = i;
father = huffnode[i].parent;
while (father != TOP)//一直到根节点,根节点不存储编码
{
if (now == huffnode[father].left) {
huffcode[i].bit[huffcode[i].front] = 0;//左子树存储0
}
else {
huffcode[i].bit[huffcode[i].front] = 1;//右子树存储1
}
now = father;
father = huffnode[father].parent;
huffcode[i].front--;
}
++huffcode[i].front;//多减了一位,一定要有自加一次
}
}
void CodingFile(huffmannode huffnode[], huffmancode huffcode[], int n, int& chnum) //对原文本编码并将编码写入文件
{
FILE* file1 = NULL;
FILE* file2 = NULL;
if ((file1 = fopen("C:\\huff\\word.txt", "r")) == NULL)
{
printf("打开word.txt失败!\n");
exit(0);
}
if ((file2 = fopen("C:\\huff\\asc.txt", "w")) == NULL)
{
printf("打开asc.txt失败!\n");
exit(0);
}
char ch;
fscanf(file1, "%c", &ch);
if (ch != '#')
{
for (int i = 0; i < n; i++)
{
if (huffnode[i].value == ch)
{
for (int j = huffcode[i].front; j < MAXBIT; j++)
{
fprintf(file2, "%d", huffcode[i].bit[j]);
chnum++;
}
}
}
}
while (ch != '#') {
fscanf(file1, "%c", &ch);
if (ch != '#')
{
for (int i = 0; i < n; i++)
{
if (huffnode[i].value == ch)
{
for (int j = huffcode[i].front; j < MAXBIT; j++)
{
fprintf(file2, "%d", huffcode[i].bit[j]);
chnum++;
}
}
}
}
};
fclose(file1);
fclose(file2);
}
void Decoding(huffmannode huffnode[], int n)
{
FILE* fc = NULL, * fd = NULL;
if ((fc = fopen("C:\\huff\\asc.txt", "r")) == NULL)
{
printf("打开asc.txt失败!\n");
exit(0);
}
if ((fd = fopen("C:\\huff\\outcome.txt", "w")) == NULL)
{
printf("打开outcome.txt失败!\n");
exit(0);
}
char nums[100000];
fscanf(fc, "%s", nums);//因为译码文件没有空格
int i = 0, j = 0, temp;
for (i = 0; nums[i] != '\0'; i++);
while (j < i)
{
temp = 2 * n - 2;//找到根节点
while (huffnode[temp].left != null && huffnode[temp].right != null)
{
if (nums[j] == '0')
temp = huffnode[temp].left;
else
temp = huffnode[temp].right;
j++;
}
fprintf(fd, "%c", huffnode[temp].value);
}
fclose(fc);
fclose(fd);
}
void printout(character ch[], int n)
{
int total = 0;
for (int i = 0; i < n; i++)
{
total = total + ch[i].number;
}
for (int i = 0; i < n; i++)
{
printf("字符%c的频率是%f\n", ch[i].ch,(1.0*ch[i].number) / total);
}
}
void Calculate_yasuo(character ch[], int n, int chnum) //计算压缩率
{
int total = 0;
for (int i = 0; i < n; i++)
{
total =total+ ch[i].number;
}
printf("=>压缩成功,压缩比%f%%", 1.0*chnum /(8*total * 100));//除8是因为在txt文档中,一个字符占8位
}
int main()
{
huffmannode huffnode[500];
character chars[500];
huffmancode huffcode[500];
int n = 0, chnum = 0;
cout << "********请选择:1 压缩 2 解压 ********|" << endl;
cout << "!!!!!注意!!!!! |" << endl;
cout << "****系统只支持编码或者解压分开进行****|" << endl;
cout << "************输入0退出系统*************|" << endl;
cout << "---------------------------------------" << endl;
int x; cin >> x;
while (x != 0) {
if (x == 1) {
cout << "系统将自动压缩你保存在本地的word.txt文件" << endl;
tongji(chars, n);
printout(chars, n);
createhuffmantree(huffnode, n, chars);
Coding(huffnode, huffcode, n);
CodingFile(huffnode, huffcode, n, chnum);
Calculate_yasuo(chars, n, chnum);
cout << "------------------------------------------" << endl;
cout << "********请选择:1 压缩 2 解压 ********|" << endl;
cout << "!!!!!注意!!!!! |" << endl;
cout << "****系统只支持编码或者解压分开进行****|" << endl;
cout << "************输入0退出系统*************|" << endl;
cout << "---------------------------------------" << endl;
}
else {
cout << "系统将自动为您解压您保存在本地的asc.txt文件" << endl;
tongji(chars, n);
Decoding(huffnode, n);
cout << "=>解压成功,内容已保存在本地outcome.txt文件" << endl;
cout << "------------------------------------------" << endl;
cout << "********请选择:1 压缩 2 解压 ********|" << endl;
cout << "!!!!!注意!!!!! |" << endl;
cout << "****系统只支持编码或者解压分开进行****|" << endl;
cout << "************输入0退出系统*************|" << endl;
cout << "---------------------------------------" << endl;
}
cin >> x;
if (x == 0) {
cout << "您已成功退出系统!";
exit(0);
}
}//进行循环,保证能编码和译码都能进行
return 0;
}