题目
1.背景知识
二叉树的应用、赫夫曼树。
2.目的要求
掌握赫夫曼树和赫夫曼编码的基本思想和算法的程序实现。
3.实验内容
实现文件中数据的加解密与压缩:将硬盘上的一个文本文件进行加密,比较加密文件和原始文件的大小差别;对加密文件进行解密,比较原始文件和解码文件的内容是否一致。
4.实验说明
- 输入和输出:
(1)输入:硬盘上给定的原始文件及文件路径。
(2)输出:
硬盘上的加密文件及文件路径;
硬盘上的解码文件及文件路径;
原始文件和解码文件的比对结果。
- 实验要求:
提取原始文件中的数据(包括中文、英文或其他字符),根据数据出现的频率为权重,构建Huffman编码表;
根据Huffman编码表对原始文件进行加密,得到加密文件并保存到硬盘上;
将加密文件进行解密,得到解码文件并保存到硬盘上;
比对原始文件和解码文件的一致性,得出是否一致的结论。
- 参考类型定义(双亲孩子表示法):
// Reference node type from the assignment: a Huffman tree kept in an array
// using the parent/children representation.
typedef struct {
unsigned int weight; // weight carried by this node
unsigned int parent, lchild, rchild; // links to the parent and the left/right child — presumably array indices; confirm against the implementation
} HTNode, *HuffmanTree; // dynamically allocated array storing the Huffman tree
typedef char * * HuffmanCode; // dynamically allocated array storing the Huffman code table
5.注意问题:
- 本实验涉及到赫夫曼树和赫夫曼编码基本思想和构建方法;C语言文件的建立、读取、写入方法;C语言位运算等知识。
- 请在实验报告中说明Huffman编码表的构建过程。
解答
1.文件要求
代码和源文件 resource.txt(内容可以自定义)要放在同一目录下。运行代码后,该目录下会生成两个文件:Huffman Code.txt 和 Translated File.txt,分别存放源文件编码后的内容、以及由编码文件解码出的内容。
如下:
Code
#include <bits/stdc++.h>
using namespace std;
// One slot of the array-backed Huffman tree. The tree-building code in this
// file stores array indices in the three link fields and uses 0 for "none".
struct ht_node {
    unsigned int weight;  // occurrence count (leaf) or sum of both children (internal node)
    unsigned int parent;  // index of the parent slot, 0 while the node has no parent
    unsigned int lchild;  // index of the left child, 0 for a leaf
    unsigned int rchild;  // index of the right child, 0 for a leaf
    char c;               // symbol held by a leaf; internal nodes get a placeholder
};
using huffman_tree = ht_node *;
// Code-table type taken over from the assignment's reference definition; the
// functions visible in this file keep their codes in `mp` instead.
using huffman_code = char **;

// ---- input text and its statistics ----
string vs;              // full text of the source file
map<char, int> power;   // occurrence count (weight) of every character
vector<int> ch_list;    // distinct characters, in the iteration order of `power`
vector<int> pw_list;    // weights, parallel to ch_list
int n = 0;              // number of distinct characters

// ---- tree construction scratch ----
int s1 = 0;             // index of the lightest parentless node found by select()
int s2 = 0;             // index of the second lightest parentless node

// ---- encoding / decoding state ----
map<char, string> mp;     // character -> code
string vs_ht;             // encoded text read back from disk
map<string, char> mp_ht;  // code -> character
string s_trans;           // decoded text
/**
 * Finds the two lightest parentless nodes among ht[1..ed].
 *
 * The result is published through the globals s1 (lightest) and s2 (second
 * lightest). Ties are resolved in favour of the lower index. When fewer than
 * two candidates exist, the global that has no candidate is left untouched.
 *
 * The search compares candidates with each other instead of against a fixed
 * "infinity" constant, so nodes of arbitrarily large weight are still eligible.
 *
 * @param ht  array-backed tree; slot 0 is never inspected
 * @param ed  last index (inclusive) to consider
 */
void select(huffman_tree &ht, int ed) {
    int lightest = 0;  // 0 = nothing found yet (slot 0 is not a real node)
    int runner_up = 0;
    for (int i = 1; i <= ed; ++i) {
        if (ht[i].parent != 0) {
            continue;  // already merged into a subtree
        }
        if (lightest == 0 || ht[i].weight < ht[lightest].weight) {
            // The previous best was the earliest minimum so far, which makes
            // it the correct runner-up once something lighter shows up.
            runner_up = lightest;
            lightest = i;
        } else if (runner_up == 0 || ht[i].weight < ht[runner_up].weight) {
            runner_up = i;
        }
    }
    if (lightest != 0) {
        s1 = lightest;
    }
    if (runner_up != 0) {
        s2 = runner_up;
    }
}
/**
 * Builds the Huffman tree for the symbols in ch_list / pw_list and fills the
 * global table `mp` (character -> code).
 *
 * @param ht  receives the malloc()-allocated tree array of 2*n-1 nodes stored
 *            in slots 1..2*n-1 (slot 0 is unused so that index 0 can mean
 *            "no parent/child"). It is set to nullptr when no tree is needed
 *            (n <= 1). The caller owns the array and releases it with free().
 * @throws bad_alloc if the tree array cannot be allocated.
 *
 * With exactly one distinct symbol there is nothing to merge, so that symbol
 * gets the one-bit code "0"; otherwise the encoded output would be empty and
 * the text could not be restored.
 */
void huffman_coding(huffman_tree &ht) {
    ht = nullptr;
    if (n <= 0) {
        return;
    }
    if (n == 1) {
        mp[static_cast<char>(ch_list[0])] = "0";
        return;
    }

    const int m = 2 * n - 1;  // total number of nodes in the tree
    ht = static_cast<huffman_tree>(malloc((m + 1) * sizeof(ht_node)));
    if (ht == nullptr) {
        throw bad_alloc();
    }

    // Leaves occupy slots 1..n, internal nodes n+1..m.
    for (int i = 1; i <= n; ++i) {
        ht[i] = {static_cast<unsigned int>(pw_list[i - 1]), 0, 0, 0,
                 static_cast<char>(ch_list[i - 1])};
    }
    for (int i = n + 1; i <= m; ++i) {
        ht[i] = {0u, 0, 0, 0, '@'};  // '@' is only a placeholder symbol
    }

    // Repeatedly merge the two lightest parentless nodes into slot i.
    for (int i = n + 1; i <= m; ++i) {
        select(ht, i - 1);
        ht[s1].parent = i;
        ht[s2].parent = i;
        ht[i].lchild = s1;
        ht[i].rchild = s2;
        ht[i].weight = ht[s1].weight + ht[s2].weight;
    }

    // Walk from every leaf up to the root; the bits come out last-to-first,
    // so collect them with push_back and reverse once at the end.
    for (int i = 1; i <= n; ++i) {
        string code;
        unsigned int child = static_cast<unsigned int>(i);
        for (unsigned int up = ht[i].parent; up != 0; child = up, up = ht[up].parent) {
            code.push_back(ht[up].lchild == child ? '0' : '1');
        }
        reverse(code.begin(), code.end());

        // Only symbols that actually occur get a table entry; find() avoids
        // creating a zero entry in `power` as a side effect of the check.
        const auto seen = power.find(ht[i].c);
        if (seen != power.end() && seen->second != 0) {
            mp[ht[i].c] = code;
        }
    }
}
void read() {
ifstream ip_file;
ip_file.open("resource.txt");
if (ip_file.is_open()) {
int is_first = 1;
string s;
while (getline(ip_file, s)) {
if (is_first) {
is_first = 0;
} else {
vs += "\n";
}
vs += s;
}
cout << "resource.txt读取成功\n";
ip_file.close();
} else {
cout << "无法读取resource.txt\n";
}
}
/**
 * Counts how often each character occurs in `vs` and publishes the result:
 * `power` holds the counts, ch_list / pw_list receive the distinct characters
 * and their counts in the iteration order of `power`, and `n` becomes the
 * number of distinct characters.
 */
void get_cnt() {
    for (size_t i = 0; i < vs.size(); ++i) {
        ++power[vs[i]];
    }
    for (auto it = power.begin(); it != power.end(); ++it) {
        ch_list.push_back(it->first);
        pw_list.push_back(it->second);
    }
    n = static_cast<int>(ch_list.size());
}
void write() {
ofstream op_file;
op_file.open("Huffman Code.txt");
if (op_file.is_open()) {
// cout << "vs: [" << vs << "]\n\n";
for (auto p : vs) {
op_file << mp[p];
}
cout << "Huffman Code.txt写入成功\n";
op_file.close();
} else {
cout << "无法保存Huffman编码到硬盘\n";
}
}
/**
 * Loads the first line of "Huffman Code.txt" into the global `vs_ht`.
 * Prints a status line to stdout.
 */
void read_ht() {
    ifstream ip_file("Huffman Code.txt");
    if (!ip_file.is_open()) {
        cout << "无法读取Huffman Code.txt\n";
        return;
    }
    getline(ip_file, vs_ht);
    cout << "Huffman Code.txt读取成功\n";
    ip_file.close();
}
void translate_huffman() {
int m = vs_ht.size();
// cout << "m: " << m << "\n";
for (int i = 0; i <= m - 1; i ++) {
int ok = 0;
int l = i, r = i - 1;
do{
r ++;
string sbsr = vs_ht.substr(l, r - l + 1);
// cout << sbsr << "\n";
if (mp_ht[sbsr]) {
// cout << "l: " << l << " ," << "r: " << r << "\n";
s_trans += char(mp_ht[sbsr]);
i = r;
ok = 1;
break;
}
}while (r + 1 <= m - 1);
if (ok == 0) {
cout << "解压错误,出现不能解压的编码QAQ\n";
// cout << "m: " << m << "\n";
// cout << "l: " << l << "\n";
// cout << vs_ht.substr(l) << "\n";
exit(0);
}
}
}
/**
 * Stores the decoded text `s_trans` in "Translated File.txt".
 * Prints a status line to stdout.
 */
void write_ht() {
    ofstream op_file("Translated File.txt");
    if (!op_file.is_open()) {
        cout << "无法保存解压后的文本到硬盘\n";
        return;
    }
    op_file << s_trans;
    cout << "Translated File.txt写入成功\n";
    op_file.close();
}
int main() {
read(); // 读取源文件
// cout << "源文件内容(vs)[" << vs << "]\n";
get_cnt();
// cout << "power: ";
// for (auto p : power) {
// cout << "[" << p.first << "]->[" << p.second << "], ";
// }
// cout << "\n\n";
// cout << "ch_list: ";
// for (auto p : ch_list) {
// cout << "[" << (char)(p) << "]";
// }
// cout << "\n\n";
// cout << "pw_list: ";
// for (auto p : pw_list) {
// cout << "[" << p << "]";
// }
// cout << "\n\n";
huffman_tree ht;
huffman_coding(ht);
// cout << "n: " << n << "\n";
// cout << "mp.size: " << mp.size() << "\n";
// for (auto p : mp) {
// cout << "[" << p.first << "]->[" << p.second << "]\n";
// }
// cout << "\n";
write(); // 写入Huffman编码
read_ht(); // 读取Huffman编码
// cout << "huff编码[" << vs_ht << "]\n";
for (auto p : mp) {
mp_ht[p.second] = p.first;
// cout << "[" << p.second << "]<-[" << p.first << "]\n";
}
// cout << "\n";
translate_huffman();
write_ht();
return 0;
}