实验内容及要求:
从字符文件读取若干个大写英文字符(英文字符种类数m建议为6至8种,如:m=6,则英文字符可取A-F),统计m种英文字符的出现频度,构造Huffman二叉树,对所有英文字符进行Huffman编码,将编码后的比特流用byte型(或char型)数组实现存储。在屏幕上输出该比特流的压缩率,然后利用该数组和Huffman二叉树进行译码,将译码后的字符序列输出到另一个字符文件。
提示:(1) 输入与输出字符文件每10个字符一行;
(2) 输入文件中的不可显示字符(如:回车、换行符)不进行统计和编码;
(3) 压缩率 = 平均编码长度 / $\log_2 m$,其中 m 为英文字符种类数。
实验目的:掌握Huffman二叉树及Huffman编、译码。
实验内容及要求:
从字符文件读取若干个大写英文字符(英文字符种类数m建议为6至8种,如:m=6,则英文字符可取A-F),统计m种英文字符的出现频度,构造Huffman二叉树,对所有英文字符进行Huffman编码,将编码后的比特流用byte型(或char型)数组实现存储。在屏幕上输出该比特流的压缩率,然后利用该数组和Huffman二叉树进行译码,将译码后的字符序列输出到另一个字符文件。
提示:(1) 输入与输出字符文件每10个字符一行;
(2) 输入文件中的不可显示字符(如:回车、换行符)不进行统计和编码;
(3) 压缩率 = 平均编码长度 / $\log_2 m$,其中 m 为英文字符种类数。
实验目的:掌握Huffman二叉树及Huffman编、译码。
算法设计简要描述:
输入/输出设计简要描述:
输入与输出字符文件每10个字符一行。
输出有文字提示。
测试案例(此处就不截图了):
从文件 read.txt 读取若干个大写英文字符,从文件 read1.txt 读取需要译码的二进制序列:
read.txt:AAAAAAAABC;read1.txt:111111110001
输出译码后的字符序列到write.txt:
AAAAAAAABC
压缩率:0.757116
代码:
#include <iostream>
#include <fstream>
#include <vector>
#include <cmath>
// File names used by the program: main() passes READ_ROOT to DataRead(),
// dec() reads the bit string from READ_decode and writes the decoded text
// to WRITE_ROOT.
#define READ_ROOT "read.txt"
#define READ_decode "read1.txt"
#define WRITE_ROOT "write.txt"
// Capacity of the character buffers (source text, received bits, decoded text).
#define MAX_TEXT 1000
// Maximum number of distinct symbols (the 26 upper-case letters).
#define MAX_LEAF 26
// Size of each per-symbol code buffer. createHFCode() fills a code backwards
// starting at index kinds - 2, so a code occupies at most kinds - 1 slots;
// MAX_LEAF - 1 therefore covers the largest alphabet.
#define MAX_BITS (MAX_LEAF - 1)
// Maximum number of tree nodes: MAX_LEAF leaves plus MAX_LEAF - 1 merged nodes.
#define MAX_NODE (MAX_LEAF * 2 - 1)
using namespace std;
// One node of the Huffman tree. Nodes live in a flat array and refer to each
// other by array index; createHFTree() stores -1 in a link to mean "none".
struct HTNode {
    double weight;  // relative frequency carried by this node
    int parent;     // index of the parent node
    int lchild;     // index of the left child
    int rchild;     // index of the right child
};
using HFNode = HTNode;
// Code-table entry holding the Huffman code of one symbol.
struct HCode {
    char code[MAX_BITS]{};  // code digits '0'/'1'; only [start, start + length) is used
    int start;              // index in code[] of the first digit
    int length{0};          // number of digits in the code
};
using HFCode = HCode;
// Huffman coder for upper-case letters: gathers symbol frequencies from a
// text file, builds the code tree and code table, reports the compression
// ratio and decodes a bit string back into text.
class Huffman {
public:
    Huffman() = default;
    ~Huffman() = default;

    // Loads the characters of file `root` into src_text; false if it cannot be opened.
    bool DataRead(const string& root);
    // Counts each upper-case letter in src_text and derives its relative frequency.
    void Statistics();
    // Stores in s1/s2 the two lightest parentless nodes among hfNode[0, range).
    void Select(int range);
    // Builds the Huffman tree from the collected weights; false if the symbol count is unusable.
    bool createHFTree();
    // Derives the code of every symbol by walking from its leaf up to the root.
    void createHFCode();
    // Prints average code length divided by log2(kinds).
    void solveCompressibility();
    // Writes the code table and the encoded source text to file `root`.
    bool writeHFCode(const string& root);
    // Decodes a bit string into characters using the tree.
    void dec(char receive[], char decoded[]);
    // Prints the distinct symbols collected so far.
    void visit();

private:
    // Stream shared by all file-reading and file-writing members.
    fstream file;
    // Number of distinct symbols.
    int kinds{0};
    // Source text as read from the input file.
    char src_text[MAX_TEXT]{};
    // Distinct symbols in order of first appearance.
    vector<char> text;
    // Occurrence count of each symbol (parallel to text).
    vector<int> count;
    // Relative frequency of each symbol (parallel to text); used as node weight.
    vector<double> weight;
    // Indices of the two nodes picked by the latest Select() call.
    int s1{0}, s2{0};
    // Code tree: leaves first, merged nodes after them.
    HFNode hfNode[MAX_NODE]{};
    // Code table, one entry per symbol.
    HFCode hfCode[MAX_LEAF]{};
};
// Prints every distinct symbol collected so far, each followed by one space.
void Huffman::visit()
{
    for (const char symbol : text)
        cout << symbol << " ";
}
//文件读取
bool Huffman::DataRead(const std::string& root) {
file.open(root, ios::in);
if (!file.is_open()) {
cout << "Wrong Root!" << endl;
return false;
}
else {
int i = 0;
while (file >> src_text[i++]);
file.close();
return true;
}
}
// Counts how often each upper-case letter occurs in src_text and converts the
// counts into relative frequencies.
//
// Appends to text (distinct letters in order of first appearance), count
// (occurrences, parallel to text) and weight (count / total letters), and
// increases kinds once per newly seen letter. Characters outside 'A'..'Z'
// are ignored.
void Huffman::Statistics() {
    int total = 0;
    for (const char ch : src_text) {
        if (ch < 'A' || ch > 'Z') {
            continue;  // not a letter to be coded
        }
        ++total;

        // Locate the letter among the ones already seen.
        std::size_t slot = 0;
        while (slot < text.size() && text[slot] != ch) {
            ++slot;
        }

        if (slot == text.size()) {
            // First occurrence: register the new symbol.
            text.push_back(ch);
            count.push_back(1);
            ++kinds;
        } else {
            ++count[slot];
        }
    }

    for (const int occurrences : count) {
        const double share = static_cast<double>(occurrences) / static_cast<double>(total);
        weight.push_back(share);
    }
}
//选择权值最小的两个节点
void Huffman::Select(int range) {
double min1 = 1;
for (int i = 0; i < range; i++) {
if (hfNode[i].weight < min1 && hfNode[i].parent == -1) {
min1 = hfNode[i].weight;
s1 = i;
}
}
double min2 = 1;
for (int i = 0; i < range; i++) {
if (hfNode[i].weight < min2 && hfNode[i].parent == -1 && i != s1) {
min2 = hfNode[i].weight;
s2 = i;
}
}
}
// Builds the Huffman tree in hfNode from the symbol weights.
//
// Nodes [0, kinds) are the leaves, one per symbol; nodes [kinds, 2*kinds - 1)
// are created by repeatedly merging the two lightest parentless nodes, so the
// last node is the root. A link value of -1 means "none".
//
// Returns false, after printing "Wrong Range!", unless 2 <= kinds <= MAX_LEAF.
bool Huffman::createHFTree() {
    const bool usable = kinds >= 2 && kinds <= MAX_LEAF;
    if (!usable) {
        cout << "Wrong Range!" << endl;
        return false;
    }

    const int total = 2 * kinds - 1;

    // Leaves carry the symbol frequencies.
    for (int leaf = 0; leaf < kinds; ++leaf) {
        hfNode[leaf].weight = weight[leaf];
        hfNode[leaf].parent = -1;
        hfNode[leaf].lchild = -1;
        hfNode[leaf].rchild = -1;
    }
    // Merged nodes start empty and unlinked.
    for (int inner = kinds; inner < total; ++inner) {
        hfNode[inner].weight = 0;
        hfNode[inner].parent = -1;
        hfNode[inner].lchild = -1;
        hfNode[inner].rchild = -1;
    }

    for (int next = kinds; next < total; ++next) {
        Select(next);  // only nodes created so far are candidates
        hfNode[s1].parent = next;
        hfNode[s2].parent = next;
        hfNode[next].lchild = s1;
        hfNode[next].rchild = s2;
        hfNode[next].weight = hfNode[s1].weight + hfNode[s2].weight;
    }
    return true;
}
//构建哈夫曼编码
void Huffman::createHFCode() {
int start;
for (int i = 0; i < kinds; i++) {
start = kinds-2;
for (int c = i, f = hfNode[i].parent; f != -1; c = f, f = hfNode[f].parent) {
if (c == hfNode[f].lchild) hfCode[i].code[start--] = '0';
else hfCode[i].code[start--] = '1';
hfCode[i].length++;
}
hfCode[i].start = start + 1;//编码起始坐标
}
}
// Prints the compression ratio: the frequency-weighted average code length
// divided by log2(kinds).
void Huffman::solveCompressibility() {
    double expected_bits = 0;
    int idx = 0;
    while (idx < kinds) {
        expected_bits += static_cast<double>(hfCode[idx].length) * weight[idx];
        ++idx;
    }
    const double ratio = expected_bits / log2(kinds);
    cout << "压缩率: " << ratio << endl;
}
// Debug helper: writes the code table and the encoded source text to `root`.
//
// Output layout: one "<symbol>:<code>" line per symbol, an empty line, then
// the code digits of every character of src_text with a line break after
// every 10 digits. Characters of src_text that are not in the symbol list
// are skipped.
//
// The stream is closed before returning. It is a member shared with the other
// I/O functions, and opening an fstream that is still open fails, so leaving
// it open would break every later open().
//
// Returns false, after printing "Wrong Root!", if the file cannot be opened.
bool Huffman::writeHFCode(const std::string& root) {
    file.open(root, ios::out);
    if (!file.is_open()) {
        cout << "Wrong Root!" << endl;
        return false;
    }

    // Code table.
    for (int i = 0; i < kinds; i++) {
        file << text[i] << ":";
        for (int j = hfCode[i].start; j < kinds - 1; j++) {
            file << hfCode[i].code[j];
        }
        file << '\n';
    }
    file << '\n';

    // Encoded source text, 10 digits per line.
    int counter = 0;
    for (int i = 0; src_text[i] != '\0'; i++) {
        for (std::size_t j = 0; j < text.size(); j++) {
            if (src_text[i] != text[j]) {
                continue;
            }
            for (int k = hfCode[j].start; k < kinds - 1; k++) {
                file << hfCode[j].code[k];
                if (++counter == 10) {
                    file << '\n';
                    counter = 0;
                }
            }
            break;  // each symbol appears once in text
        }
    }

    file.close();
    return true;
}
//译码
void Huffman::dec(char receive[], char decoded[])
{
file.open(READ_decode, ios::in);
int m = 0,n=0; int counter = 0;
while (file >> receive[m++]);
file.close();
receive[m] = '\0';
int i = 0,j = 0;
while (receive[i] != '\0')
{
int k = 2 * kinds - 2; //k指向根结点
while (k >= kinds && receive[i] != '\0')
if (receive[i++] == '0') k = hfNode[k].lchild;
else k = hfNode[k].rchild;
if (k < kinds) decoded[j++] = text[k]; //输出一个符号
//若k<kinds,则k指向一个叶子结点
}
decoded[j] = '\0';
file.open(WRITE_ROOT, ios::out);
while (decoded[n]!='\0')
{
file << decoded[n++];
counter++;
if (counter == 10) {
file << endl;
counter = 0;
}
};
file.close();
}
// Program entry: read the source text, build the Huffman tree and code table,
// print the compression ratio, then decode the bit string from READ_decode
// into WRITE_ROOT. Returns -1 if the input cannot be read or the tree cannot
// be built, 0 otherwise.
int main() {
    char receive[MAX_TEXT]{};
    char decoded[MAX_TEXT]{};
    Huffman huffman;

    const bool loaded = huffman.DataRead(READ_ROOT);
    if (!loaded) {
        return -1;
    }

    huffman.Statistics();

    const bool built = huffman.createHFTree();
    if (!built) {
        return -1;
    }

    huffman.createHFCode();
    huffman.solveCompressibility();

    // huffman.writeHFCode(WRITE_ROOT) can be called here to dump the code
    // table for debugging; it is left out because dec() writes its result to
    // the same file.
    huffman.dec(receive, decoded);

    return 0;
}
参考(基本是原基础上修改添加):西南交通大学数据结构第七次实验报告--Huffman编解码的实现_Jellyfish Knight的博客-CSDN博客