前言(迭代于2023/6/18)
大一下学期期末数据结构项目设计——哈夫曼编码压缩文本。目前这个项目仍有缺陷:中文字符不能正常参与哈夫曼编码压缩运算,也不能正常输出。
需求文档:
注意事项
读取文本文件时可能出现文件打不开的问题:
这是因为所打开的 Txt 文本的路径中含有中文字符,导致程序无法读取该文件;
功能2-5中的显示:
Space -- 指代空格
LF -- 指代换行符
功能6中的读取注意事项:
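功能6的哈夫曼编码表的书写规则如下:每行书写一个字符的编码,格式为“字符 + 一个分隔空格 + 该字符的哈夫曼编码”(程序取每行的第 1 个字符作为被编码字符,从第 3 个字符起作为编码);换行符用 # 代替。例如,空格字符的编码为 00 时写作:
空格 00(即行首为一个空格字符,后接一个分隔空格,再接编码 00);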
代码部分:
#include <iostream> #include <queue> #include <unordered_map> #include <string> #include <fstream> #include <windows.h> #include <commdlg.h> #include <conio.h> using namespace std; //----------------------------------------------------------------------------------------------------------------------------- //全局变量 int key;//控制整体key事件 string text;//用于读取文本和哈夫曼压缩文本 string line; int cnt = 0;//用于读取文本的字符数 unordered_map<string, char> codes; // 储存哈夫曼编码表//用于功能6 // 结点类 class Node { public: char ch; // 字符 int freq; // 字符出现频率 Node* left; // 左孩子 Node* right; // 右孩子 Node() {} Node(char c, int f) { ch = c; freq = f; left = nullptr; right = nullptr; } ~Node() { delete left; delete right; } }; //----------------------------------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------------------------------- //函数区域 void menu();//打印选项菜单 void menuone();//打印次级选项菜单 void KeyEvent();//总体的选项事件 int readInfo();//使用者自主选择读入一段文本; void readInfoHand();//读取一个Txt文本(但是目前只开发了英文和标点符号的) // 建立哈夫曼树 Node* buildHuffmanTree(unordered_map<char, int>& freqMap); // 哈夫曼编码哈希表 void buildHuffmanCode(Node* root, string code, unordered_map<char, string>& huffmanCode); // 哈夫曼编码压缩 string compress(string text, unordered_map<char, string>& huffmanCode); // 哈夫曼解码,返回原始文本 string decompress(string compressedText, Node* root); void Hafuman();//哈夫曼编码压缩主体函数 void gotoxy(int x, int y);//光标定位 void changeColors();//改变控制台颜色函数 void HafumanYasuoShuchu();//哈夫曼编码压缩文本输出 void HafumanYasuoNumberShuchu();//哈夫曼编码的数量排序 void HafumanYasuoBianmaShuchu();//哈夫曼编码的展示 int getCompressedLength(const string& text, const unordered_map<char, string>& huffmanCode);//统计压缩后的字符数位数 int readInfoBiao();//用于功能6,读取哈夫曼编码表 int readInfoWenben();//用于功能6,读取哈夫曼压缩后的文本 void Jiema();//功能6,解码的函数汇总 //---------------------------------------------------------------------------------------------------------------------------- int main() { 
changeColors(); while (1) { menu(); KeyEvent(); if (key == 27)break; system("pause"); system("cls"); //清屏 } system("cls"); //清屏 return 0; } void KeyEvent() { key = _getch(); switch (key) { case 27: system("cls"); cout << "------退出中------" << endl; system("pause"); break; case 49: system("cls"); cout << "------功能模块1-----读取待编码文本------" << endl;//按的是数字1键----读取待编码文本 menuone(); int key2; key2 = _getch(); switch (key2) { case 49: system("cls"); cout << "------输入方式1-----Txt文本读入------" << endl; readInfo(); break; case 50: system("cls"); cout << "------输入方式2-----手动输入------" << endl; readInfoHand(); default:break; } break; case 50: system("cls"); cout << "------功能模块2-----显示文本中个字符的个数(以排序的方式升序显示)------" << endl;//按的是数字2键----显示文本中个字符的个数(以排序的方式升序显示) HafumanYasuoNumberShuchu(); break; case 51: system("cls"); cout << "------功能模块3-----文本编码并存储------" << endl; HafumanYasuoShuchu(); break; case 52: system("cls"); cout << "------功能模块4-----展示文本编码------" << endl; HafumanYasuoBianmaShuchu(); break; case 53: system("cls"); cout << "------功能模块5-----解码并将结果展示(输出到文件或者输出到屏幕)------" << endl; Hafuman(); break; case 54: system("cls"); cout << "------功能模块6-----读取哈夫曼编码表&哈夫曼压缩文本解压------" << endl; Jiema();//解码功能 break; default:cout << "------没有此选项,请重新选择-------" << endl; break; } } //----------------------------------------------------------------------------------------------------------------------------- void menu() { printf("|*======================================================*|\n"); printf("|* 哈夫曼编码压缩文本应用 *|\n"); printf("|*======================================================*|\n"); printf("|* 1.读取待编码文本(可选择多个文本读入) *|\n"); printf("|* 2.显示文本中个字符的个数(以排序的方式升序显示) *|\n"); printf("|* 3.文本编码并存储 *|\n"); printf("|* 4.展示文本编码 *|\n"); printf("|* 5.解码并将结果展示(输出到文件或者输出到屏幕) *|\n"); printf("|* 6.读取哈夫曼编码表&哈夫曼压缩文本解压 *|\n"); printf("|* #按ESC键退出程序 *|\n"); printf("|*======================================================*|\n"); printf(" 请输入功能选项1-5 \n"); } void menuone() { 
printf("|*======================================================*|\n"); printf("|* 请选择输入方式 *|\n"); printf("|*======================================================*|\n"); printf("|* 1.Txt文本输入 *|\n"); printf("|* 2.手动输入 *|\n"); printf("|*======================================================*|\n"); printf(" 请输入功能选项1-2 \n"); } int readInfo() { // 打开文件选择对话框 OPENFILENAMEA ofn; char szFile[260] = { 0 }; ZeroMemory(&ofn, sizeof(ofn)); ofn.lStructSize = sizeof(ofn); ofn.lpstrFile = szFile; ofn.nMaxFile = sizeof(szFile); ofn.lpstrFilter = "Text Files (*.txt)\0*.txt\0All Files (*.*)\0*.*\0"; ofn.nFilterIndex = 1; ofn.Flags = OFN_PATHMUSTEXIST | OFN_FILEMUSTEXIST | OFN_NOCHANGEDIR; if (GetOpenFileNameA(&ofn) == TRUE) { // 将 char 类型字符串转换为 wchar_t 类型字符串 int len = MultiByteToWideChar(CP_UTF8, 0, szFile, -1, NULL, 0); wchar_t* wstr = new wchar_t[len]; MultiByteToWideChar(CP_UTF8, 0, szFile, -1, wstr, len); wstring filename(wstr); delete[] wstr; ifstream infile(filename); if (!infile.is_open()) { cout << "要打开的文件名为:" << szFile << endl;//输出打开文件的路径 cout << "Failed to open file." << endl; return 1; } while (getline(infile, line)) { text += line + "\n"; } cnt = text.size();//用于统计文本字符数总数 cout << "成功读取文本---文本为以下内容:\n" << endl << "文本总字符数:" << cnt << endl << endl << text << endl; infile.close(); return 0; } else { cout << "用户取消了文件选择对话框." 
<< endl; return 1; } } // 比较函数,用于优先队列中的排序 struct cmp { bool operator() (const Node* a, const Node* b) { return a->freq > b->freq; } }; // 建立哈夫曼树 Node* buildHuffmanTree(unordered_map<char, int>& freqMap) { priority_queue<Node*, vector<Node*>, cmp> pq; for (auto it : freqMap) { pq.push(new Node(it.first, it.second)); } while (pq.size() > 1) { Node* left = pq.top(); pq.pop(); Node* right = pq.top(); pq.pop(); Node* parent = new Node('$', left->freq + right->freq); parent->left = left; parent->right = right; pq.push(parent); } return pq.top(); } // 哈夫曼编码哈希表 void buildHuffmanCode(Node* root, string code, unordered_map<char, string>& huffmanCode) { if (!root) return; if (!root->left && !root->right) { // 叶子结点 huffmanCode[root->ch] = code; } buildHuffmanCode(root->left, code + "0", huffmanCode); buildHuffmanCode(root->right, code + "1", huffmanCode); } // 哈夫曼编码压缩 string compress(string text, unordered_map<char, string>& huffmanCode) { string compressedText = ""; for (char c : text) { compressedText += huffmanCode[c]; } return compressedText; } // 哈夫曼解码,返回原始文本 string decompress(string compressedText, Node* root) { string text = ""; Node* cur = root; for (char c : compressedText) { if (c == '0') { cur = cur->left; } else { cur = cur->right; } if (!cur->left && !cur->right) { // 叶子结点 text += cur->ch; cur = root; // 重置为根节点 } } return text; } void Hafuman() { cout << endl; cout << endl; if (text == "") { cout << "还未读取文本,此功能不可用" << endl; cout << endl; return; } // 统计字符出现频率 unordered_map<char, int> freqMap; for (char c : text) { freqMap[c]++; } // 建立哈夫曼树 Node* root = buildHuffmanTree(freqMap); // 建立哈夫曼编码哈希表 unordered_map<char, string> huffmanCode; buildHuffmanCode(root, "", huffmanCode); // 压缩文本 string compressedText = compress(text, huffmanCode); int compressedTextLength = getCompressedLength(text, huffmanCode);//统计压缩后文本的位数 cout << "------------------------------------------------------------------------------------------------------------------------" << endl; // 
输出压缩后的文本和哈夫曼编码表 cout << endl; cout << "压缩后的文本:" << compressedText << endl << endl; cout << "------------------------------------------------------------------------------------------------------------------------" << endl; cout << "哈夫曼编码:" << "(数量从大到小排序;注意:文本参与哈夫曼压缩编码的包含:空格和换行符)" << endl; vector<pair<char, int>> sortedFreqMap(freqMap.begin(), freqMap.end()); sort(sortedFreqMap.begin(), sortedFreqMap.end(), [](const auto& a, const auto& b) { return a.second > b.second; }); // 按顺序输出哈夫曼编码 for (auto& it : sortedFreqMap) { char c = it.first; if (huffmanCode.count(c)) { if (c == 32)cout << "Space" << " :Numbers: " << freqMap[c] << " Huffman Code: " << huffmanCode[c] << endl; else if (c == 10)cout << "LF" << " :Numbers: " << freqMap[c] << " Huffman Code: " << huffmanCode[c] << endl; else cout << c << " :Numbers: " << freqMap[c] << " Huffman Code: " << huffmanCode[c] << endl; } } cout << "------------------------------------------------------------------------------------------------------------------------" << endl; cout << endl; // 解压文本 string decompressedText = decompress(compressedText, root); // 输出解压后的原始文本 cout << "解压后的原始文本:" << endl << decompressedText << endl; cout << "------------------------------------------------------------------------------------------------------------------------" << endl; cout << "压缩前文本位数:" << cnt * 8 << endl; cout << "压缩后文本位数:" << compressedTextLength << endl; double rate = (1-(compressedTextLength * 1.0 / (cnt * 8))) * 100; cout << "压缩率为:" << rate << "%" << endl; delete root; } //用于输出 " Nice to meet you " void gotoxy(int x, int y) {// 光标定位函数 COORD p;//定义结构体变量p HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE);// 获取当前函数句柄 p.X = x; p.Y = y;//将光标的目标移动位置传递给结构体 SetConsoleCursorPosition(handle, p);//移动光标 } void changeColors() { system("color F4");//改变屏幕的背景色和前景色 gotoxy(8, 3);//光标定位到指定位置 printf("欢迎使用哈夫曼文本压缩应用!\n"); gotoxy(8, 4);//光标定位到指定位置 cout << "请按Enter键进入" << endl; getchar(); } void readInfoHand() { cout << "请输入一段字符:" << endl; getline(cin, 
text); // 改为使用getline读入一行字符串 } void HafumanYasuoShuchu() { cout << endl; cout << endl; if (text == "") { cout << "还未读取文本,此功能不可用" << endl; cout << endl; return; } // 统计字符出现频率 unordered_map<char, int> freqMap; for (char c : text) { freqMap[c]++; } // 建立哈夫曼树 Node* root = buildHuffmanTree(freqMap); // 建立哈夫曼编码哈希表 unordered_map<char, string> huffmanCode; buildHuffmanCode(root, "", huffmanCode); // 压缩文本 string compressedText = compress(text, huffmanCode); cout << "------------------------------------------------------------------------------------------------------------------------" << endl; // 输出压缩后的文本和哈夫曼编码表 cout << endl; cout << "压缩后的文本:" << compressedText << endl; cout << "------------------------------------------------------------------------------------------------------------------------" << endl; } void HafumanYasuoNumberShuchu() { cout << endl; if (text == "") { cout << "还未读取文本,此功能不可用" << endl; cout << endl; return; } // 统计字符出现频率 unordered_map<char, int> freqMap; for (char c : text) { freqMap[c]++; } // 建立哈夫曼树 Node* root = buildHuffmanTree(freqMap); // 建立哈夫曼编码哈希表 unordered_map<char, string> huffmanCode; buildHuffmanCode(root, "", huffmanCode); cout << "------------------------------------------------------------------------------------------------------------------------" << endl; cout << "哈夫曼编码:" << "(数量从大到小排序;注意:文本参与哈夫曼压缩编码的包含:空格和换行符)" << endl; vector<pair<char, int>> sortedFreqMap(freqMap.begin(), freqMap.end()); sort(sortedFreqMap.begin(), sortedFreqMap.end(), [](const auto& a, const auto& b) { return a.second > b.second; }); // 按顺序输出哈夫曼编码 for (auto& it : sortedFreqMap) { char c = it.first; if (huffmanCode.count(c)) { if (c == 32)cout << "Space" << " :Numbers: " << freqMap[c] << endl; else if (c == 10)cout << "LF" << " :Numbers: " << freqMap[c] << endl; else cout << c << " :Numbers: " << freqMap[c] << endl; } } cout << "------------------------------------------------------------------------------------------------------------------------" << endl; } void 
HafumanYasuoBianmaShuchu() { cout << endl; cout << endl; if (text == "") { cout << "还未读取文本,此功能不可用" << endl; cout << endl; return; } // 统计字符出现频率 unordered_map<char, int> freqMap; for (char c : text) { freqMap[c]++; } // 建立哈夫曼树 Node* root = buildHuffmanTree(freqMap); // 建立哈夫曼编码哈希表 unordered_map<char, string> huffmanCode; buildHuffmanCode(root, "", huffmanCode); cout << "------------------------------------------------------------------------------------------------------------------------" << endl; cout << "哈夫曼编码:" << "(数量从大到小排序;注意:文本参与哈夫曼压缩编码的包含:空格和换行符)" << endl; vector<pair<char, int>> sortedFreqMap(freqMap.begin(), freqMap.end()); sort(sortedFreqMap.begin(), sortedFreqMap.end(), [](const auto& a, const auto& b) { return a.second > b.second; }); // 按顺序输出哈夫曼编码 for (auto& it : sortedFreqMap) { char c = it.first; if (huffmanCode.count(c)) { if (c == 32)cout << "Space" << " :Numbers: " << freqMap[c] << " Huffman Code: " << huffmanCode[c] << endl; else if (c == 10)cout << "LF" << " :Numbers: " << freqMap[c] << " Huffman Code: " << huffmanCode[c] << endl; else cout << c << " :Numbers: " << freqMap[c] << " Huffman Code: " << huffmanCode[c] << endl; } } cout << "------------------------------------------------------------------------------------------------------------------------" << endl; } // 计算经过哈夫曼编码压缩后的字符数 int getCompressedLength(const string& text, const unordered_map<char, string>& huffmanCode) { int compressedLen = 0; for (char c : text) { compressedLen += huffmanCode.at(c).size(); // 使用哈希表快速查找哈夫曼编码 } return compressedLen; } void Jiema() { cout << endl; cout << "读取哈夫曼编码表" << endl; readInfoBiao(); cout << "读取哈夫曼压缩后文本" << endl; system("pause"); cout << "------------------------------------------------------------------------------------------------------------------------" << endl; cout << "解压文本如下:" << endl; readInfoWenben(); cout << endl; cout << "解压完成!" 
<< endl; } int readInfoBiao() { // 打开文件选择对话框 OPENFILENAMEA ofn; char szFile[260] = { 0 }; ZeroMemory(&ofn, sizeof(ofn)); ofn.lStructSize = sizeof(ofn); ofn.lpstrFile = szFile; ofn.nMaxFile = sizeof(szFile); ofn.lpstrFilter = "Text Files (*.txt)\0*.txt\0All Files (*.*)\0*.*\0"; ofn.nFilterIndex = 1; ofn.Flags = OFN_PATHMUSTEXIST | OFN_FILEMUSTEXIST | OFN_NOCHANGEDIR; if (GetOpenFileNameA(&ofn) == TRUE) { // 将 char 类型字符串转换为 wchar_t 类型字符串 int len = MultiByteToWideChar(CP_UTF8, 0, szFile, -1, NULL, 0); wchar_t* wstr = new wchar_t[len]; MultiByteToWideChar(CP_UTF8, 0, szFile, -1, wstr, len); wstring filename(wstr); delete[] wstr; ifstream infile(filename); if (!infile.is_open()) { cout << "要打开的文件名为:" << szFile << endl;//输出打开文件的路径 cout << "Failed to open file." << endl; return 1; } string line; while (getline(infile, line)) { // 逐行读取编码表数据 char ch = line[0]; // 获取字符 if (ch == 32) { cout << "------------------------------------------------------------------------------------------------------------------------" << endl; cout << "空格读取成功" << endl; } if (ch == '#') { //利用#字符代替换行符 ch = '\n'; cout << "换行符读取成功" << endl; cout << "------------------------------------------------------------------------------------------------------------------------" << endl; } string code = line.substr(2); // 获取编码 codes[code] = ch; // 将编码和字符对应存储到哈希表中 } infile.close(); return 0; } else { cout << "用户取消了文件选择对话框." 
<< endl; return 1; } } int readInfoWenben() { // 打开文件选择对话框 OPENFILENAMEA ofn; char szFile[260] = { 0 }; ZeroMemory(&ofn, sizeof(ofn)); ofn.lStructSize = sizeof(ofn); ofn.lpstrFile = szFile; ofn.nMaxFile = sizeof(szFile); ofn.lpstrFilter = "Text Files (*.txt)\0*.txt\0All Files (*.*)\0*.*\0"; ofn.nFilterIndex = 1; ofn.Flags = OFN_PATHMUSTEXIST | OFN_FILEMUSTEXIST | OFN_NOCHANGEDIR; if (GetOpenFileNameA(&ofn) == TRUE) { // 将 char 类型字符串转换为 wchar_t 类型字符串 int len = MultiByteToWideChar(CP_UTF8, 0, szFile, -1, NULL, 0); wchar_t* wstr = new wchar_t[len]; MultiByteToWideChar(CP_UTF8, 0, szFile, -1, wstr, len); wstring filename(wstr); delete[] wstr; ifstream infile(filename); if (!infile.is_open()) { cout << "要打开的文件名为:" << szFile << endl;//输出打开文件的路径 cout << "Failed to open file." << endl; return 1; } string code = ""; char ch; while (infile >> ch) { // 读入每一个字符 code += ch; if (codes.find(code) != codes.end()) { // 如果匹配成功 cout << codes[code]; // 将对应字符输出到控制台 code = ""; // 重置编码 } } return 0; } else { cout << "用户取消了文件选择对话框." << endl; return 1; } }