/*头文件*/
#ifndef HUFFMAN_H_INCLUDED
#define HUFFMAN_H_INCLUDED
#include<cstdio>
#include<iostream>
#include<string>
#include<algorithm>
#include "windows.h"
#include<cstring>
#include<map>
#include<stdlib.h>
#define MAXN 256
using namespace std;
/// One slot of the array-backed Huffman tree.
struct TreeNode {
    unsigned char c;  ///< Symbol held by the node; build() stores '#' in merged nodes.
    int num;          ///< Occurrence count (for merged nodes: the sum of both children).
    int left;         ///< Index of the left child in the node array, or -1 if none.
    int right;        ///< Index of the right child in the node array, or -1 if none.
};
/// Sets the text attribute (colour) used for subsequent console output.
void set_console_color(unsigned short color_index);
/// Interactive Huffman coder: builds a tree from a file's byte frequencies and
/// uses it to compress and decompress files named on standard input.
class HUFFMAN {
public:
    HUFFMAN();
    ~HUFFMAN();

    /// Reads a file, counts how often each character occurs, then builds the Huffman tree.
    void build();

    /// Returns the index of the tree's root node.
    int getRoot();

    /// Walks the tree from `root` and stores each leaf's code in huffman_code.
    void getCode(int root, string code);

    /// Prints every non-empty entry of huffman_code.
    void show_huff_code();

    /// Compresses a file using the stored codes.
    void encode();

    /// Decompresses a file using the stored tree.
    void decode();

    /// Draws the subtree rooted at node `r`, with its symbols, on the console.
    void show(int r);

private:
    TreeNode treenode[MAXN * 2];    ///< Node storage; children are referenced by index.
    int root;                       ///< Index of the root node inside treenode.
    string huffman_code[MAXN * 2];  ///< Code string per symbol, indexed by the symbol's value.
};
#endif // HUFFMAN_H_INCLUDED
/*类定义文件*/
#include"huffman.h"
#include "stdio.h"
#include "windows.h"
#include<cstring>
#include<map>
#include<algorithm>
#include<stdlib.h>
using namespace std;
void set_console_color(unsigned short color_index) {
SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), color_index);
}
void locate(int x,int y) {
HANDLE out_handle=GetStdHandle(STD_OUTPUT_HANDLE);
COORD loc;
loc.X=x;
loc.Y=y;
SetConsoleCursorPosition(out_handle, loc);
}
void move(int x,int y) {
HANDLE out_handle=GetStdHandle(STD_OUTPUT_HANDLE);
CONSOLE_SCREEN_BUFFER_INFO info;
GetConsoleScreenBufferInfo(out_handle,&info);
COORD loc;
loc.X=info.dwCursorPosition.X+x;
loc.Y=info.dwCursorPosition.Y+y;
SetConsoleCursorPosition(out_handle, loc);
}
/**
 * Creates a coder whose members all start in a defined state.
 *
 * Previously `root` and every entry of `treenode` were left uninitialized, so
 * calling getRoot(), getCode() or show() before build() read indeterminate
 * values and used them as array indexes. Every slot now starts as a childless
 * node and `root` refers to slot 0, which keeps those calls inside the array
 * until build() installs a real tree.
 */
HUFFMAN::HUFFMAN() : root(0) {
    for (int i = 0; i < MAXN * 2; ++i) {
        treenode[i].c = 0;
        treenode[i].num = 0;
        treenode[i].left = -1;
        treenode[i].right = -1;
    }
}
/// Nothing to release by hand: the members are plain arrays that clean up themselves.
HUFFMAN::~HUFFMAN() {}
int HUFFMAN::getRoot() {
return root;
}
void HUFFMAN::build() {
int fluency[MAXN], cnt;
memset(fluency, 0, sizeof(fluency));
set_console_color(15);
char filename[MAXN];
unsigned char ch;
printf("请输入待统计的文件名\n");
scanf("%s", filename);
FILE *fp = fopen(filename, "r");
if(fp == NULL) {
printf("打开文件失败!");
} else {
ch = fgetc(fp);
int tot = 0;
while(!feof(fp)) {
fluency[ch]++;
ch = fgetc(fp);
tot++;
}
printf("各个字符的频率为:\n");
set_console_color(46);
printf("---字符--------出现的次数---------频率-----\n");
set_console_color(47);
cnt = 0;
for(int i = 0; i < MAXN; ++ i) {
if(fluency[i] != 0) {
if((char)i == '\n') {
printf("换行符 %15d %lf\n", fluency[i], double(fluency[i])/(double)tot);
} else if((char)i == ' ') {
printf(" 空格 %15d %lf\n", fluency[i], double(fluency[i])/(double)tot);
} else {
printf("%5c %15d %lf\n", i, fluency[i], double(fluency[i])/(double)tot);
}
treenode[cnt].c = i;
treenode[cnt].num = fluency[i];
treenode[cnt].left = treenode[cnt].right = -1;
cnt++;
}
}
}
fclose(fp);
bool choosen[MAXN*2];
memset(choosen, 0, sizeof(choosen));
int min1 = 0, min2 = 1;
int count = cnt;
while(count != 1) {
for(int i = 0; i < cnt; ++ i) {
if(choosen[i]) {
continue;
}
if(i != min2 && (min1 == -1 || treenode[i].num < treenode[min1].num)) {
min1 = i;
}
if(i != min1 && (min2 == -1 || treenode[i].num < treenode[min2].num)) {
min2 = i;
}
}
choosen[min1] = choosen[min2] = 1;
treenode[cnt].c = '#';
treenode[cnt].num = treenode[min1].num + treenode[min2].num;
treenode[cnt].left = min1;
treenode[cnt].right = min2;
cnt++;
count--;
min1 = min2 = -1;
}
cnt--;
root = cnt;
set_console_color(15);
printf("huffman 树已经建好\n");
}
void HUFFMAN::getCode(int root, string code) {
if(treenode[root].left != -1) {
getCode(treenode[root].left, code+'0');
} else {
huffman_code[treenode[root].c] = code;
}
if(treenode[root].right != -1) {
getCode(treenode[root].right, code+'1');
}
}
void HUFFMAN::show_huff_code() {
set_console_color(46);
printf("---字符---- --- huffman编码----\n");
set_console_color(47);
for(int i = 0; i < 2*MAXN; ++ i) {
if(huffman_code[i] != "") {
if((char)i == '\n') {
printf(" 换行符 ---%25s----\n", huffman_code[i].c_str());
} else if((char)i == ' ') {
printf(" 空格 ---%25s----\n", huffman_code[i].c_str());
} else {
printf(" %c ---%25s----\n", i, huffman_code[i].c_str());
}
}
}
set_console_color(15);
}
void HUFFMAN::encode() {
char name[MAXN];
printf("请输入待压缩的文件名:(扩展名为.txt)\n");
scanf("%s", name);
FILE *in = fopen(name, "r"), *out;
if(in == NULL) {
printf("打开文件出错\n");
} else {
unsigned char c;
string res = "";
int in_len = 0;
while(fscanf(in, "%c", &c) != EOF) {
in_len++;
res += huffman_code[c];
}
printf("原文件的大小为: %.2lfKB\n", double(in_len)/1024);
printf("请输入压缩后的文件名:(扩展名为.huf)\n");
char tar_name[MAXN];
scanf("%s", tar_name);
out = fopen(tar_name, "wb");
int off = 8 - res.length()%8;
fprintf(out, "%d", off);
for(int i = 0; i < off; ++ i) {//编码长度不为8的倍数,就在末尾追加’0’
res.append("0");
}
int cur = res[0] - '0';
int out_len = 0;
for(int i = 1; res[i] != '\0'; ++ i) {
if(i%8 == 7) {
cur <<= 1;
cur += res[i] - '0';
out_len++;
fprintf(out, "%c", (unsigned char)cur);
cur = 0;
} else {
cur <<= 1;
cur += res[i] - '0';
}
}
fclose(out);
printf("压缩成功\n");
printf("压缩后文件大小为: %.2lfKB\n", (double)out_len/1024);
printf("压缩率为: %.2lf%%\n", (double)out_len/(double)in_len*100);
}
fclose(in);
}
void HUFFMAN::decode() {
unsigned char c;
char tar_name[MAXN], t_name[MAXN];
string cur = "", result = "";
printf("请输入待解压的文件名:(扩展名为.huf)\n");
scanf("%s", tar_name);
FILE *in = fopen(tar_name, "rb"), *out;
if(in == NULL) {
printf("打开文件失败!");
} else {
int cnt;
int off;
fscanf(in, "%d", &off);
while(fscanf(in, "%c", &c) != EOF) {
cur = "";
cnt = 0;
while(cnt != 8) {
cur += c%2 + '0';
c /= 2;
cnt++;
}
reverse(cur.begin(), cur.end());
result += cur;
}
int res = root;
result = result.substr(0, result.length() - off);
printf("请输入解压后的文件名:(扩展名为.txt)\n");
scanf("%s", t_name);
out = fopen(t_name, "w");
for(int i = 0; result[i] != '\0'; ++ i) {
if(result[i] == '0') {
res = treenode[res].left;
} else {
res = treenode[res].right;
}
if(treenode[res].left == -1 && treenode[res].right == -1) {
fprintf(out, "%c", treenode[res].c);
res = root;
}
}
fclose(out);
printf("解压成功!\n");
}
fclose(in);
}
void HUFFMAN::show(int r) {
/*总体设计思路就是先打印r对应的treenode[r]的字符域,然后保存当前光标的位置pre_x, pre_y,
当判断前结点的左子结点是否存在,若存在递归调用show函数,打印左子结点,记录此时的y坐标
cur_y,接下来回溯,将光标重新定位到之前的pre_x,pre_y,竖着移动到(pre_x, cur_y),然后递归调用show函数*/
if(treenode[r].c == '\n') {
set_console_color(6);
printf("|-");
set_console_color(10);
printf("换");
} else if(treenode[r].c == ' ') {
set_console_color(6);
printf("|-");
set_console_color(10);
printf("空");
} else {
set_console_color(6);
printf("|--");
if(treenode[r].c == '#') {
set_console_color(2);
} else {
set_console_color(10);
}
printf("%c", treenode[r].c);
}
HANDLE out_handle = GetStdHandle(STD_OUTPUT_HANDLE);
CONSOLE_SCREEN_BUFFER_INFO info;
GetConsoleScreenBufferInfo(out_handle, &info);
int pre_x = info.dwCursorPosition.X;
int pre_y = info.dwCursorPosition.Y;
if(treenode[r].left != -1) {
move(-1, 1);
show(treenode[r].left);
HANDLE in_handle = GetStdHandle(STD_OUTPUT_HANDLE);
CONSOLE_SCREEN_BUFFER_INFO in_info;
GetConsoleScreenBufferInfo(in_handle, &in_info);
int cur_y = in_info.dwCursorPosition.Y;
locate(pre_x, pre_y+1);
set_console_color(6);
for(int i = pre_y; i < cur_y-1; ++ i) {
move(-1, 1);
printf("|");
}
move(-1, 1);
show(treenode[r].right);
}
}
/*main.cpp*/
#include <iostream>
#include"huffman.h"
using namespace std;
/// Prints the menu of available actions.
static void print_menu() {
    printf("请选择功能: \n0, 退出\n1, 统计字符频率,并建立Huffman树\n");
    printf("2, 获取Huffman编码\n");
    printf("3, 将Huffman编码输出到屏幕中\n");
    printf("4, 利用Huffman编码对文件进行压缩\n");
    printf("5, 利用Huffman编码对文件进行解压\n");
    printf("6, huffman树的树形输出\n");
}

/// Reads the next menu choice. Lines that do not start with a number are
/// discarded and the read is retried; returns false once input is exhausted.
static bool read_choice(int &choice) {
    int got = scanf("%d", &choice);
    while (got == 0) {
        int ch;
        while ((ch = getchar()) != '\n' && ch != EOF) {
        }
        if (ch == EOF) {
            return false;
        }
        got = scanf("%d", &choice);
    }
    return got == 1;
}

/**
 * Menu loop: shows the menu, runs the chosen action, and repeats until the
 * user enters 0 or input ends.
 *
 * Changes over the previous version: the coder is an automatic object instead
 * of a never-deleted `new`, the menu text lives in one helper, and
 * non-numeric input no longer makes scanf fail forever on the same character
 * (which repeated the previous action endlessly).
 */
int main() {
    system("mode con cols=200 lines=500");
    set_console_color(15);

    HUFFMAN huff_tree;
    int choice = 0;

    print_menu();
    while (read_choice(choice) && choice != 0) {
        switch (choice) {
        case 1:
            huff_tree.build();
            break;
        case 2:
            huff_tree.getCode(huff_tree.getRoot(), "");
            printf("huffman编码已获取\n\n");
            break;
        case 3:
            huff_tree.show_huff_code();
            break;
        case 4:
            huff_tree.encode();
            break;
        case 5:
            huff_tree.decode();
            break;
        case 6:
            huff_tree.show(huff_tree.getRoot());
            printf("\n");
            break;
        default:
            break;  // unknown number: just show the menu again
        }
        set_console_color(15);
        print_menu();
    }
    return 0;
}
对于build()函数,统计就直接用一个int型的数组fluency, fluency[i]表示ascii码值为i的字符出现的次数(程序设计时用的是unsigned char),初始时memset为0,然后建Huffman树,树的结点类型是TreeNode,有四个数据成员 unsigned char c;//结点的字符,int num;//该字符出现的次数,int left, right;//结点的左右儿子的编号,建树的时候每次选出num域最小的两个结点,然后新建一个结点加到treenode数组尾,num域为其两者之和,直到最后只剩一个结点。
对于getCode()函数,从Huffman树的根结点开始对树进行深度优先遍历,向左走时编码追加'0',向右走时编码追加'1',到达叶子结点时将编码存储在string类型的huffman_code数组中。
对于show_huff_code()函数,需要注意的是如果字符为换行和空格需要进行特殊处理
对于encode()函数从文件中逐个读入字符,根据huffman_code数组,得到对应的huffman编码,然后追加到string类型的变量res的尾部,读入结束后,对res的每8位一取, 将这对应的八位转换成char类型,写入到对应的压缩文件中。读入字符的时候统计原来文件的长度in_len, 写文件的时候统计压缩后的文件的长度out_len. 最后计算压缩率out_len/in_len。
对于decode()函数,从压缩文件中逐个字符读入,读入后转换成对应的8位二进制数的01串,追加到string类型的result变量尾部,读入结束后,从huffman树的根结点开始走,若当前字符为'0',向左子结点走,若当前字符为'1',向右子结点走,当到达叶子结点时就输出叶子结点所在位置的字符,并写入到解压后的文件中。
对于show()函数,采用的是深度优先遍历的方法,还用到的一点技术就是windows编程中对输出光标的定位,以及获取当前光标的位置。
下为输出图像