Huffman编码

/*头文件*/
#ifndef HUFFMAN_H_INCLUDED
#define HUFFMAN_H_INCLUDED
#include<cstdio>
#include<iostream>
#include<string>
#include<algorithm>
#include "windows.h"
#include<cstring>
#include<map>
#include<stdlib.h>
#define MAXN 256
using namespace std;

/// One node of the Huffman tree. Nodes live in a flat array and refer to each
/// other by array index rather than by pointer.
struct TreeNode{
   unsigned char c;   ///< character of a leaf; build() stores '#' in internal nodes
   int num;           ///< occurrence count (internal node: sum of its two children)
   int left;          ///< index of the left child, -1 when there is none
   int right;         ///< index of the right child, -1 when there is none
};

/// Applies the text attribute `color_index` to the standard-output console.
void set_console_color(unsigned short color_index);

/// Huffman tree together with the code table derived from it. Every public
/// step (build / getCode / encode / decode / show) is driven from the menu in main().
class HUFFMAN{
private:
   TreeNode treenode[MAXN*2];     // flat node storage; children are referenced by index
   int root;                      // index of the root node inside treenode
   string huffman_code[MAXN*2];   // bit string ("0"/"1" characters) per character value; filled by getCode()
public:
   HUFFMAN();
   ~HUFFMAN();
   void build();///Reads a file, counts how often each character occurs, then builds the Huffman tree
   int getRoot();
   void getCode(int root, string code);///Walks the Huffman tree and stores each leaf's code in the huffman_code array
   void show_huff_code();
   void encode();///Compresses a file
   void decode();///Decompresses a file
   void show(int r);///Prints the Huffman tree (as a tree drawing) starting at node r
};

#endif // HUFFMAN_H_INCLUDED
/*类定义文件*/
#include"huffman.h"
#include "stdio.h"
#include "windows.h"
#include<cstring>
#include<map>
#include<algorithm>
#include<stdlib.h>

using namespace std;

void set_console_color(unsigned short color_index) {
   SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), color_index);
}

void locate(int x,int y) {
   HANDLE out_handle=GetStdHandle(STD_OUTPUT_HANDLE);
   COORD loc;
   loc.X=x;
   loc.Y=y;
   SetConsoleCursorPosition(out_handle, loc);
}

void move(int x,int y) {
   HANDLE out_handle=GetStdHandle(STD_OUTPUT_HANDLE);
   CONSOLE_SCREEN_BUFFER_INFO info;
   GetConsoleScreenBufferInfo(out_handle,&info);
   COORD loc;
   loc.X=info.dwCursorPosition.X+x;
   loc.Y=info.dwCursorPosition.Y+y;
   SetConsoleCursorPosition(out_handle, loc);
}

/// Creates an empty object: no tree has been built yet.
///
/// The original constructor left `root` and every TreeNode indeterminate, so
/// using the object before build() read garbage. Here `root` starts at -1
/// (the same "no node" marker the child links use) and every node starts as a
/// childless leaf with a zero count. The code strings are already empty
/// because std::string default-constructs to "".
HUFFMAN::HUFFMAN() : root(-1) {
   for(int i = 0; i < MAXN*2; ++ i) {
      treenode[i].c = 0;
      treenode[i].num = 0;
      treenode[i].left = -1;
      treenode[i].right = -1;
   }
}

/// Destructor. The body is empty: the class holds only member arrays
/// (TreeNode and string), which are destroyed automatically.
HUFFMAN::~HUFFMAN() {

}

int HUFFMAN::getRoot() {
   return root;
}

void HUFFMAN::build() {
   int fluency[MAXN], cnt;
   memset(fluency, 0, sizeof(fluency));
   set_console_color(15);
   char filename[MAXN];
   unsigned char ch;
   printf("请输入待统计的文件名\n");
   scanf("%s", filename);
   FILE *fp = fopen(filename, "r");
   if(fp == NULL) {
      printf("打开文件失败!");
   } else {
      ch = fgetc(fp);
      int tot = 0;
      while(!feof(fp)) {
         fluency[ch]++;
         ch = fgetc(fp);
         tot++;
      }
      printf("各个字符的频率为:\n");
      set_console_color(46);
      printf("---字符--------出现的次数---------频率-----\n");
      set_console_color(47);
      cnt = 0;
      for(int i = 0; i < MAXN; ++ i) {
         if(fluency[i] != 0) {
            if((char)i == '\n') {
               printf("换行符  %15d            %lf\n", fluency[i], double(fluency[i])/(double)tot);
            } else if((char)i == ' ') {
               printf("  空格  %15d            %lf\n", fluency[i], double(fluency[i])/(double)tot);
            } else {
               printf("%5c   %15d            %lf\n", i, fluency[i], double(fluency[i])/(double)tot);
            }
            treenode[cnt].c = i;
            treenode[cnt].num = fluency[i];
            treenode[cnt].left = treenode[cnt].right = -1;
            cnt++;
         }
      }

   }
   fclose(fp);
   bool choosen[MAXN*2];
   memset(choosen, 0, sizeof(choosen));
   int min1 = 0, min2 = 1;
   int count = cnt;
   while(count != 1) {
      for(int i = 0; i < cnt; ++ i) {
         if(choosen[i]) {
            continue;
         }
         if(i != min2 && (min1 == -1 || treenode[i].num < treenode[min1].num)) {
            min1 = i;
         }
         if(i != min1 && (min2 == -1 || treenode[i].num < treenode[min2].num)) {
            min2 = i;
         }
      }
      choosen[min1] = choosen[min2] = 1;
      treenode[cnt].c = '#';
      treenode[cnt].num = treenode[min1].num + treenode[min2].num;
      treenode[cnt].left = min1;
      treenode[cnt].right = min2;
      cnt++;
      count--;
      min1 = min2 = -1;
   }
   cnt--;
   root = cnt;
   set_console_color(15);
   printf("huffman 树已经建好\n");
}

void HUFFMAN::getCode(int root, string code) {
   if(treenode[root].left != -1) {
      getCode(treenode[root].left, code+'0');
   } else {
      huffman_code[treenode[root].c] = code;
   }
   if(treenode[root].right != -1) {
      getCode(treenode[root].right, code+'1');
   }
}

/// Prints one table row per character that currently has a non-empty code.
/// Newline and space get a textual label because printing them literally
/// would not be readable in the table.
void HUFFMAN::show_huff_code() {
   set_console_color(46);
   printf("---字符----         ---              huffman编码----\n");
   set_console_color(47);
   for(int value = 0; value < 2*MAXN; ++ value) {
      if(huffman_code[value].empty()) {
         continue;   // no code stored for this slot
      }
      const char *bits = huffman_code[value].c_str();
      switch((char)value) {
      case '\n':
         printf("  换行符            ---%25s----\n", bits);
         break;
      case ' ':
         printf("   空格             ---%25s----\n", bits);
         break;
      default:
         printf("    %c               ---%25s----\n", value, bits);
         break;
      }
   }
   set_console_color(15);
}

void HUFFMAN::encode() {
   char name[MAXN];
   printf("请输入待压缩的文件名:(扩展名为.txt)\n");
   scanf("%s", name);
   FILE *in = fopen(name, "r"), *out;
   if(in == NULL) {
      printf("打开文件出错\n");
   } else {
      unsigned char c;
      string res = "";
      int in_len = 0;
      while(fscanf(in, "%c", &c) != EOF) {
         in_len++;
         res += huffman_code[c];
      }
      printf("原文件的大小为: %.2lfKB\n", double(in_len)/1024);
      printf("请输入压缩后的文件名:(扩展名为.huf)\n");
      char tar_name[MAXN];
      scanf("%s", tar_name);
      out = fopen(tar_name, "wb");
      int off = 8 - res.length()%8;
      fprintf(out, "%d", off);
      for(int i = 0; i < off; ++ i) {//编码长度不为8的倍数,就在末尾追加’0’
         res.append("0");
      }
      int cur = res[0] - '0';
      int out_len = 0;
      for(int i = 1; res[i] != '\0'; ++ i) {
         if(i%8 == 7) {
            cur <<= 1;
            cur += res[i] - '0';
            out_len++;
            fprintf(out, "%c", (unsigned char)cur);
            cur = 0;
         } else {
            cur <<= 1;
            cur += res[i] - '0';
         }
      }
      fclose(out);
      printf("压缩成功\n");
      printf("压缩后文件大小为: %.2lfKB\n", (double)out_len/1024);
      printf("压缩率为: %.2lf%%\n", (double)out_len/(double)in_len*100);
   }
   fclose(in);
}

void HUFFMAN::decode() {
   unsigned char c;
   char tar_name[MAXN], t_name[MAXN];
   string cur = "", result = "";
   printf("请输入待解压的文件名:(扩展名为.huf)\n");
   scanf("%s", tar_name);
   FILE *in = fopen(tar_name, "rb"), *out;
   if(in == NULL) {
      printf("打开文件失败!");
   } else {
      int cnt;
      int off;
      fscanf(in, "%d", &off);
      while(fscanf(in, "%c", &c) != EOF) {
         cur = "";
         cnt = 0;
         while(cnt != 8) {
            cur += c%2 + '0';
            c /= 2;
            cnt++;
         }
         reverse(cur.begin(), cur.end());
         result += cur;
      }
      int res = root;
      result = result.substr(0, result.length() - off);
      printf("请输入解压后的文件名:(扩展名为.txt)\n");
      scanf("%s", t_name);
      out = fopen(t_name, "w");
      for(int i = 0; result[i] != '\0'; ++ i) {
         if(result[i] == '0') {
            res = treenode[res].left;
         } else {
            res = treenode[res].right;
         }
         if(treenode[res].left == -1 && treenode[res].right == -1) {
            fprintf(out, "%c", treenode[res].c);
            res = root;
         }
      }
      fclose(out);
      printf("解压成功!\n");
   }
   fclose(in);
}

/// Draws the subtree rooted at node `r` on the console, starting at the
/// current cursor position, using cursor positioning to lay out the branches.
/// @param r index into treenode of the node to print first
void HUFFMAN::show(int r) {
   /* Overall approach: first print the character of treenode[r], then remember the
   cursor position (pre_x, pre_y). If the node has a left child, call show recursively
   to print it and record the resulting row cur_y. Then go back to (pre_x, pre_y),
   move straight down to (pre_x, cur_y) and call show recursively for the right child. */
   if(treenode[r].c == '\n') {
      // Newline is shown as a label; the shorter "|-" prefix is used together with it.
      set_console_color(6);
      printf("|-");
      set_console_color(10);
      printf("换");
   } else if(treenode[r].c == ' ') {
      // Space is shown as a label as well.
      set_console_color(6);
      printf("|-");
      set_console_color(10);
      printf("空");
   } else {
      set_console_color(6);
      printf("|--");
      // '#' is the character build() stores in internal nodes; it gets its own colour.
      if(treenode[r].c == '#') {
         set_console_color(2);
      } else {
         set_console_color(10);
      }
      printf("%c", treenode[r].c);
   }
   // Cursor position right after this node's label was printed.
   HANDLE out_handle = GetStdHandle(STD_OUTPUT_HANDLE);
   CONSOLE_SCREEN_BUFFER_INFO info;
   GetConsoleScreenBufferInfo(out_handle, &info);
   int pre_x = info.dwCursorPosition.X;
   int pre_y = info.dwCursorPosition.Y;
   if(treenode[r].left != -1) {
      // One column back and one row down, then print the left subtree from there.
      move(-1, 1);
      show(treenode[r].left);
      // Row on which the cursor ended up after the left subtree was printed.
      HANDLE in_handle = GetStdHandle(STD_OUTPUT_HANDLE);
      CONSOLE_SCREEN_BUFFER_INFO in_info;
      GetConsoleScreenBufferInfo(in_handle, &in_info);
      int cur_y = in_info.dwCursorPosition.Y;
      locate(pre_x, pre_y+1);
      set_console_color(6);
      // Printing '|' advances the cursor by one column, so each move(-1, 1)
      // returns to the same column one row further down.
      for(int i = pre_y; i < cur_y-1; ++ i) {
         move(-1, 1);
         printf("|");
      }
      move(-1, 1);
      // The right child is printed without checking that it exists: a node
      // with a left child is taken to have a right child too.
      show(treenode[r].right);
   }
}

/*main.cpp*/
#include <iostream>
#include"huffman.h"
using namespace std;

/// Prints the numbered list of actions the user can choose from.
static void print_menu() {
   printf("请选择功能: \n0, 退出\n1, 统计字符频率,并建立Huffman树\n");
   printf("2, 获取Huffman编码\n");
   printf("3, 将Huffman编码输出到屏幕中\n");
   printf("4, 利用Huffman编码对文件进行压缩\n");
   printf("5, 利用Huffman编码对文件进行解压\n");
   printf("6, huffman树的树形输出\n");
}

/// Reads the next menu choice from standard input.
/// A token that is not a number is discarded and the user is asked again;
/// end of input is treated as "quit".
/// @return the number entered, or 0 when standard input is exhausted
static int read_choice() {
   int choice = 0;
   for(;;) {
      const int got = scanf("%d", &choice);
      if(got == 1) {
         return choice;
      }
      if(got == EOF) {
         return 0;
      }
      // Without this the rejected token would stay in the stream and every
      // following scanf("%d") would fail on it again, looping forever.
      scanf("%*s");
      printf("无效的输入, 请重新选择\n");
   }
}

/// Interactive menu loop around one HUFFMAN object; choice 0 ends the program.
int main() {
   system("mode con cols=200 lines=500");
   set_console_color(15);
   HUFFMAN huff_tree;   // automatic storage: released on return, nothing to delete
   print_menu();
   int choice = read_choice();
   while(choice) {
      switch(choice) {
      case 1:
         huff_tree.build();
         break;
      case 2:
         huff_tree.getCode(huff_tree.getRoot(), "");
         printf("huffman编码已获取\n\n");
         break;
      case 3:
         huff_tree.show_huff_code();
         break;
      case 4:
         huff_tree.encode();
         break;
      case 5:
         huff_tree.decode();
         break;
      case 6:
         huff_tree.show(huff_tree.getRoot());
         printf("\n");
         break;
      default:
         // Unknown number: fall through to the menu again.
         break;
      }
      set_console_color(15);
      print_menu();
      choice = read_choice();
   }
   return 0;
}

对于build()函数,统计就直接用一个int型的数组fluency, fluency[i]表示ascii码值为i的字符出现的次数(程序设计时用的是unsigned char),初始时memset为0,然后建Huffman树,树的结点类型是TreeNode,有四个数据成员 unsigned char c;//结点的字符,int num;//该字符出现的次数,int left, right;//结点的左右儿子的编号,建树的时候每次选出num域最小的两个结点,然后新建一个结点加到treenode数组尾,num域为其两者之和,直到最后只剩一个结点。

对于getCode()函数,从Huffman树的根对树进行深度优先遍历,向左编码+’0’向右编码+’1’, 到达叶子结点时将编码存储在string类型的huffman_code数组中

对于show_huff_code()函数,需要注意的是如果字符为换行和空格需要进行特殊处理

对于encode()函数从文件中逐个读入字符,根据huffman_code数组,得到对应的huffman编码,然后追加到string类型的变量res的尾部,读入结束后,对res的每8位一取, 将这对应的八位转换成char类型,写入到对应的压缩文件中。读入字符的时候统计原来文件的长度in_len, 写文件的时候统计压缩后的文件的长度out_len. 最后计算压缩率out_len/in_len。

对于decode()函数,从压缩文件中逐个字符读入,读入后转换成对应的8位二进制数的01串,追加到string类型的result变量尾部。读入结束后,先去掉压缩时补在末尾的off个’0’,然后从huffman树的根结点开始走:若当前字符为’0’,向左子结点走,若当前字符为’1’,向右子结点走;当到达叶子结点时,就把该叶子结点的字符写入到解压后的文件中,并回到根结点继续处理后面的字符。

对于show()函数,采用的是深度优先遍历的方法,另外还用到了windows编程中对输出光标的定位,以及获取当前光标位置的技术。

下为输出图像


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值