文件压缩

最新推荐文章于 2023-03-08 21:59:55 发布

Adoiphs

最新推荐文章于 2023-03-08 21:59:55 发布

阅读量261

点赞数

本文链接：https://blog.csdn.net/Adoiphs/article/details/79941327

版权

#pragma once
#include <iostream>
#include <vector>

using namespace std;
// Comparison functor: true when `left` is strictly greater than `right`.
// Used as the Compare argument of Myheap to obtain a max-heap.
template<class T>
class Greate
{
public:
    // Const-qualified so the functor can also be invoked through a const
    // object or a const reference.
    bool operator()(const T& left, const T& right) const
    {
        return left > right;
    }
};
// Comparison functor: true when `left` is strictly less than `right`.
// Used as the Compare argument of Myheap to obtain a min-heap.
template<class T>
class Less
{
public:
    // Const-qualified so the functor can also be invoked through a const
    // object or a const reference.
    bool operator()(const T& left, const T& right) const
    {
        return left < right;
    }
};
// Binary heap stored in a std::vector.
//
// Compare(a, b) returning true means `a` has higher priority than `b` and
// therefore belongs closer to the top. With the default Less<T> the smallest
// element is on top (min-heap); pass Greate<T> to get a max-heap.
//
// NOTE: the methods use assert(); <cassert>/<assert.h> must already have been
// included wherever this template is compiled.
template<class T, class Compare = Less<T>>// the second parameter defaults to a min-heap
class Myheap
{
public:
    // Creates an empty heap.
    Myheap()
    {}

    // Builds a heap from the first `size` elements of `arr`.
    // `arr` may be null only when `size` is 0.
    Myheap(const T arr[], size_t size)
    {
        assert(arr != nullptr || size == 0);
        if (size == 0)
            return;
        _heap.assign(arr, arr + size);
        // The last node has index size-1, so (size-2)/2 is the last node that
        // has a child. Sift every such node down, from the last one back to
        // the root. With fewer than two elements there is nothing to do, and
        // skipping the loop avoids the unsigned wrap-around of size-2.
        if (size > 1)
        {
            for (size_t next = (size - 2) / 2 + 1; next > 0; --next)
            {
                AdjustDown(next - 1);
            }
        }
    }

    // True when the heap holds no elements.
    bool Empty()const
    {
        return _heap.empty();
    }

    // Number of stored elements.
    size_t Size()const
    {
        return _heap.size();
    }

    // Highest-priority element. The heap must not be empty.
    T& Top()
    {
        assert(!_heap.empty());
        return _heap.front();
    }
    const T& Top()const
    {
        assert(!_heap.empty());
        return _heap.front();
    }

    // Inserts `elem`: append it at the end, then sift it up into place.
    void PushHeap(const T& elem)
    {
        _heap.push_back(elem);
        AdjustUp(_heap.size() - 1);
    }

    // Removes the top element. The heap must not be empty.
    void PopHeap()
    {
        assert(!Empty());
        // Move the top element to the end, drop it, then restore the heap
        // property from the root (only needed while 2+ elements remain).
        std::swap(_heap.front(), _heap.back());
        _heap.pop_back();
        if (_heap.size() > 1)
        {
            AdjustDown(0);
        }
    }

    // Sifts the element at index `_root` down until neither child has higher
    // priority than it.
    void AdjustDown(size_t _root)
    {
        Compare comp;
        size_t Parent = _root;
        // Child starts as the left child (2*Parent+1); the loop ends once the
        // current node has no children.
        for (size_t Child = Parent * 2 + 1; Child < _heap.size(); Child = Parent * 2 + 1)
        {
            // Prefer the right child when it exists and outranks the left one.
            if (Child + 1 < _heap.size() && comp(_heap[Child + 1], _heap[Child]))
            {
                ++Child;
            }
            // The parent already outranks (or ties) both children: done.
            if (!comp(_heap[Child], _heap[Parent]))
            {
                return;
            }
            std::swap(_heap[Child], _heap[Parent]);
            Parent = Child;
        }
    }

    // Sifts the element at index `Child` up until its parent has priority at
    // least as high.
    void AdjustUp(size_t Child)
    {
        Compare comp;
        while (Child > 0)
        {
            // Computed inside the loop so it is never evaluated for the root.
            const size_t Parent = (Child - 1) / 2;
            if (!comp(_heap[Child], _heap[Parent]))
            {
                return;
            }
            std::swap(_heap[Child], _heap[Parent]);
            Child = Parent;
        }
    }

private:
    std::vector<T> _heap;// heap-ordered storage; index 0 is the top
};

#include<iostream>
#include"Heap.h"
using namespace std;

// One node of a Huffman tree. Carries a weight plus links to both children
// and to the parent; every link starts out null.
template<class T>
struct HuffmanTreeNode
{
    using Node = HuffmanTreeNode<T>;

    T _weight;                // weight stored in this node
    Node* _left = nullptr;    // left child
    Node* _right = nullptr;   // right child
    Node* _parent = nullptr;  // parent node

    // Creates an unlinked node holding a copy of `w`.
    HuffmanTreeNode(const T& w)
        : _weight(w)
    {}
};

// Huffman tree over weights of type T.
//
// The tree owns every node it allocates and frees them in its destructor, so
// copying is disabled (a copy would delete the same nodes twice).
// T must support operator!=, operator< and operator+.
template<class T>
class HuffmanTree
{
public:
    typedef HuffmanTreeNode<T> Node;

    // Creates an empty tree (null root).
    HuffmanTree()
        :_root(nullptr)
    {}

    ~HuffmanTree()
    {
        _destory(_root);
    }

    HuffmanTree(const HuffmanTree&) = delete;
    HuffmanTree& operator=(const HuffmanTree&) = delete;

    // Root of the tree, or null when the tree is empty. Still owned by the tree.
    Node* GetRoot()
    {
        return _root;
    }

    // Orders node pointers by ascending weight. The template parameter is
    // named NodePtr so that it does not shadow the enclosing class's T.
    template<class NodePtr>
    struct Less
    {
        bool operator()(const NodePtr& left, const NodePtr& right) const
        {
            return left->_weight < right->_weight;
        }
    };

    // Builds the tree from a[0..size). Entries equal to `invalid` are skipped.
    // When no entry is valid the tree stays empty (null root).
    HuffmanTree(T* a, int size, T invalid)
        :_root(nullptr)
    {
        Myheap<Node*, Less<Node*>> hp;   // lightest node on top
        for (int i = 0; i < size; i++)
        {
            if (a[i] != invalid)
            {
                hp.PushHeap(new Node(a[i]));
            }
        }
        // Repeatedly merge the two lightest nodes under a new parent whose
        // weight is their sum, until a single node (the root) remains.
        while (hp.Size() > 1)
        {
            Node* left = hp.Top();
            hp.PopHeap();
            Node* right = hp.Top();
            hp.PopHeap();

            Node* parent = new Node(left->_weight + right->_weight);
            parent->_left = left;
            parent->_right = right;
            left->_parent = parent;
            right->_parent = parent;
            hp.PushHeap(parent);
        }
        // Top() must not be called on an empty heap.
        if (!hp.Empty())
        {
            _root = hp.Top();
        }
    }

protected:
    // Deletes the subtree rooted at `root`, children first.
    void _destory(Node* root)
    {
        if (nullptr == root)
            return;
        _destory(root->_left);
        _destory(root->_right);
        delete root;
    }

private:
    Node* _root;   // owning pointer to the root; null for an empty tree
};

#pragma warning(disable:4996)
#include<assert.h>
#include<stdio.h>
#include<stdlib.h>
#include<Windows.h>
#include<iostream>
#include<string>
#include"huffmanTree.h"
typedef long long type;

// Per-byte record used as the Huffman tree weight: the byte value, how many
// times it occurs, and the Huffman code assigned to it.
// All comparisons look only at the occurrence count.
struct weight
{
    unsigned char _ch;   // byte value
    type _count;         // number of occurrences
    std::string _code;   // Huffman code as a string of '0'/'1' characters

    // Creates a record for byte 0 with the given count and an empty code.
    weight(type count = 0)
        : _ch(0)
        , _count(count)
        , _code()
    {}

    // Sum of the two counts; the result has byte 0 and an empty code.
    weight operator+(const weight& w) const
    {
        return weight(_count + w._count);
    }

    // True when this count is smaller than w's.
    bool operator<(const weight& w) const
    {
        return _count < w._count;
    }

    // True when the two counts differ.
    bool operator!=(const weight& w) const
    {
        return _count != w._count;
    }
};

class HuffmanPress
{
public:
HuffmanPress()
{
  for (int i = 0; i < 256; i++)
  {
   _infos[i]._ch = (unsigned char)i;
  }
}

bool FilePress(const char* filename)
{
  //统计出每个字符出现的次数。
  FILE* fOut = fopen(filename, "rb");
  assert(fOut);
  int ch = fgetc(fOut);
  type charcount = 0; //统计出字符出现的总次数
  while (ch != EOF)
  {
   if (feof(fOut))
    break;
   _infos[ch]._count++;
   ch = fgetc(fOut);
   charcount++;
  }
  weight invalid(0);
  HuffmanTree<weight> hf(_infos, 256, invalid);    //用得到的权重数组构建一个Huffman树
  HuffmanTreeNode<weight>* root = hf.GetRoot();
  //得到Huffman编码
  string code;
  _GetCodeR(root, code);
  //开始压缩,创建压缩后的文件
  string CompressFilename = filename;
  CompressFilename += ".huffman";
  FILE* fIn = fopen(CompressFilename.c_str(), "wb");
  assert(fIn);
  //统计完次数使得文件指针指向了最后，所以需要使指针指向文件头
  fseek(fOut, 0, SEEK_SET);
  //向压缩文件里写入Huffman编码
  int pos = 0;
  char value = 0;
  int ch1 = fgetc(fOut);
  while (ch1 != EOF)
  {
   if (feof(fOut))
    break;
   string& code = _infos[ch1]._code;
   for (size_t i = 0; i < code.size(); i++)
   {
    value <<= 1;
    if (code[i] == '1') //得到二进制的1
    {
     value |= 1;
    }
    if (++pos == 8)   //满8位写入文件
    {
     fputc(value, fIn);
     value = 0;
     pos = 0;
    }
   }

   ch1 = fgetc(fOut);
  }
  if (pos)    //最后的编码不满足一个字节
  {
   value = value << (8 - pos);
   fputc(value, fIn);
  }
  //将字符和字符出现的次数写进配置文件，文件解压时会用到
  string ConfigFilename = filename;
  ConfigFilename += ".config";
  FILE* fConfig = fopen(ConfigFilename.c_str(), "wb");
  assert(fConfig);
  char countStr[20]; //字符出现的次数
  //先把所有字符出现的总次数写进配置文件，为防止超过int范围，charcount使用的是long long 所以要分两步写入
  itoa(charcount >> 32, countStr, 10); //转换高位
  fputs(countStr, fConfig); //写入
  fputc('\n', fConfig);
  itoa(charcount & 0Xffffffff, countStr, 10); //转换低位
  fputs(countStr, fConfig); //写入
  fputc('\n', fConfig);
  for (int i = 0; i < 256; i++)
  {
   string put;
   if (_infos[i] != invalid)
   {
    fputc(_infos[i]._ch, fConfig);//必须先把ch放进去，如果把ch作为string的字符最后转换为C的字符，会导致'\0'没有处理
    put.push_back(',');
    itoa(_infos[i]._count, countStr, 10);
    put += countStr;
    fputs(put.c_str(), fConfig);
    fputc('\n', fConfig);
   }
  }
  fclose(fOut);
  fclose(fIn);
  fclose(fConfig);
  return true;
}
bool FileUncompress(char* filename) //这里给的是压缩文件名
{
  //1.读取配置文件
  string ConfigFilename = filename;
  int count = ConfigFilename.rfind('.');
  ConfigFilename = ConfigFilename.substr(0, count);
  string UnCompressname = ConfigFilename + ".unpress";
  FILE* fUnCompress = fopen(UnCompressname.c_str(), "wb"); //创建解压缩文件
  ConfigFilename += ".config";
  FILE* fconfig = fopen(ConfigFilename.c_str(), "rb");
  assert(fconfig);
  assert(fUnCompress);
  FILE* fpress = fopen(filename, "rb"); //打开压缩好的文件
  assert(fpress);

  type charcount = 0; //先读出字符出现的总次数
  string line;
  _ReadLine(fconfig, line);
  charcount = atoi(line.c_str());
  charcount <<= 32;
  line.clear();
  _ReadLine(fconfig, line);
  charcount += atoi(line.c_str());
  line.clear();
  while (_ReadLine(fconfig, line)) //文件结束时feof会返回0
  {
   if (!line.empty())
   {
    char ch = line[0];
    _infos[(unsigned char)ch]._count = atoi(line.substr(2).c_str());
    line.clear();
   }
   else //若读到一个空行，对应的字符为换行符
   {
    line += '\n';
   }
  }
  //2.再次构建Huffman树
  weight invalid(0);
  HuffmanTree<weight> hf(_infos, 256, invalid);    //用得到的权重数组构建一个Huffman树
  HuffmanTreeNode<weight>* root = hf.GetRoot();
  HuffmanTreeNode<weight>* cur = root;
  char ch = fgetc(fpress);
  int pos = 8;
  while (1)
  {
   --pos;
   if ((ch >> pos) & 1)
   {
    cur = cur->_right;
   }
   else
   {
    cur = cur->_left;
   }

   if (cur->_left == NULL&&cur->_right == NULL)
   {
    fputc(cur->_weight._ch, fUnCompress);
    cur = root;   //再次从根节点遍历
    charcount--;
   }
   if (pos == 0)
   {
    ch = fgetc(fpress);
    pos = 8;
   }
   if (charcount == 0) //不读取压缩时为了凑够一个字节而加进去的比特位
    break;
  }

  fclose(fconfig);
  fclose(fpress);
  fclose(fUnCompress);
  return true;
}

protected:
bool _ReadLine(FILE* filename, string& line)
{
  assert(filename);
  if (feof(filename))
   return false;
  unsigned char ch = fgetc(filename);

  while (ch != '\n')
  {
   line += ch;
   ch = fgetc(filename);

   if (feof(filename))
    //break;
    return false;
  }
  return true;
}

void _GetCodeR(HuffmanTreeNode<weight>* root, string code)
{
  if (NULL == root)
   return;
  if (root->_left == NULL&& root->_right == NULL)
  {
   _infos[root->_weight._ch]._code = code;

  }
  _GetCodeR(root->_left, code + '0');
  _GetCodeR(root->_right, code + '1');

}
public:
weight _infos[256];
};

#include"FileCompress.h"
// Compresses the sample file and prints the elapsed time in milliseconds.
// A failed compression is reported on stderr instead of being ignored.
void TestCompress()
{
    HuffmanPress hft;
    // Tick counts are kept unsigned so that the subtraction below stays
    // correct even if the counter wraps between the two readings.
    const unsigned long begin = GetTickCount();
    //hft.FilePress("test1.txt");
    const bool ok = hft.FilePress("listen.mp3");
    //hft.FilePress("1.jpg");
    const unsigned long end = GetTickCount();
    if (!ok)
    {
        std::cerr << "FilePress failed" << std::endl;
    }
    std::cout << end - begin << std::endl;
}

// Decompresses the sample archive and prints the elapsed time in
// milliseconds. A failed decompression is reported on stderr.
void TestUnCompress()
{
    HuffmanPress hf;
    // Held in a writable array: a string literal does not convert to a
    // non-const char* parameter in standard C++11 and later.
    //char packedName[] = "test1.txt.huffman";
    //char packedName[] = "1.jpg.huffman";
    char packedName[] = "listen.mp3.huffman";
    // Tick counts are kept unsigned so that the subtraction below stays
    // correct even if the counter wraps between the two readings.
    const unsigned long begin = GetTickCount();
    const bool ok = hf.FileUncompress(packedName);
    const unsigned long end = GetTickCount();
    if (!ok)
    {
        std::cerr << "FileUncompress failed" << std::endl;
    }
    std::cout << end - begin << std::endl;
}
// Entry point: runs the compression test, then the decompression test.
int main()
{
    TestCompress();     // compress first ...
    TestUnCompress();   // ... then decompress
    // Falling off the end of main is equivalent to `return 0`.
}

Adoiphs

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
文件压缩

#pragma once #include <iostream> #include <vector> using namespace std; template<class T> class Greate//大堆 { public: bool operator()(const T& left, const T& right)//仿函数，重载()大于 { re...
复制链接

扫一扫