老林的C语言新课, 想快速入门点此 <C 语言编程核心突破>
游程编码
如果一个二进制数据是大量重复的 0000… 连接大量的 1111… ,我们就可以用每段数字的长度进行描述,压缩数据。
比如 00000000 11111111 00000000 00000000 00000000 00000000 可以被描述为 8 8 32.
代表8个0,连接8个1,连接32个0.
对于一些杂乱的信息,可能没有太多的压缩能力,但对于位图这种比较特殊的结构数据,则可能存在较为适应的压缩。
位图
我们所见的黑白位图,最朴素的描述方法是 长 * 宽 bool 矩阵,比如:
4 * 4 矩阵:
0000
1010
0101
1111
而位图,通常要大很多,如果连续的重复序列非常多,就可以用游程编码进行压缩了。
具体算法
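我们通常用一个固定位数的数值(比如 8 位的 unsigned char)来表示长度:以二进制方式读取原文件,逐位数出每一段连续重复序列(游程)的长度,再把这个长度用一个 unsigned char 写出。编码约定总是从 0 的游程开始,0 和 1 的游程交替出现;如果数据以 1 开头,第一个长度就是 0。由于位数限制,一个 unsigned char 最多只能表示 255 的长度,更长的游程则用中间插 0 的方式解决。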
比如我有265个连续的0,此时就用 255 0 10 表示,255个连续的0,0个连续的1,10个连续的0.
代码:还是二进制读写,略改动,这个玩具类设计很不C++,使用中还是有陷阱,仅供参考。
#ifndef BINSTDIO
#define BINSTDIO
#include <bitset>
#include <cassert>
#include <iostream>
#include <string>
namespace BIO
{
// Bit-level reader layered on std::cin.
//
// All members are static: the type is a stateless facade over one shared
// one-byte buffer, not something to instantiate. Bits are delivered most
// significant bit first within each byte.
//
// End of input is only detected when a read actually fails, so callers are
// expected to call a read function and then test isEmpty() before trusting
// the value they got back.
struct cbin
{
    // Static-only utility; instances cannot be created.
    cbin() = delete;

    // Returns true once std::cin has its eofbit set, either because a read
    // ran off the end of the stream or because close() was called.
    static auto isEmpty() -> bool
    {
        return std::cin.eof();
    }

    // Reads one bit.
    // Returns false when no data is left; the buffer is never indexed with a
    // negative position (the previous code did so after hitting EOF).
    static auto readBool() -> bool
    {
        if (isEmpty())
        {
            markExhausted();
        }
        if (N == 0)
        {
            fillBuffer();
        }
        if (N <= 0)
        {
            // Either the stream was already exhausted or the refill failed.
            return false;
        }
        --N;
        return buffer[static_cast<unsigned int>(N)];
    }

    // Reads 8 bits and returns them as one byte. Works on any bit alignment:
    // the unread bits of the current byte become the high part of the result
    // and the missing low bits are taken from the next byte.
    // Returns 0 when called on an already exhausted stream.
    static auto readChar() -> unsigned char
    {
        if (isEmpty())
        {
            markExhausted();
        }
        if (N == buffersize)
        {
            // Byte aligned with a full buffer: hand it out and prefetch.
            const auto whole = buffer;
            fillBuffer();
            return static_cast<unsigned char>(whole.to_ulong() & bufferfill);
        }
        if (N < 0)
        {
            return 0;
        }
        // 0 <= N < buffersize: N bits are still unread in the buffer.
        const int pending = N;
        auto x = buffer;
        x <<= static_cast<unsigned int>(buffersize - pending);
        // On failure fillBuffer() leaves the buffer all ones, so a truncated
        // byte is padded with 1 bits; callers detect this through isEmpty().
        fillBuffer();
        // The new byte has donated its top (buffersize - pending) bits, which
        // leaves exactly `pending` unread bits in it.
        N = pending;
        x |= (buffer >> static_cast<unsigned int>(pending));
        return static_cast<unsigned char>(x.to_ulong() & bufferfill);
    }

    // Reads r bits (1 <= r <= 16) and returns them right-aligned.
    // The result type is a single byte, so for r > 8 only the last 8 bits
    // read are retained.
    // Throws std::string when r is outside 1..16.
    static auto readChar(int r) -> unsigned char
    {
        if (r < 1 || r > 2 * buffersize)
        {
            throw std::string("Illegal value for r = ") + std::to_string(r);
        }
        if (r == buffersize)
        {
            return readChar();
        }
        unsigned char x = 0;
        for (int i = 0; i != r; ++i)
        {
            const unsigned int bit = readBool() ? 1U : 0U;
            x = static_cast<unsigned char>((x << 1) | bit);
        }
        return x;
    }

    // Reads four bytes, most significant byte first, and returns them as an
    // int. The value is accumulated in an unsigned integer so that shifting
    // into the top bit is well defined.
    static auto readInt() -> int
    {
        unsigned int acc = 0;
        for (int i = 0; i < 4; ++i)
        {
            acc = (acc << buffersize) | readChar();
        }
        return static_cast<int>(acc);
    }

    // Marks the input as finished by setting eofbit on std::cin.
    static void close()
    {
        std::cin.setstate(std::ios::eofbit);
    }

    // Pre-loads the first byte into the buffer.
    static void begin()
    {
        fillBuffer();
    }

private:
    // Records the "no more data" state: reports it on stderr, fills the
    // buffer with ones and sets the bit counter to -1.
    static void markExhausted()
    {
        std::cerr << "EOF" << std::endl;
        buffer = -1;
        N = -1;
    }

    // Loads the next byte from std::cin into the buffer. The byte is read
    // into a plain char and then assigned, instead of writing raw bytes over
    // the std::bitset object, whose internal layout is not specified.
    static void fillBuffer()
    {
        char byte = 0;
        if (std::cin.read(&byte, 1))
        {
            buffer = static_cast<unsigned char>(byte);
            N = buffersize;
        }
        else
        {
            markExhausted();
        }
    }

    static constexpr int buffersize = 8;
    static constexpr unsigned int bufferfill = 0xff;
    // One-byte bit buffer.
    static std::bitset<buffersize> buffer;
    // Number of unread bits left in the buffer; -1 once input is exhausted.
    static int N;
};
// Namespace-scope constant naming the bitset width in the out-of-class
// static member definitions of this header.
constexpr int buffersize = 8;
std::bitset<buffersize> cbin::buffer;
int cbin::N;
// Bit-level writer layered on std::cout.
//
// All members are static; the type cannot be instantiated. Bits are packed
// most significant bit first into a one-byte buffer that is written out
// whenever it fills up. flush() and close() pad a partially filled byte with
// zero bits on the right before writing it.
struct cbout
{
    // Static-only utility; instances cannot be created.
    cbout() = delete;

    // Writes out any buffered bits (zero padded to a full byte) and flushes
    // std::cout.
    static void flush()
    {
        clearBuffer();
        std::cout.flush();
    }

    // Writes a single bit.
    static void write(bool bit)
    {
        writeBit(bit);
    }

    // Writes one 8-bit value.
    static void write(unsigned char c)
    {
        writeByte(c);
    }

    // Writes a 32-bit value as four bytes, most significant byte first.
    static void write(unsigned int x)
    {
        writeByte((x >> 3 * buffersize) & bufferfill);
        writeByte((x >> 2 * buffersize) & bufferfill);
        writeByte((x >> buffersize) & bufferfill);
        writeByte((x >> 0) & bufferfill);
    }

    // Writes the low r bits of c (1 <= r <= 16), most significant first.
    // For r > 8 the extra leading bits are zeros, because c only has 8 bits.
    // Throws std::string when r is outside 1..16; nothing is written then.
    static void write(unsigned char c, int r)
    {
        // Validate before producing any output.
        if (r < 1 || r > 2 * static_cast<int>(buffersize))
        {
            throw std::string("Illegal value for r = ") + std::to_string(r);
        }
        if (r == static_cast<int>(buffersize))
        {
            // Whole byte: use the byte path and stop here. Without this
            // return the byte was written a second time by the loop below.
            write(c);
            return;
        }
        for (int i = 0; i != r; ++i)
        {
            const bool bit = ((c >> (r - i - 1)) & 1) == 1;
            writeBit(bit);
        }
    }

    // Writes out any buffered bits (zero padded to a full byte) and flushes
    // std::cout. Same effect as flush().
    static void close()
    {
        clearBuffer();
        std::cout.flush();
    }

private:
    // Appends one bit to the buffer and writes the buffer once it is full.
    static void writeBit(bool bit)
    {
        buffer <<= 1;
        buffer[0] = bit;
        ++N;
        if (N == static_cast<int>(buffersize))
        {
            clearBuffer();
        }
    }

    // Writes the 8-bit value x (must be < 256).
    static void writeByte(unsigned int x)
    {
        assert(x < 256);
        const unsigned char c = static_cast<unsigned char>(x);
        if (N == 0)
        {
            // Byte aligned: bypass the bit buffer.
            std::cout.put(static_cast<char>(c));
            return;
        }
        for (unsigned int i = 0; i != buffersize; ++i)
        {
            const bool bit = ((c >> (buffersize - i - 1)) & 1) == 1;
            writeBit(bit);
        }
    }

    // Writes the buffered bits as one byte, padding unused low bits with
    // zeros, then empties the buffer. Does nothing when no bits are pending.
    // The byte value is extracted with to_ulong() rather than by copying the
    // raw storage of the std::bitset object, whose layout is unspecified.
    static void clearBuffer()
    {
        if (N <= 0)
        {
            return;
        }
        buffer <<= (buffersize - static_cast<unsigned int>(N));
        const unsigned char byte = static_cast<unsigned char>(buffer.to_ulong());
        std::cout.put(static_cast<char>(byte));
        N = 0;
        buffer = 0;
    }

    static constexpr unsigned int bufferfill = 0xff;
    static constexpr unsigned int buffersize = 8;
    // One-byte bit buffer.
    static std::bitset<buffersize> buffer;
    // Number of bits currently held in the buffer.
    static int N;
};
// The width is spelled via the class's own constant so that this definition
// does not depend on any unrelated namespace-scope name.
std::bitset<cbout::buffersize> cbout::buffer;
int cbout::N;
// Dumps standard input as a stream of '0'/'1' characters followed by the
// total bit count.
//
// argv[1] is the number of bits printed per line:
//   - a non-zero value prints the bits, breaking the line every `width` bits;
//   - 0 prints no bits and only reports the count (previously this case
//     looped forever because no input was consumed);
//   - if argv[1] is absent, 16 bits per line are used instead of passing a
//     null pointer to std::stoi.
// std::stoi may throw std::invalid_argument / std::out_of_range for a
// malformed width.
inline void BinaryDump(char *argv[])
{
    constexpr int defaultWidth = 16;
    const bool hasWidth = (argv != nullptr) && (argv[1] != nullptr);
    const int width = hasWidth ? std::stoi(argv[1]) : defaultWidth;

    cbin::begin();
    int cnt = 0;
    while (!cbin::isEmpty())
    {
        // End of input is only detected by a failed read, so read first and
        // discard the value if that read ran off the end.
        const bool bit = cbin::readBool();
        if (cbin::isEmpty())
        {
            break;
        }
        if (width != 0)
        {
            // Break the line only when another bit really follows; this
            // avoids a stray blank line when the count is a multiple of
            // the width.
            if (cnt != 0 && cnt % width == 0)
            {
                std::cout << '\n';
            }
            std::cout << (bit ? '1' : '0');
        }
        ++cnt;
    }
    std::cout << std::endl;
    std::cout << cnt << " bits" << std::endl;
}
} // namespace BIO
#endif
游程编码代码:
#include "BinStdio_2.h"
#include <iostream>
#include <string>
#include <vector>
// Decodes run-length data from standard input back into a bit stream on
// standard output. Each input byte is a run length; runs alternate between
// 0 bits and 1 bits, starting with 0.
void expand()
{
    bool bit = false;
    for (;;)
    {
        const unsigned char run = BIO::cbin::readChar();
        // A read that ran off the end of the input yields no valid length.
        if (BIO::cbin::isEmpty())
        {
            break;
        }
        for (int remaining = run; remaining > 0; --remaining)
        {
            BIO::cbout::write(bit);
        }
        bit = !bit;
    }
    BIO::cbout::close();
}
// Run-length encodes the bits of standard input onto standard output.
// Output bytes are run lengths that alternate between runs of 0 and runs of
// 1, starting with 0. A run longer than 255 is emitted as 255, then a
// zero-length run of the other value, then the remainder.
void compress()
{
    unsigned char runLength = 0;
    bool expected = false;
    for (;;)
    {
        const bool bit = BIO::cbin::readBool();
        // A read that ran off the end of the input yields no valid bit.
        if (BIO::cbin::isEmpty())
        {
            break;
        }
        if (bit != expected)
        {
            // The run ended: emit its length and switch to the other value.
            BIO::cbout::write(runLength);
            runLength = 0;
            expected = !expected;
        }
        else if (runLength == 255)
        {
            // Counter is full: emit 255 followed by an empty run of the
            // opposite value so the same run can continue.
            BIO::cbout::write(runLength);
            runLength = 0;
            BIO::cbout::write(runLength);
        }
        ++runLength;
    }
    // Emit the final run.
    BIO::cbout::write(runLength);
    BIO::cbout::close();
}
// Entry point. The first character of the first command-line argument
// selects the mode:
//   '-'  run-length compress standard input to standard output
//   '+'  expand run-length data from standard input to standard output
// Returns 0 on success. If the argument is missing or names no known mode,
// a usage message is printed to stderr and 1 is returned (previously a
// missing argument dereferenced a null pointer).
auto main(int argc, char *argv[]) -> int
{
    // Debugging aid: BIO::BinaryDump(argv);
    const bool hasMode = (argc > 1) && (argv[1] != nullptr);
    const char mode = hasMode ? argv[1][0] : '\0';
    switch (mode)
    {
    case '-':
        compress();
        return 0;
    case '+':
        expand();
        return 0;
    default:
        std::cerr << "usage: <program> -|+   ('-' compresses, '+' expands; "
                     "data is read from stdin and written to stdout)"
                  << std::endl;
        return 1;
    }
}
老林的C语言新课, 想快速入门点此 <C 语言编程核心突破>