理论知识
增量压缩适合信号压缩。最原始的思路比较简单:假设原始数据是一个int序列,经过增量压缩时,第一个整数仍然用4个字节表示,后续的整数可以只保存增量,增量用一个signed char表示,注意增量可正可负。举个例子:
原始序列:P0, P1, P2, P3
压缩序列:P0, D1, D2, D3
解压压缩序列时:
P1 = P0 + D1;
P2 = P0 + D1 + D2;
P3 = P0 + D1 + D2 + D3;
计算压缩比:
原始序列大小:4 * sizeof(int) = 16
压缩序列大小:4 + 3*sizeof(char) = 7;
压缩比为 7 / 16 ≈ 0.44
上面示例中假设数据连续性很好,没有突变的数据。在实际使用中,需要考虑突变数据,即压缩序列中的某个Di可能超出char的表示范围[-128,127],此时可以使用填充转义符。
假设下面序列中P4是个突变数据,计算出来的D4超出char范围,此时先写入转义符0x7F,再用4个字节存储P4,后面继续增量压缩。
原始序列:P0, P1, P2, P3, P4, P5, P6...
压缩序列:P0, D1, D2, D3, 0x7F, P4, D5, D6...
解压压缩序列时:
P1 = P0 + D1;
P2 = P0 + D1 + D2;
P3 = P0 + D1 + D2 + D3;
P4 = P4;(转义符之后直接读取的原始值)
P5 = P4 + D5;
P6 = P4 + D5 + D6;
代码实现
// Growable byte buffer; the write_* helpers below append to it and
// DeltaCompressor uses it to hold the compressed stream.
typedef std::vector<char> BufferType;
// Appends the two bytes of `sData` to the end of `data`, lowest address
// first (i.e. in the object's in-memory byte order).
static void write_Short( short sData, BufferType& data )
{
    const char* bytes = reinterpret_cast<const char*>( &sData );
    data.push_back( bytes[0] );
    data.push_back( bytes[1] );
}
// Appends the single byte `sData` to the end of `data`.
static void write_Byte( char sData, BufferType& data )
{
    data.push_back( sData );
}
// Delta (incremental) codec for sequences of 16-bit samples.
//
// Stream layout written by compress() and read back by uncompress():
//   [first value : sizeof(short) bytes, in-memory byte order]
//   then, for every following sample, either
//     [delta : 1 byte, signed, in the range -127..126]            or
//     [0x7F escape byte][absolute value : sizeof(short) bytes]
//
// Absolute values are always stored as `short`, so a `Type` wider than
// `short` is truncated to 16 bits when it is written.
class DeltaCompressor
{
public:
    // Tuning constants exposed to callers. They are not referenced inside
    // this class; they are kept so existing code that names them still builds.
    enum Param
    {
        MaxBitLen = 24,
        Threshold = 127
    };

    DeltaCompressor()
        : compressRate(0.0)
    {}

    // Encodes `length` samples from `iBuffer` and appends the encoded bytes
    // to `oBuffer` (existing content of `oBuffer` is left in place).
    //
    // Returns the ratio (bytes appended) / (sizeof(Type) * length) and also
    // remembers it in `compressRate`. When there is nothing to encode
    // (`iBuffer` is null or `length` < 1) nothing is written and 1.0 is
    // returned.
    template< class Type>
    float compress(Type* iBuffer, int length, std::vector<char>& oBuffer)
    {
        if (iBuffer == 0 || length < 1)
            return 1.0f;

        // Measure only what this call appends, so a pre-filled buffer does
        // not distort the reported ratio.
        const std::vector<char>::size_type startSize = oBuffer.size();

        // `accum` mirrors the value the decoder will have reconstructed.
        Type accum = iBuffer[0];
        writeShort(static_cast<short>(accum), oBuffer);

        for (int i = 1; i < length; ++i)
        {
            const int delta = iBuffer[i] - accum;
            if (isRight(delta))
            {
                oBuffer.push_back(static_cast<char>(delta));
                accum += delta;
            }
            else
            {
                // Jump too large for one byte: emit the escape marker
                // followed by the absolute value, then restart from it.
                oBuffer.push_back(static_cast<char>(EscapeByte));
                writeShort(static_cast<short>(iBuffer[i]), oBuffer);
                accum = iBuffer[i];
            }
        }

        compressRate = 1.0f * (oBuffer.size() - startSize) / ( sizeof(Type) * length );
        return compressRate;
    }

    // Decodes the stream in `iBuffer` into `oBuffer` and stores the number of
    // decoded samples in `length`.
    //
    // The caller owns `oBuffer`; it must have room for the decoded samples,
    // which is never more than iBuffer.size() - 1 values. An empty stream, or
    // one too short to hold the first value, yields `length == 0`. Decoding
    // stops at an escape byte that is not followed by a complete value.
    template<class Type>
    void uncompress(std::vector<char>& iBuffer, Type* oBuffer, int& length)
    {
        typedef std::vector<char>::size_type Index;

        const Index total = iBuffer.size();
        const Index valueBytes = sizeof(short);

        length = 0;
        if (oBuffer == 0 || total < valueBytes)
            return;

        const char* bytes = &iBuffer[0];
        Index pos = 0;
        int produced = 0;

        // The stream always starts with an absolute value.
        Type current = readShort(bytes + pos);
        pos += valueBytes;
        oBuffer[produced++] = current;

        while (pos < total)
        {
            if (bytes[pos] == EscapeByte)
            {
                ++pos;  // skip the escape marker
                if (total - pos < valueBytes)
                    break;  // truncated escape sequence: stop cleanly
                current = readShort(bytes + pos);
                pos += valueBytes;
            }
            else
            {
                current += readByte(bytes + pos);
                ++pos;
            }
            oBuffer[produced++] = current;
        }
        length = produced;
    }

private:
    // Marker byte announcing that an absolute value follows.
    enum { EscapeByte = 0x7F };

    // True when `delta` may be stored as a single delta byte. 127 is excluded
    // because that byte value is the escape marker; -128 is excluded as well,
    // so the accepted range is -127..126.
    static bool isRight(int delta)
    {
        return ( delta > -128 && delta < 127 );
    }

    // Appends the bytes of `value` to `out`, lowest address first.
    static void writeShort(short value, std::vector<char>& out)
    {
        const char* raw = reinterpret_cast<const char*>(&value);
        for (unsigned k = 0; k < sizeof(short); ++k)
            out.push_back(raw[k]);
    }

    // Rebuilds a short from the bytes at `src`; exact inverse of writeShort.
    // Copying byte by byte avoids an unaligned or aliasing read.
    static short readShort(const char* src)
    {
        short value = 0;
        char* raw = reinterpret_cast<char*>(&value);
        for (unsigned k = 0; k < sizeof(short); ++k)
            raw[k] = src[k];
        return value;
    }

    // Interprets the byte at `src` as a signed 8-bit delta, regardless of
    // whether plain `char` is signed or unsigned on the target.
    static int readByte(const char* src)
    {
        const int raw = static_cast<unsigned char>(*src);
        return raw > 127 ? raw - 256 : raw;
    }

    // Ratio returned by the most recent compress() call that encoded data.
    float compressRate;
};
测试代码
short srcBuffer[] = {25,23,23,29,29,43,43,51,51,53,53,58,58,59,59,64,64,62,62,60,55,79,86,100,100,145,145,148,148,147,147,205,205,248,248,230,169,211,269,228,167,178,216,195,223,223,201,201,243,243,245,245,257,257,277,277,297,297,294,294,285};
short indexBuffer[] = {25, 23, 23, 29, 29, 43};
int length = sizeof(srcBuffer) / sizeof(srcBuffer[0]);
vector<char> compressedData;
DeltaCompressor dCompr;
float rate = dCompr.compress(srcBuffer, length, compressedData);
short* unComprBuffer = new short[length];
int unComprLen = 0;
dCompr.uncompress(compressedData, unComprBuffer, unComprLen);
assert(unComprLen==length);
int i=0;
for (i=0; i<length; i++)
{
if (unComprBuffer[i] != srcBuffer[i])
printf("ERROR IN %d\n", i);
}
i == length ? printf("SUCCESS\n") : printf("FAILED\n");
printf("Compress Rate = %f\n", rate);
输出压缩比为:
SUCCESS
Compress Rate = 0.508197
参考:http://hi.baidu.com/gilbertjuly/item/93c6543196db13322e0f8150 数据压缩