GZIP压缩
zlib是一个通用的压缩开源库,提供了在内存中压缩和解压的函数,包括对解压后数据的校验。目前版本的zlib只支持deflate方法,但是其它的方法将会被添加进来并且拥有同样的接口。
gzip也是一种数据压缩格式,可以大体分为头部,数据部和尾部三个部分,其中头部和尾部主要是一些文档属性和校验信息(rfc1952),数据部主要是用deflate方法压缩得到的数据。
zlib库默认的压缩方法并不是gzip的,而是zlib的,因此使用zlib压缩得到gzip格式的数据有两种方法:
- 使用zlib提供的gz***系列函数可以直接把想要的内容写入一个磁盘gzip文件;
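- 如果想在内存中生成gzip格式的数据,可以在初始化的时候调用deflateInit2函数,并把windowBits参数指定为 MAX_WBITS + 16(即gzip格式),代码如下:
deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, MAX_WBITS + 16, 8, Z_DEFAULT_STRATEGY);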
之后,用deflate压缩出来的数据就是gzip的了
从 HTTP 的角度
1 客户端 在 http Request Header 上带上 Accept-Encoding:gzip,deflate
2 服务器若是支持 gzip 压缩则在 HTTP response header
部分返回 Content-Encoding: gzip 或者 Content-Type: application/x-gzip
3 将 body 部分用 gzip 解压缩 则得到网页内容 .
传说中 ie 有 bug 在处理 js css 压缩的时候有 bug, 我不理解 挺简单的怎么会有 bug 呢 .
从 gzip 的角度
gzip 是一种数据格式 默认且目前仅使用 deflate 算法压缩 data 部分
zlib 也是一种数据格式 , 使用 deflate 算法压缩数据部分 .
deflate 是一种压缩算法 , 是 huffman 编码的一种加强 ( 先用 LZ77 消除重复串 , 再做 huffman 编码 )
zlib 是一个开源库 , 提供 deflate 压缩和对应的 inflate 解压缩 .
不过 zlib 的 deflate inflate 默认是处理 zlib 格式数据 . 必须使用
deflateInit2(&strm, DEFAULT_COMPRESSION,Z_DEFLATED, DEFAULT_WINDOWSIZE,DEFAULT_MEMLEVEL, Z_DEFAULT_STRATEGY);
初始化才是处理 raw deflate data ( 其中窗口大小参数 windowBits 必须取负值 , 例如 -MAX_WBITS ) .( 这一点在 zlib manual 没有提 , 在 faq 中提到 , 困扰了我好久 , 还是同事 L 帮我调试发现 )
至于 gzip 格式解析 对着 RFC 写就可以了 .
参见 RFC 1950 关于 zlib http://www.faqs.org/rfcs/rfc1950.html
RFC 1951 关于 deflate http://www.faqs.org/rfcs/rfc1951.html
RFC 1952 关于 gzip http://www.faqs.org/rfcs/rfc1952.html
nt CGzip::Ungzip(const std::string & inStr , std::string &outStr){
static int nFileCount=0;
nFileCount++;
string strZipFileName="test";
// CConvert::StrToFile(inStr,strZipFileName+CConvert::toString<int>(nFileCount)+"H.gzip" ;
if(inStr.length()<11){
return -1;
}
//process gzip header
unsigned int skipCt = 10;
unsigned int skipZeroCt = 0;
unsigned char ID1 = inStr[0];
unsigned char ID2 = inStr[1];
unsigned char XFL=inStr[8];
bool bFEXTRA = false ;
bool bFNAME = false ;
bool bFCOMMENT = false ;
bool bFHCRC = false ;
unsigned int XLEN = 0;
if( (ID1!=31) && (ID2!=139)){
return -1; // 非 gzip 头部
}
unsigned char CM = inStr[2];
if(CM!= {
return -1; // 现在都只处理 deflate 压缩的
}
unsigned char FLG = inStr[3];
if( (FLG & GZIP_HEAD_FEXTRA) != 0){
bFEXTRA = true ;
skipCt += 2;
XLEN = inStr[10]+ inStr[11]*256 ;// 按照小端字节序列处理
skipCt += XLEN;
}
if( (FLG & GZIP_HEAD_FNAME) != 0){
bFNAME = true;
skipZeroCt++;
}
if( (FLG & GZIP_HEAD_FCOMMENT) != 0){
bFCOMMENT = true;
skipZeroCt++;
}
size_t passedZeroCt = 0;
size_t iStep = skipCt ;
for( size_t iStep = skipCt ; iStep<inStr.length(); iStep++){
if(passedZeroCt>=skipZeroCt){
break;
}
if(inStr[iStep]=='' {
passedZeroCt++;
}
}
skipCt = iStep ;
if( (FLG & GZIP_HEAD_FHCRC) != 0){
bFHCRC = true;
skipCt+=2 ;
}
string coreStr = inStr.substr(skipCt,inStr.length()-8-skipCt);
return CGzip::Inflate(coreStr,outStr);
}
int CGzip: ogzip(const std::string & inStr , std::string &outStr){
char pAddHead[10];
unsigned long crc = 0;
// gzip header
static const char deflate_magic[2] = {'37', '/213'};
snprintf(pAddHead, 10,
"%c%c%c%c%c%c%c%c%c%c", deflate_magic[0],
deflate_magic[1], Z_DEFLATED, 0 /* flags */,
0, 0, 0, 0 /* 4 chars for mtime */,
0 /* xflags */, 0xff);
string addHead(pAddHead,10);
//gzip's raw deflate body
if(CGzip: eflate(inStr,outStr)<0){
return - 1;
}
//gzip trailer
crc = crc32(crc, (const Bytef*)inStr.data(), inStr.length());
char tailBuf[8];
memcpy(tailBuf, &crc, 4);
int isize=inStr.size();
memcpy(tailBuf,&isize,4);
string tailStr(tailBuf , 8 );
outStr = addHead + outStr+tailStr; //
return outStr.length(); //
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <zlib.h>
/*
 * Compress a buffer into zlib format (deflateInit with default settings).
 *
 * data/ndata   : input buffer and its length; must be non-NULL and non-empty.
 * zdata/nzdata : output buffer; *nzdata holds its capacity on entry and the
 *                number of compressed bytes on success.
 *
 * Returns 0 on success, -1 on invalid arguments or a zlib error (including
 * running out of output space while finishing the stream), or the number of
 * input bytes still unconsumed when the output buffer filled up first.
 * The deflate state is released on every path.
 */
int zcompress(Bytef *data, uLong ndata,
              Bytef *zdata, uLong *nzdata)
{
    z_stream c_stream;
    int err = 0;
    uLong capacity = 0;
    const uLong chunk_max = (uInt)-1;   /* largest count a z_stream field holds */

    if(!data || ndata == 0 || !zdata || !nzdata) return -1;
    /* avail_in is a uInt: refuse input that would be silently truncated. */
    if(ndata > chunk_max) return -1;
    capacity = (*nzdata > chunk_max) ? chunk_max : *nzdata;

    c_stream.zalloc = (alloc_func)0;
    c_stream.zfree = (free_func)0;
    c_stream.opaque = (voidpf)0;
    if(deflateInit(&c_stream, Z_DEFAULT_COMPRESSION) != Z_OK) return -1;

    c_stream.next_in = data;
    c_stream.avail_in = (uInt)ndata;
    c_stream.next_out = zdata;
    c_stream.avail_out = (uInt)capacity;

    /* Feed the input until it is consumed or the output buffer is full. */
    while (c_stream.avail_in != 0 && c_stream.total_out < capacity)
    {
        if(deflate(&c_stream, Z_NO_FLUSH) != Z_OK)
        {
            deflateEnd(&c_stream);
            return -1;
        }
    }
    if(c_stream.avail_in != 0)
    {
        /* Output buffer too small: report how much input is left over. */
        int remaining = (int)c_stream.avail_in;
        deflateEnd(&c_stream);
        return remaining;
    }

    /* Flush whatever is still buffered and write the stream trailer. */
    for (;;) {
        if((err = deflate(&c_stream, Z_FINISH)) == Z_STREAM_END) break;
        if(err != Z_OK)
        {
            deflateEnd(&c_stream);
            return -1;
        }
    }
    if(deflateEnd(&c_stream) != Z_OK) return -1;
    *nzdata = c_stream.total_out;
    return 0;
}
/*
 * Compress a buffer with deflateInit2(..., -MAX_WBITS, ...).
 *
 * NOTE: a negative windowBits makes zlib emit a RAW deflate stream, i.e.
 * without the gzip header/trailer the function name suggests. The format is
 * left unchanged because gzdecompress() in this file decodes exactly this
 * raw format; a real gzip member would need windowBits = MAX_WBITS + 16.
 *
 * data/ndata   : input buffer and its length; must be non-NULL and non-empty.
 * zdata/nzdata : output buffer; *nzdata holds its capacity on entry and the
 *                number of compressed bytes on success.
 *
 * Returns 0 on success, -1 on invalid arguments or a zlib error (including
 * running out of output space while finishing the stream), or the number of
 * input bytes still unconsumed when the output buffer filled up first.
 * The deflate state is released on every path.
 */
int gzcompress(Bytef *data, uLong ndata,
               Bytef *zdata, uLong *nzdata)
{
    z_stream c_stream;
    int err = 0;
    uLong capacity = 0;
    const uLong chunk_max = (uInt)-1;   /* largest count a z_stream field holds */

    if(!data || ndata == 0 || !zdata || !nzdata) return -1;
    /* avail_in is a uInt: refuse input that would be silently truncated. */
    if(ndata > chunk_max) return -1;
    capacity = (*nzdata > chunk_max) ? chunk_max : *nzdata;

    c_stream.zalloc = (alloc_func)0;
    c_stream.zfree = (free_func)0;
    c_stream.opaque = (voidpf)0;
    if(deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                    -MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) return -1;

    c_stream.next_in = data;
    c_stream.avail_in = (uInt)ndata;
    c_stream.next_out = zdata;
    c_stream.avail_out = (uInt)capacity;

    /* Feed the input until it is consumed or the output buffer is full. */
    while (c_stream.avail_in != 0 && c_stream.total_out < capacity)
    {
        if(deflate(&c_stream, Z_NO_FLUSH) != Z_OK)
        {
            deflateEnd(&c_stream);
            return -1;
        }
    }
    if(c_stream.avail_in != 0)
    {
        /* Output buffer too small: report how much input is left over. */
        int remaining = (int)c_stream.avail_in;
        deflateEnd(&c_stream);
        return remaining;
    }

    /* Flush whatever is still buffered and terminate the stream. */
    for (;;) {
        if((err = deflate(&c_stream, Z_FINISH)) == Z_STREAM_END) break;
        if(err != Z_OK)
        {
            deflateEnd(&c_stream);
            return -1;
        }
    }
    if(deflateEnd(&c_stream) != Z_OK) return -1;
    *nzdata = c_stream.total_out;
    return 0;
}
/*
 * Decompress a zlib-format buffer (inflateInit with default settings).
 *
 * zdata/nzdata : compressed input and its length.
 * data/ndata   : output buffer; *ndata holds its capacity on entry and the
 *                number of bytes produced on return.
 *
 * Returns 0 when decoding stopped without a zlib error: at the end of the
 * stream, when the input ran out, or when the output buffer filled up.
 * Returns -1 on invalid arguments or a zlib error. The inflate state is
 * released on every path.
 *
 * Input and output are handed to inflate() in the largest pieces a z_stream
 * can describe instead of one byte at a time. Besides being faster, this
 * avoids losing output that is still pending when the last input byte has
 * already been consumed.
 */
int zdecompress(Byte *zdata, uLong nzdata,
                Byte *data, uLong *ndata)
{
    int err = 0;
    z_stream d_stream; /* decompression stream */
    const uLong chunk_max = (uInt)-1;   /* largest count a z_stream field holds */

    if(!ndata) return -1;
    if((!zdata && nzdata != 0) || (!data && *ndata != 0)) return -1;

    d_stream.zalloc = (alloc_func)0;
    d_stream.zfree = (free_func)0;
    d_stream.opaque = (voidpf)0;
    d_stream.next_in = zdata;
    d_stream.avail_in = 0;
    d_stream.next_out = data;
    d_stream.avail_out = 0;
    if(inflateInit(&d_stream) != Z_OK) return -1;

    while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
        uLong in_left = nzdata - d_stream.total_in;
        uLong out_left = *ndata - d_stream.total_out;
        d_stream.avail_in = (uInt)(in_left < chunk_max ? in_left : chunk_max);
        d_stream.avail_out = (uInt)(out_left < chunk_max ? out_left : chunk_max);
        if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break;
        if(err != Z_OK)
        {
            inflateEnd(&d_stream);
            return -1;
        }
    }
    if(inflateEnd(&d_stream) != Z_OK) return -1;
    *ndata = d_stream.total_out;
    return 0;
}
/*
 * Decompress an HTTP response body.
 *
 * The stream is opened with windowBits 47 (32 + 15), which lets zlib detect
 * gzip or zlib framing automatically. If that fails with Z_DATA_ERROR before
 * any output was produced, the body is decoded again from the start as a raw
 * deflate stream (windowBits -MAX_WBITS), for peers that send deflate data
 * without any header.
 *
 * The previous recovery attempt fed a fake zlib header into the stream after
 * the error. That cannot work: inflate() keeps returning Z_DATA_ERROR once
 * the stream is in the error state, and it also discarded the position in
 * zdata. It is replaced by the restart described above.
 *
 * zdata/nzdata : compressed input and its length.
 * data/ndata   : output buffer; *ndata holds its capacity on entry and the
 *                number of bytes produced on return.
 *
 * Returns 0 when decoding stopped without a zlib error (end of stream, input
 * exhausted, or output buffer full), -1 on invalid arguments or a zlib error.
 * The inflate state is released on every path.
 */
int httpgzdecompress(Byte *zdata, uLong nzdata,
                     Byte *data, uLong *ndata)
{
    int err = 0;
    int tried_raw = 0;                  /* raw-deflate fallback already used? */
    z_stream d_stream = {0}; /* decompression stream */
    const uLong chunk_max = (uInt)-1;   /* largest count a z_stream field holds */

    if(!ndata) return -1;
    if((!zdata && nzdata != 0) || (!data && *ndata != 0)) return -1;

    d_stream.zalloc = (alloc_func)0;
    d_stream.zfree = (free_func)0;
    d_stream.opaque = (voidpf)0;
    d_stream.next_in = zdata;
    d_stream.avail_in = 0;
    d_stream.next_out = data;
    d_stream.avail_out = 0;
    if(inflateInit2(&d_stream, 47) != Z_OK) return -1;

    while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
        uLong in_left = nzdata - d_stream.total_in;
        uLong out_left = *ndata - d_stream.total_out;
        d_stream.avail_in = (uInt)(in_left < chunk_max ? in_left : chunk_max);
        d_stream.avail_out = (uInt)(out_left < chunk_max ? out_left : chunk_max);
        if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break;
        if(err == Z_OK) continue;

        if(err == Z_DATA_ERROR && !tried_raw && d_stream.total_out == 0)
        {
            /* No recognisable gzip/zlib header: restart as raw deflate. */
            tried_raw = 1;
            inflateEnd(&d_stream);
            d_stream.zalloc = (alloc_func)0;
            d_stream.zfree = (free_func)0;
            d_stream.opaque = (voidpf)0;
            d_stream.next_in = zdata;
            d_stream.avail_in = 0;
            d_stream.next_out = data;
            d_stream.avail_out = 0;
            if(inflateInit2(&d_stream, -MAX_WBITS) != Z_OK) return -1;
            continue;   /* init reset total_in/total_out to 0 */
        }

        inflateEnd(&d_stream);
        return -1;
    }
    if(inflateEnd(&d_stream) != Z_OK) return -1;
    *ndata = d_stream.total_out;
    return 0;
}
/*
 * Decompress a RAW deflate buffer (inflateInit2 with -MAX_WBITS).
 *
 * NOTE: despite the name, a negative windowBits means no gzip or zlib
 * header is expected or parsed; this is the counterpart of gzcompress() in
 * this file. To decode real gzip members use windowBits 47 (auto-detect) as
 * httpgzdecompress() does.
 *
 * The former "dummy zlib header" retry on Z_DATA_ERROR was removed: once
 * inflate() has reported a data error it keeps failing, so that branch always
 * ended in -1, and a zlib header has no meaning for a raw inflater.
 *
 * zdata/nzdata : compressed input and its length.
 * data/ndata   : output buffer; *ndata holds its capacity on entry and the
 *                number of bytes produced on return.
 *
 * Returns 0 when decoding stopped without a zlib error (end of stream, input
 * exhausted, or output buffer full), -1 on invalid arguments or a zlib error.
 * The inflate state is released on every path.
 */
int gzdecompress(Byte *zdata, uLong nzdata,
                 Byte *data, uLong *ndata)
{
    int err = 0;
    z_stream d_stream = {0}; /* decompression stream */
    const uLong chunk_max = (uInt)-1;   /* largest count a z_stream field holds */

    if(!ndata) return -1;
    if((!zdata && nzdata != 0) || (!data && *ndata != 0)) return -1;

    d_stream.zalloc = (alloc_func)0;
    d_stream.zfree = (free_func)0;
    d_stream.opaque = (voidpf)0;
    d_stream.next_in = zdata;
    d_stream.avail_in = 0;
    d_stream.next_out = data;
    d_stream.avail_out = 0;
    if(inflateInit2(&d_stream, -MAX_WBITS) != Z_OK) return -1;

    /* Hand inflate() whole buffers: with one-byte steps, output still pending
       after the final input byte was consumed would be dropped. */
    while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
        uLong in_left = nzdata - d_stream.total_in;
        uLong out_left = *ndata - d_stream.total_out;
        d_stream.avail_in = (uInt)(in_left < chunk_max ? in_left : chunk_max);
        d_stream.avail_out = (uInt)(out_left < chunk_max ? out_left : chunk_max);
        if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break;
        if(err != Z_OK)
        {
            inflateEnd(&d_stream);
            return -1;
        }
    }
    if(inflateEnd(&d_stream) != Z_OK) return -1;
    *ndata = d_stream.total_out;
    return 0;
}
#ifdef _DEBUG_ZSTREAM
#define BUF_SIZE 65535
/*
 * Round-trip smoke test: compress a sample string with gzcompress(), print
 * the compressed size, decompress it again with gzdecompress() and print the
 * result.
 */
int main()
{
    const char *data = "kjdalkfjdflkjdlkfjdklfjdlkfjlkdjflkdjflddajfkdjfkdfaskf;ldsfk;ldakf;ldskfl;dskf;ld";
    uLong ndata = strlen(data);
    Bytef zdata[BUF_SIZE];
    /* Leave the last byte of each buffer untouched so the zero fill below
       always terminates the data when it is printed with %s. */
    uLong nzdata = BUF_SIZE - 1;
    Bytef odata[BUF_SIZE];
    uLong nodata = BUF_SIZE - 1;
    memset(zdata, 0, BUF_SIZE);
    //if(zcompress((Bytef *)data, ndata, zdata, &nzdata) == 0)
    if(gzcompress((Bytef *)data, ndata, zdata, &nzdata) == 0)
    {
        /* zdata is binary; %s prints it only up to the first zero byte. */
        fprintf(stdout, "nzdata:%lu %s\n", nzdata, zdata);
        memset(odata, 0, BUF_SIZE);
        /* The decompressor must be given the COMPRESSED length (nzdata),
           not the length of the original text. */
        //if(zdecompress(zdata, nzdata, odata, &nodata) == 0)
        if(gzdecompress(zdata, nzdata, odata, &nodata) == 0)
        {
            fprintf(stdout, "%lu %s\n", nodata, odata);
        }
    }
    return 0;
}
/*
 * Print the uncompressed size recorded in a gzip file.
 *
 * The last four bytes of a gzip member (ISIZE, RFC 1952) hold the size of
 * the original data modulo 2^32, stored little-endian.
 *
 * NOTE: this is a second, independent sample program; it defines its own
 * main() and cannot be built into the same binary as the round-trip test.
 */
#include <stdio.h>
int main()
{
    FILE *pFile = NULL;
    int nRes = 0;
    unsigned char isize[4];
    unsigned long nUnCompressSize = 0;

    /* Binary mode: the trailer bytes must not go through newline translation. */
    pFile = fopen("test.gz", "rb");
    if (!pFile)
        goto Exit0;
    nRes = fseek(pFile, -4, SEEK_END);
    if (nRes)
        goto Exit0;
    /* Read exactly the four ISIZE bytes and fail if the file is too short. */
    if (fread(isize, 1, sizeof(isize), pFile) != sizeof(isize))
        goto Exit0;
    /* Assemble the value explicitly so the result is host-endian independent. */
    nUnCompressSize = (unsigned long)isize[0]
                    | ((unsigned long)isize[1] << 8)
                    | ((unsigned long)isize[2] << 16)
                    | ((unsigned long)isize[3] << 24);
    printf("uncompress file size %lu\n", nUnCompressSize);
Exit0:
    if (pFile)
    {
        fclose(pFile);
    }
    return 0;
}
#endif