web传输中gzip压缩:
浏览器和Web服务器对于压缩网页通信过程:如果浏览器能够接受压缩后的网页内容,那么他会在请求中发送Accept-Encoding请求报头值为"gzip.deflate" 表明浏览器支持gzip和deflate这两种压缩方式,Web服务器读取Accept-Encoding请求报头的值来判断浏览器是否接受压缩的内容,如果接受 Web服务器就将目标页面的响应内容采用gzip压缩方式压缩后再发送到客户端,同时设置Content-Encoding实体报头值为gzip以告知浏览器实体正文采用了gzip的压缩编码。可以通过过滤器对内容压缩,原理是用包装类对象替换原始响应对象。
Gzip middleware for Negroni 源码分析
代码中涉及到compress/gzip中的常量和方法,compress/gzip的压缩解压缩源码贴在博客最后以供参考。
1.const
const (
encodingGzip = "gzip"
headerAcceptEncoding = "Accept-Encoding"
headerContentEncoding = "Content-Encoding"
headerContentLength = "Content-Length"
headerContentType = "Content-Type"
headerVary = "Vary"
headerSecWebSocketKey = "Sec-WebSocket-Key"
BestCompression = gzip.BestCompression
BestSpeed = gzip.BestSpeed
DefaultCompression = gzip.DefaultCompression
NoCompression = gzip.NoCompression
)
其中,
NoCompression = 0
BestSpeed = 1
BestCompression = 9
DefaultCompression = -1
代表压缩的level,不能超过BestCompression
2.结构体gzipResponseWriter
type gzipResponseWriter struct {
w *gzip.Writer
negroni.ResponseWriter
wroteHeader bool
}
wroteHeader代表response是否已经编码
3.func WriteHeader
func (grw *gzipResponseWriter) WriteHeader(code int) {
headers := grw.ResponseWriter.Header()
if headers.Get(headerContentEncoding) == "" {
headers.Set(headerContentEncoding, encodingGzip)
headers.Add(headerVary, headerAcceptEncoding)
} else {
grw.w.Reset(ioutil.Discard)
grw.w = nil
}
grw.ResponseWriter.WriteHeader(code)
grw.wroteHeader = true
}
如果目标页面的响应内容未预编码,采用gzip压缩方式压缩后再发送到客户端,同时设置Content-Encoding实体报头值为gzip,否则在写之前令gzipWriter失效(var Discard io.Writer = devNull(0),使得它对任何写调用无条件成功)
4.func Write
func (grw *gzipResponseWriter) Write(b []byte) (int, error) {
if !grw.wroteHeader {
grw.WriteHeader(http.StatusOK)
}
if grw.w == nil {
return grw.ResponseWriter.Write(b)
}
if len(grw.Header().Get(headerContentType)) == 0 {
grw.Header().Set(headerContentType, http.DetectContentType(b))
}
return grw.w.Write(b)
}
写内容的函数,1.报头未写 -> WriteHeader() 2.gzipWriter没有,说明不gzip压缩 -> ResponseWriter写,返回 3.报头未设置 -> 通过net/http库函数自动检测内容类型设置 4.gzipWriter写
5.结构体handler
type handler struct {
pool sync.Pool
}
临时对象池:gzip对象
6.func Gzip
func Gzip(level int) *handler {
h := &handler{}
h.pool.New = func() interface{} {
gz, err := gzip.NewWriterLevel(ioutil.Discard, level)
if err != nil {
panic(err)
}
return gz
}
return h
}
Gzip函数返回了一个处理gzip压缩的handler,新建了一个gzip,其中writer为不可用,设置level7.func ServeHTTP
func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next http.HandlerFunc) {
// Skip compression if the client doesn't accept gzip encoding.
if !strings.Contains(r.Header.Get(headerAcceptEncoding), encodingGzip) {
next(w, r)
return
}
// Skip compression if client attempt WebSocket connection
if len(r.Header.Get(headerSecWebSocketKey)) > 0 {
next(w, r)
return
}
// Retrieve gzip writer from the pool. Reset it to use the ResponseWriter.
// This allows us to re-use an already allocated buffer rather than
// allocating a new buffer for every request.
// We defer g.pool.Put here so that the gz writer is returned to the
// pool if any thing after here fails for some reason (functions in
// next could potentially panic, etc)
gz := h.pool.Get().(*gzip.Writer)
defer h.pool.Put(gz)
gz.Reset(w)
// Wrap the original http.ResponseWriter with negroni.ResponseWriter
// and create the gzipResponseWriter.
nrw := negroni.NewResponseWriter(w)
grw := gzipResponseWriter{gz, nrw, false}
// Call the next handler supplying the gzipResponseWriter instead of
// the original.
next(&grw, r)
// Delete the content length after we know we have been written to.
grw.Header().Del(headerContentLength)
gz.Close()
}
处理handler中压缩请求的函数,1.浏览器不接受gzip编码 -> 跳过 2.浏览器尝试进行socket连接 -> 跳过
3.复用gzipWriter,创建gzipResponseWriter,传给下一个handler,删除报头“Content-Length”字段,关掉 gzipwriter
源码
// Package gzip implements a gzip compression handler middleware for Negroni.
package gzip
import (
"compress/gzip"
"io/ioutil"
"net/http"
"strings"
"sync"
"github.com/urfave/negroni"
)
// These compression constants are copied from the compress/gzip package.
const (
encodingGzip = "gzip"
headerAcceptEncoding = "Accept-Encoding"
headerContentEncoding = "Content-Encoding"
headerContentLength = "Content-Length"
headerContentType = "Content-Type"
headerVary = "Vary"
headerSecWebSocketKey = "Sec-WebSocket-Key"
BestCompression = gzip.BestCompression
BestSpeed = gzip.BestSpeed
DefaultCompression = gzip.DefaultCompression
NoCompression = gzip.NoCompression
)
// gzipResponseWriter is the ResponseWriter that negroni.ResponseWriter is
// wrapped in.
type gzipResponseWriter struct {
w *gzip.Writer
negroni.ResponseWriter
wroteHeader bool
}
// Check whether underlying response is already pre-encoded and disable
// gzipWriter before the body gets written, otherwise encoding headers
func (grw *gzipResponseWriter) WriteHeader(code int) {
headers := grw.ResponseWriter.Header()
if headers.Get(headerContentEncoding) == "" {
headers.Set(headerContentEncoding, encodingGzip)
headers.Add(headerVary, headerAcceptEncoding)
} else {
grw.w.Reset(ioutil.Discard)
grw.w = nil
}
grw.ResponseWriter.WriteHeader(code)
grw.wroteHeader = true
}
// Write writes bytes to the gzip.Writer. It will also set the Content-Type
// header using the net/http library content type detection if the Content-Type
// header was not set yet.
func (grw *gzipResponseWriter) Write(b []byte) (int, error) {
if !grw.wroteHeader {
grw.WriteHeader(http.StatusOK)
}
if grw.w == nil {
return grw.ResponseWriter.Write(b)
}
if len(grw.Header().Get(headerContentType)) == 0 {
grw.Header().Set(headerContentType, http.DetectContentType(b))
}
return grw.w.Write(b)
}
// handler struct contains the ServeHTTP method
type handler struct {
pool sync.Pool
}
// Gzip returns a handler which will handle the Gzip compression in ServeHTTP.
// Valid values for level are identical to those in the compress/gzip package.
func Gzip(level int) *handler {
h := &handler{}
h.pool.New = func() interface{} {
gz, err := gzip.NewWriterLevel(ioutil.Discard, level)
if err != nil {
panic(err)
}
return gz
}
return h
}
// ServeHTTP wraps the http.ResponseWriter with a gzip.Writer.
func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next http.HandlerFunc) {
// Skip compression if the client doesn't accept gzip encoding.
if !strings.Contains(r.Header.Get(headerAcceptEncoding), encodingGzip) {
next(w, r)
return
}
// Skip compression if client attempt WebSocket connection
if len(r.Header.Get(headerSecWebSocketKey)) > 0 {
next(w, r)
return
}
// Retrieve gzip writer from the pool. Reset it to use the ResponseWriter.
// This allows us to re-use an already allocated buffer rather than
// allocating a new buffer for every request.
// We defer g.pool.Put here so that the gz writer is returned to the
// pool if any thing after here fails for some reason (functions in
// next could potentially panic, etc)
gz := h.pool.Get().(*gzip.Writer)
defer h.pool.Put(gz)
gz.Reset(w)
// Wrap the original http.ResponseWriter with negroni.ResponseWriter
// and create the gzipResponseWriter.
nrw := negroni.NewResponseWriter(w)
grw := gzipResponseWriter{gz, nrw, false}
// Call the next handler supplying the gzipResponseWriter instead of
// the original.
next(&grw, r)
// Delete the content length after we know we have been written to.
grw.Header().Del(headerContentLength)
gz.Close()
}
compress/gzip压缩源码(ps:找gzip这个包就找了好久,最后在ubuntu下的 /home/usr/lib/go-1.6/src/compress/gzip 目录下找到的)
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip
import (
"compress/flate"
"errors"
"fmt"
"hash"
"hash/crc32"
"io"
)
// These constants are copied from the flate package, so that code that imports
// "compress/gzip" does not also have to import "compress/flate".
const (
NoCompression = flate.NoCompression
BestSpeed = flate.BestSpeed
BestCompression = flate.BestCompression
DefaultCompression = flate.DefaultCompression
)
// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
type Writer struct {
Header // written at first call to Write, Flush, or Close
w io.Writer
level int
wroteHeader bool
compressor *flate.Writer
digest hash.Hash32
size uint32
closed bool
buf [10]byte
err error
}
// NewWriter returns a new Writer.
// Writes to the returned writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the WriteCloser when done.
// Writes may be buffered and not flushed until Close.
//
// Callers that wish to set the fields in Writer.Header must do so before
// the first call to Write, Flush, or Close.
func NewWriter(w io.Writer) *Writer {
z, _ := NewWriterLevel(w, DefaultCompression)
return z
}
// NewWriterLevel is like NewWriter but specifies the compression level instead
// of assuming DefaultCompression.
//
// The compression level can be DefaultCompression, NoCompression, or any
// integer value between BestSpeed and BestCompression inclusive. The error
// returned will be nil if the level is valid.
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
if level < DefaultCompression || level > BestCompression {
return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
}
z := new(Writer)
z.init(w, level)
return z, nil
}
func (z *Writer) init(w io.Writer, level int) {
digest := z.digest
if digest != nil {
digest.Reset()
} else {
digest = crc32.NewIEEE()
}
compressor := z.compressor
if compressor != nil {
compressor.Reset(w)
}
*z = Writer{
Header: Header{
OS: 255, // unknown
},
w: w,
level: level,
digest: digest,
compressor: compressor,
}
}
// Reset discards the Writer z's state and makes it equivalent to the
// result of its original state from NewWriter or NewWriterLevel, but
// writing to w instead. This permits reusing a Writer rather than
// allocating a new one.
func (z *Writer) Reset(w io.Writer) {
z.init(w, z.level)
}
// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
func put2(p []byte, v uint16) {
p[0] = uint8(v >> 0)
p[1] = uint8(v >> 8)
}
func put4(p []byte, v uint32) {
p[0] = uint8(v >> 0)
p[1] = uint8(v >> 8)
p[2] = uint8(v >> 16)
p[3] = uint8(v >> 24)
}
// writeBytes writes a length-prefixed byte slice to z.w.
func (z *Writer) writeBytes(b []byte) error {
if len(b) > 0xffff {
return errors.New("gzip.Write: Extra data is too large")
}
put2(z.buf[0:2], uint16(len(b)))
_, err := z.w.Write(z.buf[0:2])
if err != nil {
return err
}
_, err = z.w.Write(b)
return err
}
// writeString writes a UTF-8 string s in GZIP's format to z.w.
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
func (z *Writer) writeString(s string) (err error) {
// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
needconv := false
for _, v := range s {
if v == 0 || v > 0xff {
return errors.New("gzip.Write: non-Latin-1 header string")
}
if v > 0x7f {
needconv = true
}
}
if needconv {
b := make([]byte, 0, len(s))
for _, v := range s {
b = append(b, byte(v))
}
_, err = z.w.Write(b)
} else {
_, err = io.WriteString(z.w, s)
}
if err != nil {
return err
}
// GZIP strings are NUL-terminated.
z.buf[0] = 0
_, err = z.w.Write(z.buf[0:1])
return err
}
// Write writes a compressed form of p to the underlying io.Writer. The
// compressed bytes are not necessarily flushed until the Writer is closed.
func (z *Writer) Write(p []byte) (int, error) {
if z.err != nil {
return 0, z.err
}
var n int
// Write the GZIP header lazily.
if !z.wroteHeader {
z.wroteHeader = true
z.buf[0] = gzipID1
z.buf[1] = gzipID2
z.buf[2] = gzipDeflate
z.buf[3] = 0
if z.Extra != nil {
z.buf[3] |= 0x04
}
if z.Name != "" {
z.buf[3] |= 0x08
}
if z.Comment != "" {
z.buf[3] |= 0x10
}
put4(z.buf[4:8], uint32(z.ModTime.Unix()))
if z.level == BestCompression {
z.buf[8] = 2
} else if z.level == BestSpeed {
z.buf[8] = 4
} else {
z.buf[8] = 0
}
z.buf[9] = z.OS
n, z.err = z.w.Write(z.buf[0:10])
if z.err != nil {
return n, z.err
}
if z.Extra != nil {
z.err = z.writeBytes(z.Extra)
if z.err != nil {
return n, z.err
}
}
if z.Name != "" {
z.err = z.writeString(z.Name)
if z.err != nil {
return n, z.err
}
}
if z.Comment != "" {
z.err = z.writeString(z.Comment)
if z.err != nil {
return n, z.err
}
}
if z.compressor == nil {
z.compressor, _ = flate.NewWriter(z.w, z.level)
}
}
z.size += uint32(len(p))
z.digest.Write(p)
n, z.err = z.compressor.Write(p)
return n, z.err
}
// Flush flushes any pending compressed data to the underlying writer.
//
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet. Flush does
// not return until the data has been written. If the underlying
// writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (z *Writer) Flush() error {
if z.err != nil {
return z.err
}
if z.closed {
return nil
}
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
z.err = z.compressor.Flush()
return z.err
}
// Close closes the Writer, flushing any unwritten data to the underlying
// io.Writer, but does not close the underlying io.Writer.
func (z *Writer) Close() error {
if z.err != nil {
return z.err
}
if z.closed {
return nil
}
z.closed = true
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
z.err = z.compressor.Close()
if z.err != nil {
return z.err
}
put4(z.buf[0:4], z.digest.Sum32())
put4(z.buf[4:8], z.size)
_, z.err = z.w.Write(z.buf[0:8])
return z.err
}
compress/gzip解压缩源码
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gzip implements reading and writing of gzip format compressed files,
// as specified in RFC 1952.
package gzip
import (
"bufio"
"compress/flate"
"errors"
"hash"
"hash/crc32"
"io"
"time"
)
const (
gzipID1 = 0x1f
gzipID2 = 0x8b
gzipDeflate = 8
flagText = 1 << 0
flagHdrCrc = 1 << 1
flagExtra = 1 << 2
flagName = 1 << 3
flagComment = 1 << 4
)
func makeReader(r io.Reader) flate.Reader {
if rr, ok := r.(flate.Reader); ok {
return rr
}
return bufio.NewReader(r)
}
var (
// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
ErrChecksum = errors.New("gzip: invalid checksum")
// ErrHeader is returned when reading GZIP data that has an invalid header.
ErrHeader = errors.New("gzip: invalid header")
)
// The gzip file stores a header giving metadata about the compressed file.
// That header is exposed as the fields of the Writer and Reader structs.
//
// Strings must be UTF-8 encoded and may only contain Unicode code points
// U+0001 through U+00FF, due to limitations of the GZIP file format.
type Header struct {
Comment string // comment
Extra []byte // "extra data"
ModTime time.Time // modification time
Name string // file name
OS byte // operating system type
}
// A Reader is an io.Reader that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header. Reads from the Reader
// return the concatenation of the uncompressed data of each.
// Only the first header is recorded in the Reader fields.
//
// Gzip files store a length and checksum of the uncompressed data.
// The Reader will return a ErrChecksum when Read
// reaches the end of the uncompressed data if it does not
// have the expected length or checksum. Clients should treat data
// returned by Read as tentative until they receive the io.EOF
// marking the end of the data.
type Reader struct {
Header // valid after NewReader or Reader.Reset
r flate.Reader
decompressor io.ReadCloser
digest hash.Hash32
size uint32
flg byte
buf [512]byte
err error
multistream bool
}
// NewReader creates a new Reader reading the given reader.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
//
// It is the caller's responsibility to call Close on the Reader when done.
//
// The Reader.Header fields will be valid in the Reader returned.
func NewReader(r io.Reader) (*Reader, error) {
z := new(Reader)
z.r = makeReader(r)
z.multistream = true
z.digest = crc32.NewIEEE()
if err := z.readHeader(true); err != nil {
return nil, err
}
return z, nil
}
// Reset discards the Reader z's state and makes it equivalent to the
// result of its original state from NewReader, but reading from r instead.
// This permits reusing a Reader rather than allocating a new one.
func (z *Reader) Reset(r io.Reader) error {
z.r = makeReader(r)
if z.digest == nil {
z.digest = crc32.NewIEEE()
} else {
z.digest.Reset()
}
z.size = 0
z.err = nil
z.multistream = true
return z.readHeader(true)
}
// Multistream controls whether the reader supports multistream files.
//
// If enabled (the default), the Reader expects the input to be a sequence
// of individually gzipped data streams, each with its own header and
// trailer, ending at EOF. The effect is that the concatenation of a sequence
// of gzipped files is treated as equivalent to the gzip of the concatenation
// of the sequence. This is standard behavior for gzip readers.
//
// Calling Multistream(false) disables this behavior; disabling the behavior
// can be useful when reading file formats that distinguish individual gzip
// data streams or mix gzip data streams with other data streams.
// In this mode, when the Reader reaches the end of the data stream,
// Read returns io.EOF. If the underlying reader implements io.ByteReader,
// it will be left positioned just after the gzip stream.
// To start the next stream, call z.Reset(r) followed by z.Multistream(false).
// If there is no next stream, z.Reset(r) will return io.EOF.
func (z *Reader) Multistream(ok bool) {
z.multistream = ok
}
// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
func get4(p []byte) uint32 {
return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
}
func (z *Reader) readString() (string, error) {
var err error
needconv := false
for i := 0; ; i++ {
if i >= len(z.buf) {
return "", ErrHeader
}
z.buf[i], err = z.r.ReadByte()
if err != nil {
return "", err
}
if z.buf[i] > 0x7f {
needconv = true
}
if z.buf[i] == 0 {
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
if needconv {
s := make([]rune, 0, i)
for _, v := range z.buf[0:i] {
s = append(s, rune(v))
}
return string(s), nil
}
return string(z.buf[0:i]), nil
}
}
}
func (z *Reader) read2() (uint32, error) {
_, err := io.ReadFull(z.r, z.buf[0:2])
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return 0, err
}
return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil
}
func (z *Reader) readHeader(save bool) error {
_, err := io.ReadFull(z.r, z.buf[0:10])
if err != nil {
// RFC1952 section 2.2 says the following:
// A gzip file consists of a series of "members" (compressed data sets).
//
// Other than this, the specification does not clarify whether a
// "series" is defined as "one or more" or "zero or more". To err on the
// side of caution, Go interprets this to mean "zero or more".
// Thus, it is okay to return io.EOF here.
return err
}
if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
return ErrHeader
}
z.flg = z.buf[3]
if save {
z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0)
// z.buf[8] is xfl, ignored
z.OS = z.buf[9]
}
z.digest.Reset()
z.digest.Write(z.buf[0:10])
if z.flg&flagExtra != 0 {
n, err := z.read2()
if err != nil {
return err
}
data := make([]byte, n)
if _, err = io.ReadFull(z.r, data); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
}
if save {
z.Extra = data
}
}
var s string
if z.flg&flagName != 0 {
if s, err = z.readString(); err != nil {
return err
}
if save {
z.Name = s
}
}
if z.flg&flagComment != 0 {
if s, err = z.readString(); err != nil {
return err
}
if save {
z.Comment = s
}
}
if z.flg&flagHdrCrc != 0 {
n, err := z.read2()
if err != nil {
return err
}
sum := z.digest.Sum32() & 0xFFFF
if n != sum {
return ErrHeader
}
}
z.digest.Reset()
if z.decompressor == nil {
z.decompressor = flate.NewReader(z.r)
} else {
z.decompressor.(flate.Resetter).Reset(z.r, nil)
}
return nil
}
func (z *Reader) Read(p []byte) (n int, err error) {
if z.err != nil {
return 0, z.err
}
if len(p) == 0 {
return 0, nil
}
n, err = z.decompressor.Read(p)
z.digest.Write(p[0:n])
z.size += uint32(n)
if n != 0 || err != io.EOF {
z.err = err
return
}
// Finished file; check checksum + size.
if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
z.err = err
return 0, err
}
crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8])
sum := z.digest.Sum32()
if sum != crc32 || isize != z.size {
z.err = ErrChecksum
return 0, z.err
}
// File is ok; is there another?
if !z.multistream {
return 0, io.EOF
}
if err = z.readHeader(false); err != nil {
z.err = err
return
}
// Yes. Reset and read from it.
z.digest.Reset()
z.size = 0
return z.Read(p)
}
// Close closes the Reader. It does not close the underlying io.Reader.
func (z *Reader) Close() error { return z.decompressor.Close() }