docker pull 过程中 distribution 源码分析
承接上一篇“docker push 过程中distribution源码分析”,本文分析一下distribution在docker pull时候的处理流程。所写内容为个人对distribution registry的理解,如有错误还请各位指出以便更正。
本文所涉及的存储信息是以本地文件系统为例进行分析说明。
docker pull的过程相比docker push要简单不少,其过程中首先是Get Manifest文件,然后将Manifest文件中的layer每一层都pull(GET)到本地即可
Pull Manifest源码分析
request example:
http://reg.lalalala.com/v2/dhc_cloud/docker-workflow-demo/manifests/latest
对应的方法:
request function: GET
request URL: /v2/**/manifests/$taginfo
request handler dispatch:func imageManifestDispatcher(ctx *Context, r *http.Request)
request handler: imageManifestHandler.GetImageManifest
根据app.register(v2.RouteNameManifest, imageManifestDispatcher) 所以该请求对应的dispatch是imageManifestDispatcher
这里我们直接看 GetImageManifest函数:
// GetImageManifest fetches the image manifest from the storage backend, if it
// exists, and writes it to the response.
//
// The handler works in these steps:
//  1. When the request names a tag, the tag is resolved to a digest and stored
//     in imh.Digest.
//  2. If etagMatch reports a match for that digest, 304 Not Modified is sent.
//  3. The manifest is loaded by digest.
//  4. The Accept headers are scanned to learn whether the client understands
//     schema2 manifests and manifest lists. For tag-based requests from clients
//     that do not, the manifest is rewritten or replaced by the manifest of the
//     default platform.
//  5. The payload is written together with the Content-Type, Content-Length,
//     Docker-Content-Digest and Etag headers.
//
// Failures are recorded in imh.Errors; nothing is written to w in that case.
func (imh *imageManifestHandler) GetImageManifest(w http.ResponseWriter, r *http.Request) {
	ctxu.GetLogger(imh).Debug("GetImageManifest")
	manifests, err := imh.Repository.Manifests(imh)
	if err != nil {
		imh.Errors = append(imh.Errors, err)
		return
	}

	var manifest distribution.Manifest
	if imh.Tag != "" {
		// Requests by tag must first be resolved to the digest the tag
		// currently points at.
		tags := imh.Repository.Tags(imh)
		desc, err := tags.Get(imh, imh.Tag)
		if err != nil {
			imh.Errors = append(imh.Errors, v2.ErrorCodeManifestUnknown.WithDetail(err))
			return
		}
		imh.Digest = desc.Digest
	}

	if etagMatch(r, imh.Digest.String()) {
		w.WriteHeader(http.StatusNotModified)
		return
	}

	var options []distribution.ManifestServiceOption
	if imh.Tag != "" {
		options = append(options, distribution.WithTag(imh.Tag))
	}
	manifest, err = manifests.Get(imh, imh.Digest, options...)
	if err != nil {
		imh.Errors = append(imh.Errors, v2.ErrorCodeManifestUnknown.WithDetail(err))
		return
	}

	supportsSchema2 := false
	supportsManifestList := false
	// this parsing of Accept headers is not quite as full-featured as godoc.org's parser, but we don't care about "q=" values
	// https://github.com/golang/gddo/blob/e91d4165076d7474d20abda83f92d15c7ebc3e81/httputil/header/header.go#L165-L202
	for _, acceptHeader := range r.Header["Accept"] {
		// r.Header[...] is a slice in case the request contains the same header more than once
		// if the header isn't set, we'll get the zero value, which "range" will handle gracefully

		// we need to split each header value on "," to get the full list of "Accept" values (per RFC 2616)
		// https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
		for _, mediaType := range strings.Split(acceptHeader, ",") {
			// remove "; q=..." if present
			if i := strings.Index(mediaType, ";"); i >= 0 {
				mediaType = mediaType[:i]
			}

			// it's common (but not required) for Accept values to be space separated ("a/b, c/d, e/f")
			mediaType = strings.TrimSpace(mediaType)

			if mediaType == schema2.MediaTypeManifest {
				supportsSchema2 = true
			}
			if mediaType == manifestlist.MediaTypeManifestList {
				supportsManifestList = true
			}
		}
	}

	schema2Manifest, isSchema2 := manifest.(*schema2.DeserializedManifest)
	manifestList, isManifestList := manifest.(*manifestlist.DeserializedManifestList)

	// Only rewrite schema2 manifests when they are being fetched by tag.
	// If they are being fetched by digest, we can't return something not
	// matching the digest.
	if imh.Tag != "" && isSchema2 && !supportsSchema2 {
		// Rewrite manifest in schema1 format
		ctxu.GetLogger(imh).Infof("rewriting manifest %s in schema1 format to support old client", imh.Digest.String())

		manifest, err = imh.convertSchema2Manifest(schema2Manifest)
		if err != nil {
			// NOTE(review): presumably convertSchema2Manifest records its own
			// failure in imh.Errors; it is not visible here, so confirm.
			return
		}
	} else if imh.Tag != "" && isManifestList && !supportsManifestList {
		// The client cannot handle manifest lists: fall back to the image
		// manifest of the default platform.
		ctxu.GetLogger(imh).Infof("rewriting manifest list %s in schema1 format to support old client", imh.Digest.String())

		// Find the image manifest corresponding to the default
		// platform
		var manifestDigest digest.Digest
		for _, manifestDescriptor := range manifestList.Manifests {
			if manifestDescriptor.Platform.Architecture == defaultArch && manifestDescriptor.Platform.OS == defaultOS {
				manifestDigest = manifestDescriptor.Digest
				break
			}
		}

		if manifestDigest == "" {
			imh.Errors = append(imh.Errors, v2.ErrorCodeManifestUnknown)
			return
		}

		manifest, err = manifests.Get(imh, manifestDigest)
		if err != nil {
			imh.Errors = append(imh.Errors, v2.ErrorCodeManifestUnknown.WithDetail(err))
			return
		}

		// If necessary, convert the image manifest
		if schema2Manifest, isSchema2 := manifest.(*schema2.DeserializedManifest); isSchema2 && !supportsSchema2 {
			manifest, err = imh.convertSchema2Manifest(schema2Manifest)
			if err != nil {
				// NOTE(review): see the note on the conversion above.
				return
			}
		} else {
			// The platform manifest is served as stored, so the digest
			// headers below must describe it rather than the manifest list
			// the tag points at.
			imh.Digest = manifestDigest
		}
	}

	ct, p, err := manifest.Payload()
	if err != nil {
		// Record the failure instead of returning silently, so the request
		// does not end without a body and without a reported error.
		imh.Errors = append(imh.Errors, err)
		return
	}

	w.Header().Set("Content-Type", ct)
	w.Header().Set("Content-Length", fmt.Sprint(len(p)))
	w.Header().Set("Docker-Content-Digest", imh.Digest.String())
	w.Header().Set("Etag", fmt.Sprintf(`"%s"`, imh.Digest))
	w.Write(p)
}
这个函数有点长,我们一步一步分析。
1. manifests, err := imh.Repository.Manifests(imh)
该函数在“docker push 过程中distribution源码分析”中已经分析过了,构建了一个manifestStore的结构体。
2. tags := imh.Repository.Tags(imh)
该函数在“docker push 过程中distribution源码分析”中已经分析过了,构建了一个TagService的结构体。
3. desc, err := tags.Get(imh, imh.Tag)
这个函数是获取对应Tag目录下Current目录下的link文件的内容,也就是当前这个tag对应的Manifest文件的sha256编码,同时也就是这个Manifest文件的位置信息。
// Get resolves the revision that the given tag currently points at and
// returns it as a descriptor carrying only the digest.
//
// It returns distribution.ErrTagUnknown when the tag's link path is reported
// as not found by the storage driver; any other failure is returned unchanged.
func (ts *tagStore) Get(ctx context.Context, tag string) (distribution.Descriptor, error) {
	var none distribution.Descriptor

	spec := manifestTagCurrentPathSpec{
		name: ts.repository.Named().Name(),
		tag:  tag,
	}
	linkPath, err := pathFor(spec)
	if err != nil {
		return none, err
	}

	dgst, err := ts.blobStore.readlink(ctx, linkPath)
	if err == nil {
		return distribution.Descriptor{Digest: dgst}, nil
	}

	// A missing link means the tag does not exist.
	if _, missing := err.(storagedriver.PathNotFoundError); missing {
		return none, distribution.ErrTagUnknown{Tag: tag}
	}
	return none, err
}
获取到link文件的内容后,进行校验。
4. manifest, err = manifests.Get(imh, imh.Digest, options...)
这个函数在前面函数获取到对应Manifest文件的sha256编码后获取对应的文件内容并解析成对应版本的Manifest文件格式。
// Get loads the manifest blob stored under dgst and unmarshals it with the
// handler that matches its schema version and media type.
//
// An unknown blob is reported as distribution.ErrManifestUnknownRevision.
// Schema version 2 content with an unrecognized media type yields
// distribution.ErrManifestVerification; any other schema version yields a
// plain error.
func (ms *manifestStore) Get(ctx context.Context, dgst digest.Digest, options ...distribution.ManifestServiceOption) (distribution.Manifest, error) {
	context.GetLogger(ms.ctx).Debug("(*manifestStore).Get")

	// TODO(stevvooe): Need to check descriptor from above to ensure that the
	// mediatype is as we expect for the manifest store.

	raw, err := ms.blobStore.Get(ctx, dgst)
	if err == distribution.ErrBlobUnknown {
		return nil, distribution.ErrManifestUnknownRevision{
			Name:     ms.repository.Named().Name(),
			Revision: dgst,
		}
	}
	if err != nil {
		return nil, err
	}

	// Decode only the versioning fields first to decide which handler applies.
	var header manifest.Versioned
	if err = json.Unmarshal(raw, &header); err != nil {
		return nil, err
	}

	switch {
	case header.SchemaVersion == 1:
		return ms.schema1Handler.Unmarshal(ctx, dgst, raw)
	case header.SchemaVersion != 2:
		return nil, fmt.Errorf("unrecognized manifest schema version %d", header.SchemaVersion)
	}

	// Schema version 2: this can be an image manifest or a manifest list.
	switch header.MediaType {
	case schema2.MediaTypeManifest:
		return ms.schema2Handler.Unmarshal(ctx, dgst, raw)
	case manifestlist.MediaTypeManifestList:
		return ms.manifestListHandler.Unmarshal(ctx, dgst, raw)
	default:
		return nil, distribution.ErrManifestVerification{fmt.Errorf("unrecognized manifest content type %s", header.MediaType)}
	}
}
可以看出其实际上的获取函数是blobStore.Get,获取的是根目录下的Blobs/sha256/hex[0:2]/hex/data的内容,之后进行json解析。
之后 httprequest即可返回了。
至此Manifest文件下载完成。
Pull Blob layers源码分析
request example:
http://reg.lalalala.com/v2/dhc_cloud/docker-workflow-demo/blobs/sha256:3ec8eef909a1ece1622fda6b0f22414bafb24a88976331637718ad31874fe412
对应的方法:
request function: GET
request URL: /v2/**/blobs/sha256:id
request handler dispatch:blobDispatcher(ctx *Context, r *http.Request)
request handler: blobHandler.GetBlob
根据app.register(v2.RouteNameBlob, blobDispatcher) 所以该请求对应的dispatch是blobDispatcher
这里我们直接看 blobHandler.GetBlob函数:
// GetBlob looks up the blob identified by bh.Digest in the repository's blob
// store and, if it is found, hands the request to the store's ServeBlob.
//
// A distribution.ErrBlobUnknown from Stat is recorded as
// v2.ErrorCodeBlobUnknown; every other failure, including one from ServeBlob,
// is recorded as errcode.ErrorCodeUnknown.
func (bh *blobHandler) GetBlob(w http.ResponseWriter, r *http.Request) {
	context.GetLogger(bh).Debug("GetBlob")

	store := bh.Repository.Blobs(bh)

	desc, statErr := store.Stat(bh, bh.Digest)
	switch statErr {
	case nil:
		// Blob exists; continue below.
	case distribution.ErrBlobUnknown:
		bh.Errors = append(bh.Errors, v2.ErrorCodeBlobUnknown.WithDetail(bh.Digest))
		return
	default:
		bh.Errors = append(bh.Errors, errcode.ErrorCodeUnknown.WithDetail(statErr))
		return
	}

	serveErr := store.ServeBlob(bh, w, r, desc.Digest)
	if serveErr == nil {
		return
	}
	context.GetLogger(bh).Debugf("unexpected error getting blob HTTP handler: %v", serveErr)
	bh.Errors = append(bh.Errors, errcode.ErrorCodeUnknown.WithDetail(serveErr))
}
上面的源码内容很简单,调用了bh.Repository.Blobs(bh) 构建了一个blobStore对象,再调用blobs.ServeBlob来提供blob,或者说真正的处理函数在这里。
看一下bh.Repository.Blobs(bh)函数:
func (repo *repository) Blobs(ctx context.Context) distribution.BlobStore { var statter distribution.BlobDescriptorService = &linkedBlobStatter{ blobStore: repo.blobStore, repository: repo, linkPathFns: []linkPathFunc{blobLinkPath}, } if repo.descriptorCache != nil { statter = cache.NewCachedBlobStatter(repo.descriptorCache, statter) } if repo.registry.blobDescriptorServiceFactory != nil { statter = repo.registry.blobDescriptorServiceFactory.BlobAccessController(statter) } return &linkedBlobStore{ registry: repo.registry, blobStore: repo.blobStore, blobServer: repo.blobServer, blobAccessController: statter, repository: repo, ctx: ctx, // TODO(stevvooe): linkPath limits this blob store to only layers. // This instance cannot be used for manifest checks. linkPathFns: []linkPathFunc{blobLinkPath}, deleteEnabled: repo.registry.deleteEnabled, resumableDigestEnabled: repo.resumableDigestEnabled, } }
上面函数的内容很简单,不做解析,但是最后linkedBlobStore结构体很重要,后面的所有操作都依赖这个对象的内容。
这里来看blobs.ServeBlob(bh, w, r, desc.Digest) 函数:
func (bs *blobServer) ServeBlob(ctx context.Context, w http.ResponseWriter, r *http.Request, dgst digest.Digest) error { desc, err := bs.statter.Stat(ctx, dgst) if err != nil { return err } path, err := bs.pathFn(desc.Digest) if err != nil { return err } if bs.redirect { redirectURL, err := bs.driver.URLFor(ctx, path, map[string]interface{}{"method": r.Method}) switch err.(type) { case nil: // Redirect to storage URL. http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect) return err case driver.ErrUnsupportedMethod: // Fallback to serving the content directly. default: // Some unexpected error. return err } } br, err := newFileReader(ctx, bs.driver, path, desc.Size) if err != nil { return err } defer br.Close() w.Header().Set("ETag", fmt.Sprintf(`"%s"`, desc.Digest)) // If-None-Match handled by ServeContent w.Header().Set("Cache-Control", fmt.Sprintf("max-age=%.f", blobCacheControlMaxAge.Seconds())) if w.Header().Get("Docker-Content-Digest") == "" { w.Header().Set("Docker-Content-Digest", desc.Digest.String()) } if w.Header().Get("Content-Type") == "" { // Set the content type if not already set. w.Header().Set("Content-Type", desc.MediaType) } if w.Header().Get("Content-Length") == "" { // Set the content length if not already set. w.Header().Set("Content-Length", fmt.Sprint(desc.Size)) } http.ServeContent(w, r, desc.Digest.String(), time.Time{}, br) return nil }
这个函数内容也非常简单,先调用bs.statter.Stat获取blob的描述信息,再调用bs.pathFn 获取对应文件的路径;如果开启了redirect并且后端驱动能够生成URL,则直接重定向到存储URL,否则调用newFileReader(ctx, bs.driver, path, desc.Size)创建一个fileReader对象,之后调用http.ServeContent结束http请求处理,真正的读取文件是在http.ServeContent中处理的。
至此docker pull过程中发送给distribution的两个http请求处理源码解析完毕。
这几篇distribution源码的分析过程中忽略了认证、notify以及后端驱动本身,这里不做说明,以后看情况再做分析。