docker pull 过程distribution源码分析

最新推荐文章于 2023-08-25 16:56:43 发布

Sunny_blood

最新推荐文章于 2023-08-25 16:56:43 发布

阅读量2.3k

点赞数

分类专栏： golang docker distribution 文章标签：源码 docker 镜像仓库 pull

本文链接：https://blog.csdn.net/Daniel_greenspan/article/details/78857341

版权

golang 同时被 3 个专栏收录

7 篇文章 0 订阅

订阅专栏

docker

7 篇文章 0 订阅

订阅专栏

distribution

4 篇文章 0 订阅

订阅专栏

docker pull 过程distribution源码分析

承接上一篇“docker push 过程中distribution源码分析”本文分析一下distribution在docker pull时候的处理流程。所写内容为个人对distribution registry的理解，如有错误还请各位指出以便更正。
本文所涉及的存储信息是以本地文件系统为例进行分析说明。

docker pull的过程相比docker push要简单不少，其过程中首先是Get Manifest文件，然后将Manifest文件中的layer每一层都pull(GET)到本地即可

Pull Manifest源码分析

request example:
http://reg.lalalala.com/v2/dhc_cloud/docker-workflow-demo/manifests/latest

对应的方法：

request function: GET
request URL: /v2/**/manifests/$taginfo
request handler dispatch:func imageManifestDispatcher(ctx *Context, r *http.Request)
request handler： imageManifestHandler.GetImageManifest

根据app.register(v2.RouteNameManifest, imageManifestDispatcher) 所以该请求对应的dispatch是imageManifestDispatcher

这里我们直接看 GetImageManifest 函数：

// GetImageManifest fetches the image manifest from the storage backend, if it exists.
//
// Flow, as implemented below:
//  1. If the request names a tag, the tag is resolved to a digest and stored
//     in imh.Digest.
//  2. If etagMatch reports a match for that digest, 304 Not Modified is sent
//     and no body is written.
//  3. The manifest is loaded by digest.
//  4. For tag-based requests only, the manifest may be rewritten for clients
//     whose Accept headers do not list the stored media type: a schema2
//     manifest is converted via convertSchema2Manifest, and a manifest list is
//     replaced by the image manifest for defaultArch/defaultOS (which is then
//     converted as well if the client does not accept schema2).
//  5. The payload is written together with the Content-Type, Content-Length,
//     Docker-Content-Digest and Etag headers.
//
// Failures are reported by appending to imh.Errors and returning.
func (imh *imageManifestHandler) GetImageManifest(w http.ResponseWriter, r *http.Request) {
    ctxu.GetLogger(imh).Debug("GetImageManifest")
    manifests, err := imh.Repository.Manifests(imh)
    if err != nil {
        imh.Errors = append(imh.Errors, err)
        return
    }

    var manifest distribution.Manifest
    if imh.Tag != "" {
        // Resolve the tag to the digest of the manifest it currently points at.
        tags := imh.Repository.Tags(imh)
        desc, err := tags.Get(imh, imh.Tag)
        if err != nil {
            imh.Errors = append(imh.Errors, v2.ErrorCodeManifestUnknown.WithDetail(err))
            return
        }
        imh.Digest = desc.Digest
    }

    if etagMatch(r, imh.Digest.String()) {
        w.WriteHeader(http.StatusNotModified)
        return
    }

    var options []distribution.ManifestServiceOption
    if imh.Tag != "" {
        options = append(options, distribution.WithTag(imh.Tag))
    }
    manifest, err = manifests.Get(imh, imh.Digest, options...)
    if err != nil {
        imh.Errors = append(imh.Errors, v2.ErrorCodeManifestUnknown.WithDetail(err))
        return
    }

    supportsSchema2 := false
    supportsManifestList := false
    // this parsing of Accept headers is not quite as full-featured as godoc.org's parser, but we don't care about "q=" values
    // https://github.com/golang/gddo/blob/e91d4165076d7474d20abda83f92d15c7ebc3e81/httputil/header/header.go#L165-L202
    for _, acceptHeader := range r.Header["Accept"] {
        // r.Header[...] is a slice in case the request contains the same header more than once
        // if the header isn't set, we'll get the zero value, which "range" will handle gracefully

        // we need to split each header value on "," to get the full list of "Accept" values (per RFC 2616)
        // https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
        for _, mediaType := range strings.Split(acceptHeader, ",") {
            // remove "; q=..." if present
            if i := strings.Index(mediaType, ";"); i >= 0 {
                mediaType = mediaType[:i]
            }

            // it's common (but not required) for Accept values to be space separated ("a/b, c/d, e/f")
            mediaType = strings.TrimSpace(mediaType)

            if mediaType == schema2.MediaTypeManifest {
                supportsSchema2 = true
            }
            if mediaType == manifestlist.MediaTypeManifestList {
                supportsManifestList = true
            }
        }
    }

    schema2Manifest, isSchema2 := manifest.(*schema2.DeserializedManifest)
    manifestList, isManifestList := manifest.(*manifestlist.DeserializedManifestList)

    // Only rewrite schema2 manifests when they are being fetched by tag.
    // If they are being fetched by digest, we can't return something not
    // matching the digest.
    if imh.Tag != "" && isSchema2 && !supportsSchema2 {
        // Rewrite manifest in schema1 format
        ctxu.GetLogger(imh).Infof("rewriting manifest %s in schema1 format to support old client", imh.Digest.String())

        // NOTE(review): convertSchema2Manifest is not visible here; it is
        // assumed to record its own failure in imh.Errors and to update
        // imh.Digest for the converted manifest — confirm.
        manifest, err = imh.convertSchema2Manifest(schema2Manifest)
        if err != nil {
            return
        }
    } else if imh.Tag != "" && isManifestList && !supportsManifestList {
        // The client cannot consume a manifest list: serve the single image
        // manifest for the default platform instead.
        ctxu.GetLogger(imh).Infof("rewriting manifest list %s in schema1 format to support old client", imh.Digest.String())

        // Find the image manifest corresponding to the default
        // platform
        var manifestDigest digest.Digest
        for _, manifestDescriptor := range manifestList.Manifests {
            if manifestDescriptor.Platform.Architecture == defaultArch && manifestDescriptor.Platform.OS == defaultOS {
                manifestDigest = manifestDescriptor.Digest
                break
            }
        }

        if manifestDigest == "" {
            imh.Errors = append(imh.Errors, v2.ErrorCodeManifestUnknown)
            return
        }

        manifest, err = manifests.Get(imh, manifestDigest)
        if err != nil {
            imh.Errors = append(imh.Errors, v2.ErrorCodeManifestUnknown.WithDetail(err))
            return
        }

        // If necessary, convert the image manifest
        if schema2Manifest, isSchema2 := manifest.(*schema2.DeserializedManifest); isSchema2 && !supportsSchema2 {
            manifest, err = imh.convertSchema2Manifest(schema2Manifest)
            if err != nil {
                return
            }
        } else {
            // The body served is now the platform image manifest, not the
            // manifest list, so the digest reported in the response headers
            // must be the image manifest's digest.
            imh.Digest = manifestDigest
        }
    }

    ct, p, err := manifest.Payload()
    if err != nil {
        return
    }

    w.Header().Set("Content-Type", ct)
    w.Header().Set("Content-Length", fmt.Sprint(len(p)))
    w.Header().Set("Docker-Content-Digest", imh.Digest.String())
    w.Header().Set("Etag", fmt.Sprintf(`"%s"`, imh.Digest))
    w.Write(p)
}

这个函数有点长，我们一步一步分析。
1. manifests, err := imh.Repository.Manifests(imh)
该函数在”docker push 过程中distribution源码分析”中已经分析过了，构建了一个manifestStore的结构体。
2. tags := imh.Repository.Tags(imh)
该函数在”docker push 过程中distribution源码分析”中已经分析过了，构建了一个TagService的结构体。
3. desc, err := tags.Get(imh, imh.Tag)
这个函数是获取对应Tag目录下Current目录下的link文件的内容，也就是当前这个tag对应的Manifest文件的sha256编码，同时也就是这个Manifest文件的位置信息。

// Get resolves the current revision for the repository name and tag.
//
// It reads the link stored at the tag's "current" path and returns a
// descriptor carrying only the digest found there. A missing path is reported
// as distribution.ErrTagUnknown; any other failure is returned unchanged.
func (ts *tagStore) Get(ctx context.Context, tag string) (distribution.Descriptor, error) {
    spec := manifestTagCurrentPathSpec{
        name: ts.repository.Named().Name(),
        tag:  tag,
    }

    linkPath, err := pathFor(spec)
    if err != nil {
        return distribution.Descriptor{}, err
    }

    dgst, err := ts.blobStore.readlink(ctx, linkPath)
    if err == nil {
        return distribution.Descriptor{Digest: dgst}, nil
    }

    // Translate "path not found" from the storage driver into a tag-level error.
    if _, notFound := err.(storagedriver.PathNotFoundError); notFound {
        return distribution.Descriptor{}, distribution.ErrTagUnknown{Tag: tag}
    }

    return distribution.Descriptor{}, err
}

获取到link文件的内容后，进行校验。

4. manifest, err = manifests.Get(imh, imh.Digest, options...)
这个函数在前面函数获取到对应Manifest文件的sha256编码后获取对应的文件内容并解析成对应版本的Manifest文件格式。

// Get loads the manifest blob identified by dgst and decodes it with the
// handler matching its schema version and media type.
//
// An unknown blob is reported as distribution.ErrManifestUnknownRevision.
// Schema version 1 goes to schema1Handler; schema version 2 goes to
// schema2Handler or manifestListHandler depending on the media type, and an
// unrecognized media type yields distribution.ErrManifestVerification. Any
// other schema version is an error. The options argument is not used in this
// function body.
func (ms *manifestStore) Get(ctx context.Context, dgst digest.Digest, options ...distribution.ManifestServiceOption) (distribution.Manifest, error) {
    context.GetLogger(ms.ctx).Debug("(*manifestStore).Get")

    // TODO(stevvooe): Need to check descriptor from above to ensure that the
    // mediatype is as we expect for the manifest store.

    raw, err := ms.blobStore.Get(ctx, dgst)
    if err != nil {
        if err != distribution.ErrBlobUnknown {
            return nil, err
        }

        return nil, distribution.ErrManifestUnknownRevision{
            Name:     ms.repository.Named().Name(),
            Revision: dgst,
        }
    }

    // Decode only the version envelope first to decide which handler applies.
    var header manifest.Versioned
    if err = json.Unmarshal(raw, &header); err != nil {
        return nil, err
    }

    if header.SchemaVersion == 1 {
        return ms.schema1Handler.Unmarshal(ctx, dgst, raw)
    }

    if header.SchemaVersion != 2 {
        return nil, fmt.Errorf("unrecognized manifest schema version %d", header.SchemaVersion)
    }

    // Schema version 2 can be an image manifest or a manifest list.
    switch header.MediaType {
    case schema2.MediaTypeManifest:
        return ms.schema2Handler.Unmarshal(ctx, dgst, raw)
    case manifestlist.MediaTypeManifestList:
        return ms.manifestListHandler.Unmarshal(ctx, dgst, raw)
    }

    return nil, distribution.ErrManifestVerification{fmt.Errorf("unrecognized manifest content type %s", header.MediaType)}
}

可以看出其实际上的获取函数是blobStore.Get，获取的是根目录下的Blobs/sha256/hex[0:2]/hex/data的内容，之后进行json解析。

之后 httprequest即可返回了。
至此Manifest文件下载完成。

Pull Blob layers源码分析

request example:
http://reg.lalalala.com/v2/dhc_cloud/docker-workflow-demo/blobs/sha256:3ec8eef909a1ece1622fda6b0f22414bafb24a88976331637718ad31874fe412

对应的方法：

request function: GET
request URL: /v2/**/blobs/sha256:id
request handler dispatch:blobDispatcher(ctx *Context, r *http.Request)
request handler： blobHandler.GetBlob

根据app.register(v2.RouteNameBlob, blobDispatcher) 所以该请求对应的dispatch是blobDispatcher

这里我们直接看 blobHandler.GetBlob函数：

// GetBlob serves the blob identified by bh.Digest.
//
// The blob is first stat'ed through the repository's blob store. An unknown
// blob is recorded in bh.Errors as ErrorCodeBlobUnknown, any other stat
// failure as ErrorCodeUnknown. On success the response is delegated to
// ServeBlob; a failure there is logged at debug level and recorded as
// ErrorCodeUnknown.
func (bh *blobHandler) GetBlob(w http.ResponseWriter, r *http.Request) {
    context.GetLogger(bh).Debug("GetBlob")

    store := bh.Repository.Blobs(bh)

    desc, statErr := store.Stat(bh, bh.Digest)
    switch {
    case statErr == nil:
        // Blob exists; continue to serving it.
    case statErr == distribution.ErrBlobUnknown:
        bh.Errors = append(bh.Errors, v2.ErrorCodeBlobUnknown.WithDetail(bh.Digest))
        return
    default:
        bh.Errors = append(bh.Errors, errcode.ErrorCodeUnknown.WithDetail(statErr))
        return
    }

    serveErr := store.ServeBlob(bh, w, r, desc.Digest)
    if serveErr == nil {
        return
    }

    context.GetLogger(bh).Debugf("unexpected error getting blob HTTP handler: %v", serveErr)
    bh.Errors = append(bh.Errors, errcode.ErrorCodeUnknown.WithDetail(serveErr))
}

上面的源码内容很简单，调用了bh.Repository.Blobs(bh) 构建了一个blobStore对象，再调用blobs.ServeBlob来提供blob，或者说真正的处理函数在这里。

看一下bh.Repository.Blobs(bh)函数：

// Blobs returns a blob store scoped to this repository.
//
// The descriptor service starts as a linkedBlobStatter, is wrapped in a
// cached statter when the repository has a descriptor cache, and is wrapped
// once more when the registry has a blobDescriptorServiceFactory. The
// resulting service becomes the blobAccessController of the returned
// linkedBlobStore.
func (repo *repository) Blobs(ctx context.Context) distribution.BlobStore {
    var access distribution.BlobDescriptorService
    access = &linkedBlobStatter{
        blobStore:   repo.blobStore,
        repository:  repo,
        linkPathFns: []linkPathFunc{blobLinkPath},
    }

    // Optional decoration layers, applied innermost first.
    if repo.descriptorCache != nil {
        access = cache.NewCachedBlobStatter(repo.descriptorCache, access)
    }
    if factory := repo.registry.blobDescriptorServiceFactory; factory != nil {
        access = factory.BlobAccessController(access)
    }

    store := &linkedBlobStore{
        registry:             repo.registry,
        blobStore:            repo.blobStore,
        blobServer:           repo.blobServer,
        blobAccessController: access,
        repository:           repo,
        ctx:                  ctx,

        // TODO(stevvooe): linkPath limits this blob store to only layers.
        // This instance cannot be used for manifest checks.
        linkPathFns:            []linkPathFunc{blobLinkPath},
        deleteEnabled:          repo.registry.deleteEnabled,
        resumableDigestEnabled: repo.resumableDigestEnabled,
    }
    return store
}

上面函数的内容很简单，不做解析，但是最后的linkedBlobStore结构体很重要，后面的所有操作都依赖这个对象的内容。

这里来看blobs.ServeBlob(bh, w, r, desc.Digest) 函数：

// ServeBlob writes the blob identified by dgst to the HTTP response.
//
// The blob is stat'ed and its storage path computed. When redirects are
// enabled, the driver is asked for a URL: on success the client is sent a
// 307 redirect to it, and on driver.ErrUnsupportedMethod the content is
// served directly instead; any other URL error is returned. When serving
// directly, ETag and Cache-Control are always set, while
// Docker-Content-Digest, Content-Type and Content-Length are set only if
// the response does not already carry them. The body is streamed with
// http.ServeContent.
func (bs *blobServer) ServeBlob(ctx context.Context, w http.ResponseWriter, r *http.Request, dgst digest.Digest) error {
    desc, err := bs.statter.Stat(ctx, dgst)
    if err != nil {
        return err
    }

    blobPath, err := bs.pathFn(desc.Digest)
    if err != nil {
        return err
    }

    if bs.redirect {
        target, urlErr := bs.driver.URLFor(ctx, blobPath, map[string]interface{}{"method": r.Method})
        if urlErr == nil {
            // Redirect to storage URL.
            http.Redirect(w, r, target, http.StatusTemporaryRedirect)
            return nil
        }
        if _, unsupported := urlErr.(driver.ErrUnsupportedMethod); !unsupported {
            // Some unexpected error.
            return urlErr
        }
        // Unsupported method: fall back to serving the content directly.
    }

    reader, err := newFileReader(ctx, bs.driver, blobPath, desc.Size)
    if err != nil {
        return err
    }
    defer reader.Close()

    w.Header().Set("ETag", fmt.Sprintf(`"%s"`, desc.Digest)) // If-None-Match handled by ServeContent
    w.Header().Set("Cache-Control", fmt.Sprintf("max-age=%.f", blobCacheControlMaxAge.Seconds()))

    // Headers that are filled in only when not already set on the response.
    fallbacks := []struct{ key, value string }{
        {"Docker-Content-Digest", desc.Digest.String()},
        {"Content-Type", desc.MediaType},
        {"Content-Length", fmt.Sprint(desc.Size)},
    }
    for _, hdr := range fallbacks {
        if w.Header().Get(hdr.key) == "" {
            w.Header().Set(hdr.key, hdr.value)
        }
    }

    http.ServeContent(w, r, desc.Digest.String(), time.Time{}, reader)
    return nil
}

这个函数内容也非常简单，先调用bs.pathFn 获取对应文件的路径，然后调用newFileReader(ctx, bs.driver, path, desc.Size)创建一个fileReader对象，之后调用http.ServeContent结束http请求处理，真正的读取文件是在http.ServeContent中处理的。

至此docker pull过程中发送给distribution的两个http请求处理源码解析完毕。
这几篇distribution源码的分析过程中忽略了认证，notify，以及后端驱动本身，这里不做说明，以后看情况再做分析。

Sunny_blood

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
docker pull 过程distribution源码分析

docker pull 过程distribution源码分析承接上一篇“docker push 过程中distribution源码分析”本文分析一下distribution在docker pull时候的处理流程。 docker pull 的过程分为pull Manifest文件和Pull Blob(layers)文件两部分，这里按照这两部分进行源码分析。
复制链接

扫一扫