【kubernetes/k8s源码分析】 kubelet image gc 垃圾回收源码分析

WHY

       容器停止后,其文件系统并不会自动清除,通过 docker ps -a 仍然能够看到这些资源。kubelet 提供了 container gc 方案,用于清理宿主机上不再需要的容器

       容器镜像会占用存储资源,宿主机上往往会留下不再需要的容器镜像,因此 kubelet 设计了一套 image gc 方案来清理这些镜像

 

WHAT

     Kubernetes 通过 imageManager 和 kubelet 中集成的 cAdvisor 共同管理镜像的生命周期,根据 node 的磁盘使用情况触发镜像的 GC

     Kubernetes 垃圾回收(Garbage Collection)机制由kubelet完成,kubelet定期清理不再使用的容器和镜像,每分钟进行一次容器的GC,每五分钟进行一次镜像的GC

 

HOW

createAndInitKubelet

       -->  StartGarbageCollection

                      -->  kl.containerGC.GarbageCollect

                      -->  kl.imageManager.GarbageCollect

   本文只分析 image gc部分

 

  • --minimum-image-ttl-duration,默认 2m0s,回收 image 最小年龄
  • --image-gc-high-threshold,默认 85,高于此阈值将进行回收
  • --image-gc-low-threshold,默认 80,磁盘使用率低于此阈值时不进行回收(回收时会释放空间直到使用率降到该阈值)
// Build the image GC policy from the kubelet configuration: the minimum image
// age plus the high and low threshold percentages, the latter two converted to int.
imageGCPolicy := images.ImageGCPolicy{
	MinAge:               kubeCfg.ImageMinimumGCAge.Duration,
	HighThresholdPercent: int(kubeCfg.ImageGCHighThresholdPercent),
	LowThresholdPercent:  int(kubeCfg.ImageGCLowThresholdPercent),
}

 

1. 实例化 image manager 

// Set up the image GC manager from the container runtime, the stats provider,
// the event recorder, the node reference, the GC policy and the pod sandbox image.
imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage)
if err != nil {
	// Construction failed; return the error with added context.
	return nil, fmt.Errorf("failed to initialize image manager: %v", err)
}
klet.imageManager = imageManager

    1.1 ImageGCManager 接口

// ImageGCManager is an interface for managing lifecycle of all images.
// Implementation is thread-safe.
type ImageGCManager interface {
	// GarbageCollect applies the garbage collection policy. Errors include being
	// unable to free enough space as per the garbage collection policy.
	GarbageCollect() error

	// Start begins async garbage collection of images.
	Start()

	// GetImageList returns a slice of container images, or an error.
	// NOTE(review): presumably the images currently tracked by the manager —
	// confirm against the implementation.
	GetImageList() ([]container.Image, error)

	// DeleteUnusedImages deletes all unused images.
	DeleteUnusedImages() error
}

    1.2 NewImageGCManager 函数

     realImageGCManager 实现了 ImageGCManager 接口

// NewImageGCManager validates the supplied GC policy and, if it is acceptable,
// returns an ImageGCManager backed by a realImageGCManager.
//
// The policy is rejected when either threshold percentage lies outside
// [0, 100] or when the low threshold exceeds the high threshold. The returned
// manager starts with an empty imageRecords map and is not yet initialized.
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string) (ImageGCManager, error) {
	high, low := policy.HighThresholdPercent, policy.LowThresholdPercent

	// Checks run in a fixed order: high range, low range, then low <= high.
	switch {
	case high < 0 || high > 100:
		return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", high)
	case low < 0 || low > 100:
		return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", low)
	case low > high:
		return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", low, high)
	}

	// The initialized field is left at its zero value (false).
	return &realImageGCManager{
		runtime:       runtime,
		statsProvider: statsProvider,
		recorder:      recorder,
		nodeRef:       nodeRef,
		policy:        policy,
		sandboxImage:  sandboxImage,
		imageRecords:  make(map[string]*imageRecord),
	}, nil
}

 

2. StartGarbageCollection 函数

    主要是定期异步执行 image gc 流程

// StartGarbageCollection starts the periodic image garbage collection loop in a
// goroutine. The earlier part of the function body is elided in this excerpt.
func (kl *Kubelet) StartGarbageCollection() {
    。。。。。。
	// Remembers whether the previous image GC run failed, so that logging and
	// events can tell a first failure apart from repeated ones.
	prevImageGCFailed := false
	// Run image GC every ImageGCPeriod; wait.NeverStop is passed as the stop channel.
	go wait.Until(func() {
		if err := kl.imageManager.GarbageCollect(); err != nil {
			if prevImageGCFailed {
				klog.Errorf("Image garbage collection failed multiple times in a row: %v", err)
				// Only create an event for repeated failures
				// NOTE(review): err.Error() appears to sit in Eventf's format-string
				// position; a '%' in the message would be misformatted — confirm.
				kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
			} else {
				klog.Errorf("Image garbage collection failed once. Stats initialization may not have completed yet: %v", err)
			}
			prevImageGCFailed = true
		} else {
			// Log success at verbosity 4 normally, or at verbosity 1 when this
			// run follows a failed one; the failure flag is cleared in that case.
			var vLevel klog.Level = 4
			if prevImageGCFailed {
				vLevel = 1
				prevImageGCFailed = false
			}

			klog.V(vLevel).Infof("Image garbage collection succeeded")
		}
	}, ImageGCPeriod, wait.NeverStop)
}

    2.1 GarbageCollect

     路径: pkg/kubelet/images/image_gc_manager.go(下文 2.1.2 的 ImageFsStats 位于 pkg/kubelet/stats/cadvisor_stats_provider.go)

func (im *realImageGCManager) GarbageCollect() error {
	// Get disk usage on disk holding images.
	fsStats, err := im.statsProvider.ImageFsStats()
	if err != nil {
		return err
	}

   2.1.1 初始化 StatsProvider,默认使用 cadvisor

// Choose the stats provider: the cadvisor-based provider when
// UsingLegacyCadvisorStats reports true for this runtime and endpoint,
// otherwise the CRI-based provider.
if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
	klet.StatsProvider = stats.NewCadvisorStatsProvider(
		klet.cadvisor,
		klet.resourceAnalyzer,
		klet.podManager,
		klet.runtimeCache,
		klet.containerRuntime,
		klet.statusManager)
} else {
	// The CRI provider is additionally given the runtime service, the image
	// service, a log metrics service and an OS abstraction.
	klet.StatsProvider = stats.NewCRIStatsProvider(
		klet.cadvisor,
		klet.resourceAnalyzer,
		klet.podManager,
		klet.runtimeCache,
		runtimeService,
		imageService,
		stats.NewLogMetricsService(),
		kubecontainer.RealOS{})
}

    2.1.2 调用 cadvisor 获取 image 文件系统信息

// ImageFsStats returns the stats of the filesystem for storing images.
//
// Capacity, available bytes, the timestamp and the inode figures come from
// cadvisor's image filesystem info; UsedBytes comes from the image service's
// TotalStorageBytes. An error is returned when either source fails or when the
// image service returns no stats.
func (p *cadvisorStatsProvider) ImageFsStats() (*statsapi.FsStats, error) {
	imageFsInfo, err := p.cadvisor.ImagesFsInfo()
	if err != nil {
		return nil, fmt.Errorf("failed to get imageFs info: %v", err)
	}
	imageStats, err := p.imageService.ImageStats()
	if err != nil {
		return nil, fmt.Errorf("failed to get image stats: %v", err)
	}
	// A nil result with a nil error used to be reported as
	// "failed to get image stats: <nil>"; state the actual cause instead.
	if imageStats == nil {
		return nil, fmt.Errorf("failed to get image stats: image service returned no stats")
	}

	// InodesUsed is only derivable when both the total and the free counts are known.
	var imageFsInodesUsed *uint64
	if imageFsInfo.Inodes != nil && imageFsInfo.InodesFree != nil {
		imageFsIU := *imageFsInfo.Inodes - *imageFsInfo.InodesFree
		imageFsInodesUsed = &imageFsIU
	}

	return &statsapi.FsStats{
		Time:           metav1.NewTime(imageFsInfo.Timestamp),
		AvailableBytes: &imageFsInfo.Available,
		CapacityBytes:  &imageFsInfo.Capacity,
		UsedBytes:      &imageStats.TotalStorageBytes,
		InodesFree:     imageFsInfo.InodesFree,
		Inodes:         imageFsInfo.Inodes,
		InodesUsed:     imageFsInodesUsed,
	}, nil
}

  2.2 磁盘使用率达到或超过设置的最高阈值 image-gc-high-threshold 时,调用 freeSpace 触发回收

// If over the max threshold, free enough to place us at the lower threshold.
// available and capacity are defined outside this excerpt — presumably bytes
// of the image filesystem; TODO confirm.
usagePercent := 100 - int(available*100/capacity)
if usagePercent >= im.policy.HighThresholdPercent {
	// Bytes to free so that available space reaches
	// (100 - LowThresholdPercent)% of capacity.
	amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
	klog.Infof("[imageGCManager]: Disk usage on image filesystem is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes down to the low threshold (%d%%).", usagePercent, im.policy.HighThresholdPercent, amountToFree, im.policy.LowThresholdPercent)
	freed, err := im.freeSpace(amountToFree, time.Now())
	if err != nil {
		return err
	}

	// Freeing less than requested is reported both as an event and as an error.
	if freed < amountToFree {
		err := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed)
		// NOTE(review): err.Error() appears to sit in Eventf's format-string
		// position; a '%' in the message would be misformatted — confirm.
		im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
		return err
	}
}

 

3. freeSpace 函数

    检测节点上的 image,并删除未被使用的 image 以释放节点上的空间

    3.1 detectImages

      3.1.1 获得所有 image 和 pod

func (im *realImageGCManager) detectImages(detectTime time.Time) (sets.String, error) {
	imagesInUse := sets.NewString()

	// Always consider the container runtime pod sandbox image in use
	imageRef, err := im.runtime.GetImageRef(container.ImageSpec{Image: im.sandboxImage})
	if err == nil && imageRef != "" {
		imagesInUse.Insert(imageRef)
	}

	images, err := im.runtime.ListImages()
	if err != nil {
		return imagesInUse, err
	}
	pods, err := im.runtime.GetPods(true)
	if err != nil {
		return imagesInUse, err
	}

    3.1.2 imageRecords 保存 image 的 map 信息:新发现的 image 记录第一次发现的时间,正在被使用的 image 更新最后使用时间,并且每次都会记录 image 的大小

// Walk every listed image and refresh its record. currentImages and now are
// defined outside this excerpt.
for _, image := range images {
	klog.V(5).Infof("Adding image ID %s to currentImages", image.ID)
	currentImages.Insert(image.ID)

	// New image, set it as detected now.
	if _, ok := im.imageRecords[image.ID]; !ok {
		klog.V(5).Infof("Image ID %s is new", image.ID)
		im.imageRecords[image.ID] = &imageRecord{
			firstDetected: detectTime,
		}
	}

	// Set last used time to now if the image is being used.
	if isImageUsed(image.ID, imagesInUse) {
		klog.V(5).Infof("Setting Image ID %s lastUsed to %v", image.ID, now)
		im.imageRecords[image.ID].lastUsed = now
	}

	// The size is refreshed on every pass, whether or not the image is in use.
	klog.V(5).Infof("Image ID %s has size %d", image.ID, image.Size)
	im.imageRecords[image.ID].size = image.Size
}

   3.1.3 删除不再存在的记录

// Remove old images from our records: any record whose image ID is absent
// from currentImages is dropped.
for image := range im.imageRecords {
	if !currentImages.Has(image) {
		klog.V(5).Infof("Image ID %s is no longer present; removing from imageRecords", image)
		delete(im.imageRecords, image)
	}
}

    3.2 images 保存可被回收的候选 image(跳过正在被使用的),并通过 byLastUsedAndDetected 排序

// Get all images in eviction order.
// Collect eviction candidates. The slice is pre-sized to the number of
// records, and images that are in use are skipped.
images := make([]evictionInfo, 0, len(im.imageRecords))
for image, record := range im.imageRecords {
	if isImageUsed(image, imagesInUse) {
		klog.V(5).Infof("Image ID %s is being used", image)
		continue
	}
	// Store a copy of the record (dereferenced) alongside the image ID.
	images = append(images, evictionInfo{
		id:          image,
		imageRecord: *record,
	})
}
// byLastUsedAndDetected is defined outside this excerpt; presumably it orders
// by lastUsed and then firstDetected — TODO confirm.
sort.Sort(byLastUsedAndDetected(images))

    3.3 跳过正在被使用的镜像

// Delete unused images until we've freed up enough space.
var deletionErrors []error
spaceFreed := int64(0)
for _, image := range images {
	klog.V(5).Infof("Evaluating image ID %s for possible garbage collection", image.id)
	// Images that are currently in used were given a newer lastUsed.
	if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) {
		klog.V(5).Infof("Image ID %s has lastUsed=%v which is >= freeTime=%v, not eligible for garbage collection", image.id, image.lastUsed, freeTime)
		continue
	}

    3.4 如果镜像存在的时间还不够长(小于 MinAge),则跳过,不做回收

// Avoid garbage collecting the image if it is not old enough.
// In such a case, the image may have just been pulled down, and will be used by a container right away.
// Age is measured from the record's firstDetected time to freeTime.

if freeTime.Sub(image.firstDetected) < im.policy.MinAge {
	klog.V(5).Infof("Image ID %s has age %v which is less than the policy's minAge of %v, not eligible for garbage collection", image.id, freeTime.Sub(image.firstDetected), im.policy.MinAge)
	continue
}

    3.5 删除镜像,并累计已释放的空间大小;已释放的空间达到目标值后即停止 GC

// Remove image. Continue despite errors.
klog.Infof("[imageGCManager]: Removing image %q to free %d bytes", image.id, image.size)
err := im.runtime.RemoveImage(container.ImageSpec{Image: image.id})
if err != nil {
	// Record the failure and move on to the next candidate.
	deletionErrors = append(deletionErrors, err)
	continue
}
// Removal succeeded: drop the record and count the image's size as freed.
delete(im.imageRecords, image.id)
spaceFreed += image.size

// Stop as soon as the requested number of bytes has been freed.
if spaceFreed >= bytesToFree {
	break
}

 

总结:

  • 调用 cadvisor 获取 image 所在文件系统的信息(容量、可用空间、inode 等)
  • 磁盘使用率达到或超过设置的最高阈值 image-gc-high-threshold 时,调用 freeSpace 触发回收
  • detectImages 检测节点上的镜像,并获得所有正在被使用的镜像
  • 在未被使用的镜像中依次删除,直到释放的空间足以使磁盘使用率降到 LowThresholdPercent 为止
  • 2
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值