WHY
容器停止文件系统并不会自动清除,通过 docker ps -a 也能够看到这些资源。kubelet有container gc的方案用于清理宿主机上的非所需容器
容器镜像耗存储资源,会导致宿主机上会留下不再需要的容器镜像,kubelet设计了一套image gc的方案
WHAT
Kubernetes 通过 imageController 和kubelet中集成的cAdvisor共同管理镜像的生命周期,根据node的磁盘使用触发镜像的GC
Kubernetes 垃圾回收(Garbage Collection)机制由kubelet完成,kubelet定期清理不再使用的容器和镜像,每分钟进行一次容器的GC,每五分钟进行一次镜像的GC
HOW
createAndInitKubelet
--> StartGarbageCollection
--> kl.containerGC.GarbageCollect
--> kl.imageManager.GarbageCollect
本文只分析 image gc部分
- --minimum-image-ttl-duration,默认 2m0s,回收 image 最小年龄
- --image-gc-high-threshold,默认 85,高于此阈值将进行回收
- --image-gc-low-threshold,默认 80,低于此阈值不进行会
imageGCPolicy := images.ImageGCPolicy{
MinAge: kubeCfg.ImageMinimumGCAge.Duration,
HighThresholdPercent: int(kubeCfg.ImageGCHighThresholdPercent),
LowThresholdPercent: int(kubeCfg.ImageGCLowThresholdPercent),
}
1. 实例化 image manager
// setup imageManager
imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage)
if err != nil {
return nil, fmt.Errorf("failed to initialize image manager: %v", err)
}
klet.imageManager = imageManager
1.1 ImageGCManager 接口
// ImageGCManager is an interface for managing lifecycle of all images.
// Implementation is thread-safe.
type ImageGCManager interface {
// Applies the garbage collection policy. Errors include being unable to free
// enough space as per the garbage collection policy.
GarbageCollect() error
// Start async garbage collection of images.
Start()
GetImageList() ([]container.Image, error)
// Delete all unused images.
DeleteUnusedImages() error
}
1.2 NewImageGCManager 函数
realIMageGCManager 实现了 ImageGCManager 接口
// NewImageGCManager instantiates a new ImageGCManager object.
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string) (ImageGCManager, error) {
// Validate policy.
if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 {
return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent)
}
if policy.LowThresholdPercent < 0 || policy.LowThresholdPercent > 100 {
return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", policy.LowThresholdPercent)
}
if policy.LowThresholdPercent > policy.HighThresholdPercent {
return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent)
}
im := &realImageGCManager{
runtime: runtime,
policy: policy,
imageRecords: make(map[string]*imageRecord),
statsProvider: statsProvider,
recorder: recorder,
nodeRef: nodeRef,
initialized: false,
sandboxImage: sandboxImage,
}
return im, nil
}
2. StartGarbageCollection 函数
主要是定期异步执行 image gc 流程
func (kl *Kubelet) StartGarbageCollection() {
。。。。。。
prevImageGCFailed := false
go wait.Until(func() {
if err := kl.imageManager.GarbageCollect(); err != nil {
if prevImageGCFailed {
klog.Errorf("Image garbage collection failed multiple times in a row: %v", err)
// Only create an event for repeated failures
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
} else {
klog.Errorf("Image garbage collection failed once. Stats initialization may not have completed yet: %v", err)
}
prevImageGCFailed = true
} else {
var vLevel klog.Level = 4
if prevImageGCFailed {
vLevel = 1
prevImageGCFailed = false
}
klog.V(vLevel).Infof("Image garbage collection succeeded")
}
}, ImageGCPeriod, wait.NeverStop)
}
2.1 GarbageCollect
路径: pkg/kubelet/stats/cadvisor_stats_provider.go
func (im *realImageGCManager) GarbageCollect() error {
// Get disk usage on disk holding images.
fsStats, err := im.statsProvider.ImageFsStats()
if err != nil {
return err
}
2.1.1 初始化 StatsProvider,默认使用 cadvisor
if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
klet.StatsProvider = stats.NewCadvisorStatsProvider(
klet.cadvisor,
klet.resourceAnalyzer,
klet.podManager,
klet.runtimeCache,
klet.containerRuntime,
klet.statusManager)
} else {
klet.StatsProvider = stats.NewCRIStatsProvider(
klet.cadvisor,
klet.resourceAnalyzer,
klet.podManager,
klet.runtimeCache,
runtimeService,
imageService,
stats.NewLogMetricsService(),
kubecontainer.RealOS{})
}
2.1.2 调用 cadvisor 获取 image 文件系统信息
// ImageFsStats returns the stats of the filesystem for storing images.
func (p *cadvisorStatsProvider) ImageFsStats() (*statsapi.FsStats, error) {
imageFsInfo, err := p.cadvisor.ImagesFsInfo()
if err != nil {
return nil, fmt.Errorf("failed to get imageFs info: %v", err)
}
imageStats, err := p.imageService.ImageStats()
if err != nil || imageStats == nil {
return nil, fmt.Errorf("failed to get image stats: %v", err)
}
var imageFsInodesUsed *uint64
if imageFsInfo.Inodes != nil && imageFsInfo.InodesFree != nil {
imageFsIU := *imageFsInfo.Inodes - *imageFsInfo.InodesFree
imageFsInodesUsed = &imageFsIU
}
return &statsapi.FsStats{
Time: metav1.NewTime(imageFsInfo.Timestamp),
AvailableBytes: &imageFsInfo.Available,
CapacityBytes: &imageFsInfo.Capacity,
UsedBytes: &imageStats.TotalStorageBytes,
InodesFree: imageFsInfo.InodesFree,
Inodes: imageFsInfo.Inodes,
InodesUsed: imageFsInodesUsed,
}, nil
}
2.2 使用的资源已经大于设置的最高阈值image-gc-high-threshold将调用 freeSpace 会触发回收
// If over the max threshold, free enough to place us at the lower threshold.
usagePercent := 100 - int(available*100/capacity)
if usagePercent >= im.policy.HighThresholdPercent {
amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
klog.Infof("[imageGCManager]: Disk usage on image filesystem is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes down to the low threshold (%d%%).", usagePercent, im.policy.HighThresholdPercent, amountToFree, im.policy.LowThresholdPercent)
freed, err := im.freeSpace(amountToFree, time.Now())
if err != nil {
return err
}
if freed < amountToFree {
err := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed)
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
return err
}
}
3. freeSpace 函数
发现 image 释放节点上空间
3.1 detectImage
3.1.1 获得所有 image 和 pod
func (im *realImageGCManager) detectImages(detectTime time.Time) (sets.String, error) {
imagesInUse := sets.NewString()
// Always consider the container runtime pod sandbox image in use
imageRef, err := im.runtime.GetImageRef(container.ImageSpec{Image: im.sandboxImage})
if err == nil && imageRef != "" {
imagesInUse.Insert(imageRef)
}
images, err := im.runtime.ListImages()
if err != nil {
return imagesInUse, err
}
pods, err := im.runtime.GetPods(true)
if err != nil {
return imagesInUse, err
}
3.1.2 imageRecords 保存 image map信息,如果 image 被使用,则记录最后使用时间和image大小和第一次发现的时间
for _, image := range images {
klog.V(5).Infof("Adding image ID %s to currentImages", image.ID)
currentImages.Insert(image.ID)
// New image, set it as detected now.
if _, ok := im.imageRecords[image.ID]; !ok {
klog.V(5).Infof("Image ID %s is new", image.ID)
im.imageRecords[image.ID] = &imageRecord{
firstDetected: detectTime,
}
}
// Set last used time to now if the image is being used.
if isImageUsed(image.ID, imagesInUse) {
klog.V(5).Infof("Setting Image ID %s lastUsed to %v", image.ID, now)
im.imageRecords[image.ID].lastUsed = now
}
klog.V(5).Infof("Image ID %s has size %d", image.ID, image.Size)
im.imageRecords[image.ID].size = image.Size
}
3.1.3 删除不再存在的记录
// Remove old images from our records.
for image := range im.imageRecords {
if !currentImages.Has(image) {
klog.V(5).Infof("Image ID %s is no longer present; removing from imageRecords", image)
delete(im.imageRecords, image)
}
}
3.2 images 缓存需要删除的image
// Get all images in eviction order.
images := make([]evictionInfo, 0, len(im.imageRecords))
for image, record := range im.imageRecords {
if isImageUsed(image, imagesInUse) {
klog.V(5).Infof("Image ID %s is being used", image)
continue
}
images = append(images, evictionInfo{
id: image,
imageRecord: *record,
})
}
sort.Sort(byLastUsedAndDetected(images))
3.3 pass 正在被使用的镜像
// Delete unused images until we've freed up enough space.
var deletionErrors []error
spaceFreed := int64(0)
for _, image := range images {
klog.V(5).Infof("Evaluating image ID %s for possible garbage collection", image.id)
// Images that are currently in used were given a newer lastUsed.
if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) {
klog.V(5).Infof("Image ID %s has lastUsed=%v which is >= freeTime=%v, not eligible for garbage collection", image.id, image.lastUsed, freeTime)
continue
}
3.4 如果不是老的够删除了,就无需处理
// Avoid garbage collect the image if the image is not old enough.
// In such a case, the image may have just been pulled down, and will be used by a container right away.
if freeTime.Sub(image.firstDetected) < im.policy.MinAge {
klog.V(5).Infof("Image ID %s has age %v which is less than the policy's minAge of %v, not eligible for garbage collection", image.id, freeTime.Sub(image.firstDetected), im.policy.MinAge)
continue
}
3.5 删除镜像,清除累计镜像大小,已经足够就无需GC操作了
// Remove image. Continue despite errors.
klog.Infof("[imageGCManager]: Removing image %q to free %d bytes", image.id, image.size)
err := im.runtime.RemoveImage(container.ImageSpec{Image: image.id})
if err != nil {
deletionErrors = append(deletionErrors, err)
continue
}
delete(im.imageRecords, image.id)
spaceFreed += image.size
if spaceFreed >= bytesToFree {
break
}
总结:
- 调用 cadvisor 获取 image 信息,节点信息
- 使用的资源已经大于设置的最高阈值image-gc-high-threshold将调用 freeSpace 会触发回收
- detect 发现镜像获得所有正在内使用的镜像
- 在没有使用的镜像里删除,直到机器上的资源已经小于设置的LowThresholdPercent,可以不用回收了