kubelet提供了回收conatainer和image的机制,用来释放资源。
对于container的回收,有下面三个参数
a. --minimum-container-ttl-duration: 容器退出后多久可以被回收。默认值为0,随时可被回收
b. --maximum-dead-containers-per-container: 每个容器可以保留多少个退出的实例,默认值为-1,可以保留任意多个退出的实例
c. --maximum-dead-containers: 整个node上可以保留多少个退出的容器实例,默认值为-1,可以保留任意多个退出的实例
这三个参数会被废弃,使用–eviction-hard or --eviction-soft实现
对于image的回收,有下面三个参数
a. --minimum-image-ttl-duration: 镜像不被使用后,多久可以被回收,
b. --image-gc-high-threshold: 当磁盘使用率达到多少时,可以进行image回收。如果设置为100则不进行image回收
c. --image-gc-low-threshold: 当开始回收image时,磁盘使用率降低到多少时停止image回收,值不能大于--image-gc-high-threshold
这三个参数也可以在kubelet的配置文件中指定,参考:https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/
源码分析
- 先通过NewMainKubelet创建垃圾回收的manager,再调用StartGarbageCollection启动两个协程分别回收container和image
func createAndInitKubelet(...)
//1.1 创建垃圾回收的manager
k, err = kubelet.NewMainKubelet(...)
...
//1.2 启动两个协程分别回收container和image
k.StartGarbageCollection()
1.1 创建垃圾回收的manager
func NewMainKubelet(...)
//回收container的参数
containerGCPolicy := kubecontainer.GCPolicy{
MinAge: minimumGCAge.Duration,
MaxPerPodContainer: int(maxPerPodContainerCount),
MaxContainers: int(maxContainerCount),
}
//回收image的参数
imageGCPolicy := images.ImageGCPolicy{
MinAge: kubeCfg.ImageMinimumGCAge.Duration,
HighThresholdPercent: int(kubeCfg.ImageGCHighThresholdPercent),
LowThresholdPercent: int(kubeCfg.ImageGCLowThresholdPercent),
}
//1.1.1 创建KubeGenericRuntimeManager
runtime, err := kuberuntime.NewKubeGenericRuntimeManager(...)
klet.containerRuntime = runtime
//1.1.2 创建containerGC
// setup containerGC
containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy, klet.sourcesReady)
if err != nil {
return nil, err
}
klet.containerGC = containerGC
//1.1.3 创建imageManager
// setup imageManager
imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage)
if err != nil {
return nil, fmt.Errorf("failed to initialize image manager: %v", err)
}
klet.imageManager = imageManager
1.1.1 创建KubeGenericRuntimeManager
container的回收最终会调用到kubeRuntimeManager.containerGC
func NewKubeGenericRuntimeManager(...)
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
1.1.2 创建containerGC
// NewContainerGC creates a new instance of GC with the specified policy.
func NewContainerGC(runtime Runtime, policy GCPolicy, sourcesReadyProvider SourcesReadyProvider) (GC, error) {
if policy.MinAge < 0 {
return nil, fmt.Errorf("invalid minimum garbage collection age: %v", policy.MinAge)
}
return &realContainerGC{
runtime: runtime,
policy: policy,
sourcesReadyProvider: sourcesReadyProvider,
}, nil
}
1.1.3 创建imageManager
// NewImageGCManager instantiates a new ImageGCManager object.
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string) (ImageGCManager, error) {
// Validate policy.
if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 {
return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent)
}
if policy.LowThresholdPercent < 0 || policy.LowThresholdPercent > 100 {
return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", policy.LowThresholdPercent)
}
if policy.LowThresholdPercent > policy.HighThresholdPercent {
return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent)
}
im := &realImageGCManager{
runtime: runtime,
policy: policy,
imageRecords: make(map[string]*imageRecord),
statsProvider: statsProvider,
recorder: recorder,
nodeRef: nodeRef,
initialized: false,
sandboxImage: sandboxImage,
}
return im, nil
}
1.2 启动两个协程分别回收container和image
// StartGarbageCollection starts garbage collection threads.
func (kl *Kubelet) StartGarbageCollection() {
loggedContainerGCFailure := false
go wait.Until(func() {
//1.2.1 启动协程回收container
if err := kl.containerGC.GarbageCollect(); err != nil {
klog.ErrorS(err, "Container garbage collection failed")
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, err.Error())
loggedContainerGCFailure = true
} else {
var vLevel klog.Level = 4
if loggedContainerGCFailure {
vLevel = 1
loggedContainerGCFailure = false
}
klog.V(vLevel).InfoS("Container garbage collection succeeded")
}
}, ContainerGCPeriod, wait.NeverStop)
//如果--image-gc-high-threshold设置为100,则不进行image的回收
// when the high threshold is set to 100, stub the image GC manager
if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 {
klog.V(2).InfoS("ImageGCHighThresholdPercent is set 100, Disable image GC")
return
}
prevImageGCFailed := false
go wait.Until(func() {
//1.2.2 启动协程回收image
if err := kl.imageManager.GarbageCollect(); err != nil {
if prevImageGCFailed {
klog.ErrorS(err, "Image garbage collection failed multiple times in a row")
// Only create an event for repeated failures
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
} else {
klog.ErrorS(err, "Image garbage collection failed once. Stats initialization may not have completed yet")
}
prevImageGCFailed = true
} else {
var vLevel klog.Level = 4
if prevImageGCFailed {
vLevel = 1
prevImageGCFailed = false
}
klog.V(vLevel).InfoS("Image garbage collection succeeded")
}
}, ImageGCPeriod, wait.NeverStop)
}
1.2.1 启动协程回收container
按照回收策略GCPolicy回收container,Sandboxe和pod的log目录
//pkg/kubelet/container/container_gc.go
func (cgc *realContainerGC) GarbageCollect() error {
return cgc.runtime.GarbageCollect(cgc.policy, cgc.sourcesReadyProvider.AllReady(), false)
}
//pkg/kubelet/kuberuntime/kuberuntime_manager.go
// GarbageCollect removes dead containers using the specified container gc policy.
func (m *kubeGenericRuntimeManager) GarbageCollect(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
return m.containerGC.GarbageCollect(gcPolicy, allSourcesReady, evictNonDeletedPods)
}
//pkg/kubelet/kuberuntime/kuberuntime_gc.go
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
errors := []error{}
// Remove evictable containers
if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods); err != nil {
errors = append(errors, err)
}
// Remove sandboxes with zero containers
if err := cgc.evictSandboxes(evictNonDeletedPods); err != nil {
errors = append(errors, err)
}
// Remove pod sandbox log directory
if err := cgc.evictPodLogsDirectories(allSourcesReady); err != nil {
errors = append(errors, err)
}
return utilerrors.NewAggregate(errors)
}
1.2.2 启动协程回收image
如果磁盘使用率超过指定的阈值,就会调用runtime的RemoveImage回收image
//pkg/kubelet/images/image_gc_manager.go:
func (im *realImageGCManager) GarbageCollect() error {
// Get disk usage on disk holding images.
fsStats, err := im.statsProvider.ImageFsStats()
if err != nil {
return err
}
var capacity, available int64
if fsStats.CapacityBytes != nil {
capacity = int64(*fsStats.CapacityBytes)
}
if fsStats.AvailableBytes != nil {
available = int64(*fsStats.AvailableBytes)
}
if available > capacity {
klog.InfoS("Availability is larger than capacity", "available", available, "capacity", capacity)
available = capacity
}
// Check valid capacity.
if capacity == 0 {
err := goerrors.New("invalid capacity 0 on image filesystem")
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error())
return err
}
//超出阈值了,需要调用freeSpace释放image
// If over the max threshold, free enough to place us at the lower threshold.
usagePercent := 100 - int(available*100/capacity)
if usagePercent >= im.policy.HighThresholdPercent {
amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
klog.InfoS("Disk usage on image filesystem is over the high threshold, trying to free bytes down to the low threshold", "usage", usagePercent, "highThreshold", im.policy.HighThresholdPercent, "amountToFree", amountToFree, "lowThreshold", im.policy.LowThresholdPercent)
freed, err := im.freeSpace(amountToFree, time.Now())
if err != nil {
return err
}
if freed < amountToFree {
err := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed)
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
return err
}
}
return nil
}