kubelet是负责干活的组件,它会定期地清理多余的、已经死掉的容器和镜像。这篇blog基于kubernetes 1.7.6的代码,对gc做深入的源码分析。
好了,gc是随着kubelet启动而启动的,先看pkg/kubelet/kubelet.go:
// StartGarbageCollection launches the container and image garbage collection
// loops. Each loop runs in its own goroutine through wait.Until, using
// ContainerGCPeriod and ImageGCPeriod respectively, and is given
// wait.NeverStop as its stop channel.
func (kl *Kubelet) StartGarbageCollection() {
	// Tracks whether the previous container GC run failed, so that the first
	// success after a failure is logged at a lower verbosity level.
	loggedContainerGCFailure := false
	go wait.Until(func() {
		if err := kl.containerGC.GarbageCollect(); err != nil {
			glog.Errorf("Container garbage collection failed: %v", err)
			// Pass the error as an argument rather than as the format string,
			// so that a '%' inside the error text is not treated as a verb.
			kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, "%v", err)
			loggedContainerGCFailure = true
		} else {
			var vLevel glog.Level = 4
			if loggedContainerGCFailure {
				vLevel = 1
				loggedContainerGCFailure = false
			}
			glog.V(vLevel).Infof("Container garbage collection succeeded")
		}
	}, ContainerGCPeriod, wait.NeverStop)

	// Tracks whether the previous image GC run failed. A single failure is
	// only logged; an event is emitted once failures repeat.
	prevImageGCFailed := false
	go wait.Until(func() {
		if err := kl.imageManager.GarbageCollect(); err != nil {
			if prevImageGCFailed {
				glog.Errorf("Image garbage collection failed multiple times in a row: %v", err)
				// Only create an event for repeated failures. The error is
				// passed as an argument, not as the format string.
				kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, "%v", err)
			} else {
				glog.Errorf("Image garbage collection failed once. Stats initialization may not have completed yet: %v", err)
			}
			prevImageGCFailed = true
		} else {
			var vLevel glog.Level = 4
			if prevImageGCFailed {
				vLevel = 1
				prevImageGCFailed = false
			}
			glog.V(vLevel).Infof("Image garbage collection succeeded")
		}
	}, ImageGCPeriod, wait.NeverStop)
}
上面的代码分别启动了容器的gc(containerGC.GarbageCollect())和镜像的gc(imageManager.GarbageCollect())。
先看容器的gc。kubelet层的containerGC.GarbageCollect()不带参数,最终会调用到pkg/kubelet/kuberuntime/kuberuntime_gc.go里带参数的实现:
// GarbageCollect runs the three clean-up stages in order and stops at the
// first stage that returns an error:
//  1. evictContainers
//  2. evictSandboxes
//  3. evictPodLogsDirectories
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
	// Stage 1: evictable containers.
	err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods)
	if err != nil {
		return err
	}

	// Stage 2: sandboxes that no longer hold any container.
	err = cgc.evictSandboxes(evictNonDeletedPods)
	if err != nil {
		return err
	}

	// Stage 3: pod sandbox log directories; its result is the overall result.
	return cgc.evictPodLogsDirectories(allSourcesReady)
}
这里删除容器分为两个部分,一个是删除业务容器,一个是删除Sandbox容器,当然最后还会去删除它们的日志目录。
业务容器删除
先看删除业务容器的过程
// evictContainers removes the containers reported by evictableContainers in
// three passes:
//  1. when allSourcesReady is true, every container of an evict unit whose pod
//     is deleted (or of every unit when evictNonDeletedPods is true);
//  2. containers beyond gcPolicy.MaxPerPodContainer in each remaining unit;
//  3. containers beyond gcPolicy.MaxContainers in total.
//
// A negative limit disables the corresponding pass. The only error returned is
// the one from evictableContainers.
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
	// Group the candidate containers by evict unit.
	units, err := cgc.evictableContainers(gcPolicy.MinAge)
	if err != nil {
		return err
	}

	// Pass 1: only safe once all sources are ready.
	if allSourcesReady {
		for unitKey, dead := range units {
			if !cgc.isPodDeleted(unitKey.uid) && !evictNonDeletedPods {
				continue
			}
			// Drop every container of this unit, then forget the unit.
			cgc.removeOldestN(dead, len(dead))
			delete(units, unitKey)
		}
	}

	// Pass 2: per-unit cap.
	if gcPolicy.MaxPerPodContainer >= 0 {
		cgc.enforceMaxContainersPerEvictUnit(units, gcPolicy.MaxPerPodContainer)
	}

	// Pass 3: global cap. Nothing to do when it is disabled or already met.
	if gcPolicy.MaxContainers < 0 || units.NumContainers() <= gcPolicy.MaxContainers {
		return nil
	}

	// Give every unit an equal share of the budget, but never less than one.
	share := gcPolicy.MaxContainers / units.NumEvictUnits()
	if share < 1 {
		share = 1
	}
	cgc.enforceMaxContainersPerEvictUnit(units, share)

	// If the total is still above the cap, evict the oldest across all units.
	remaining := units.NumContainers()
	if remaining > gcPolicy.MaxContainers {
		all := make([]containerGCInfo, 0, remaining)
		for _, dead := range units {
			all = append(all, dead...)
		}
		sort.Sort(byCreated(all))
		cgc.removeOldestN(all, remaining-gcPolicy.MaxContainers)
	}
	return nil
}
上面的代码做了多层判断:首先取出已经退出且超过gcPolicy.MinAge时间的容器,如果其所属的pod已经被删除(或者evictNonDeletedPods为true),就把这些容器全部删除;然后按每个驱逐单元(evict unit)允许保留的最大死亡容器数gcPolicy.MaxPerPodContainer进行清理;最后按整个机器上死亡容器的上限gcPolicy.MaxContainers进行清理。
evictableContainers用来获取可以驱逐的容器(已退出且超过gcPolicy.MinAge时间的容器),具体看下面:
// evictableContainers (excerpt; elided parts are marked with "...") walks the
// containers returned by getKubeletContainers(true) and skips the ones that
// are still running or that were created after newestGCTime.
func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, error) {
containers, err := cgc.manager.getKubeletContainers(true)
...
for _, container := range containers {
// A running container is never a candidate.
if container.State == runtimeapi.ContainerState_CONTAINER_RUNNING {
continue
}
// CreatedAt is passed as the nanosecond argument of time.Unix.
createdAt := time.Unix(0, container.CreatedAt)
// Skip containers created after newestGCTime. Its computation is elided
// here; presumably it is derived from minAge - confirm against the full source.
if newestGCTime.Before(createdAt) {
continue
}
...
}
然后是按单个驱逐单元的死亡容器上限进行删除的逻辑:
// enforceMaxContainersPerEvictUnit trims every evict unit that holds more than
// MaxContainers containers. The surplus is handed to removeOldestN and the
// slice it returns replaces the unit's entry in evictUnits.
func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) {
	for unitKey, dead := range evictUnits {
		surplus := len(dead) - MaxContainers
		if surplus <= 0 {
			// Already within the limit.
			continue
		}
		evictUnits[unitKey] = cgc.removeOldestN(dead, surplus)
	}
}
最后是删除超过整机上限的死亡容器(按创建时间排序,最老的优先删除):
// Excerpt from evictContainers: enforce the global container limit.
// Count what is left across all evict units.
numContainers := evictUnits.NumContainers()
if numContainers > gcPolicy.MaxContainers {
// Flatten every unit into one slice so that age is compared across units.
flattened := make([]containerGCInfo, 0, numContainers)
for key := range evictUnits {
flattened = append(flattened, evictUnits[key]...)
}
// Order by creation time, then remove exactly the surplus over the limit.
// NOTE(review): removeOldestN is not shown here; presumably it removes from
// the front of the sorted slice - confirm against its definition.
sort.Sort(byCreated(flattened))
cgc.removeOldestN(flattened, numContainers-gcPolicy.MaxContainers)
}
Sandbox容器删除
然后是删除Sandbox容器,代码同样在pkg/kubelet/kuberuntime/kuberuntime_gc.go:
// evictSandboxes garbage-collects pod sandboxes.
//
// Every sandbox returned by getKubeletSandboxes(true) is grouped by pod UID and
// marked active when it is in the SANDBOX_READY state or when at least one
// container returned by getKubeletContainers(true) references it. For a pod
// that is deleted (or for every pod when evictNonDeletedPods is true) all of
// its sandboxes are passed to removeOldestNSandboxes; otherwise all but the
// newest one are passed.
//
// NOTE(review): removeOldestNSandboxes is not shown here; it is expected to
// leave active sandboxes alone - confirm against its definition.
//
// The only errors returned are those from listing containers or sandboxes.
func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
	containers, err := cgc.manager.getKubeletContainers(true)
	if err != nil {
		return err
	}

	sandboxes, err := cgc.manager.getKubeletSandboxes(true)
	if err != nil {
		return err
	}

	// Collect the sandbox IDs referenced by containers once, so the check
	// below is a map lookup instead of a scan over every container for
	// every sandbox.
	sandboxIDsWithContainers := make(map[string]struct{}, len(containers))
	for _, container := range containers {
		sandboxIDsWithContainers[container.PodSandboxId] = struct{}{}
	}

	sandboxesByPod := make(sandboxesByPodUID)
	for _, sandbox := range sandboxes {
		podUID := types.UID(sandbox.Metadata.Uid)
		sandboxInfo := sandboxGCInfo{
			id:         sandbox.Id,
			createTime: time.Unix(0, sandbox.CreatedAt),
		}

		// Set ready sandboxes to be active.
		if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
			sandboxInfo.active = true
		}

		// A sandbox that still has containers attached is active as well.
		if _, hasContainers := sandboxIDsWithContainers[sandbox.Id]; hasContainers {
			sandboxInfo.active = true
		}

		sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
	}

	// Sort the sandboxes by age.
	for uid := range sandboxesByPod {
		sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
	}

	for podUID, sandboxes := range sandboxesByPod {
		if cgc.isPodDeleted(podUID) || evictNonDeletedPods {
			// The pod is being evicted from this node: all of its sandboxes
			// are candidates for removal.
			cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
		} else {
			// Otherwise keep the newest sandbox.
			cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
		}
	}
	return nil
}
与上面业务容器不同的是,Sandbox要被删除必须同时满足两个前提:第一,不能是ready状态;第二,不能关联任何容器。在此基础上,如果pod已经被删除(或者evictNonDeletedPods为true,即pod要被驱逐出这台机器),它的Sandbox可以全部删除;如果pod仍然在这个节点上,则还需要保留最新的一个。