1. 环境说明
Kubernetes
源码版本:remotes/origin/release-1.25
Kubernetes
编译出来的Kubelet
版本:Kubernetes v1.24.0-beta.0.2463+ee7799bab469d7
Kubernetes
集群实验环境:使用Kubernetes v1.25.4
二进制的方式搭建了一个单节点集群
K8S 单节点集群的搭建可以参考:Kubernetes v1.25 搭建单节点集群用于Debug K8S源码
Golang
版本:go1.19.3 linux/amd64
IDEA
版本:2022.2.3
Delve
版本:1.9.1
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# dlv version
Delve Debugger
Version: 1.9.1
Build: $Id: d81b9fd12bfa603f3cf7a4bc842398bd61c42940 $
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# go version
go version go1.19.3 linux/amd64
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# kubectl version
WARNING: This version information is deprecated and will be replaced with the output from kubectl version --short. Use --output=yaml|json to get the full version.
Client Version: version.Info{Major:"1", Minor:"25", GitVersion:"v1.25.4", GitCommit:"872a965c6c6526caa949f0c6ac028ef7aff3fb78", GitTreeState:"clean", BuildDate:"2022-11-09T13:36:36Z", GoVersion:"go1.19.3", Compiler:"gc", Platform:"linux/amd64"}
Kustomize Version: v4.5.7
Server Version: version.Info{Major:"1", Minor:"25", GitVersion:"v1.25.4", GitCommit:"872a965c6c6526caa949f0c6ac028ef7aff3fb78", GitTreeState:"clean", BuildDate:"2022-11-09T13:29:58Z", GoVersion:"go1.19.3", Compiler:"gc", Platform:"linux/amd64"}
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# kubectl get nodes -owide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
k8s-master1 Ready <none> 31h v1.25.4 192.168.11.71 <none> CentOS Linux 7 (Core) 3.10.0-1160.80.1.el7.x86_64 containerd://1.6.10
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# kubectl get componentstatus
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
etcd-0 Healthy {"health":"true","reason":""}
controller-manager Healthy ok
scheduler Healthy ok
[root@k8s-master1 kubernetes]#
Kubelet
启动参数配置如下:
[root@k8s-master1 kubernetes]# ps -ef|grep "/usr/local/bin/kubelet"
root 7972 1 6 07:06 ? 00:00:06 /usr/local/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig --kubeconfig=/etc/kubernetes/kubelet.kubeconfig --config=/etc/kubernetes/kubelet-conf.yml --container-runtime-endpoint=unix:///run/containerd/containerd.sock --node-labels=node.kubernetes.io/node= --v=8
root 9549 6424 0 07:07 pts/0 00:00:00 grep --color=auto /usr/local/bin/kubelet
[root@k8s-master1 kubernetes]#
Kubelet
参数配置如下:
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
address: 0.0.0.0
port: 10250
readOnlyPort: 10255
authentication:
  anonymous:
    enabled: false
  webhook:
    cacheTTL: 2m0s
    enabled: true
  x509:
    clientCAFile: /etc/kubernetes/pki/ca.pem
authorization:
  mode: Webhook
  webhook:
    cacheAuthorizedTTL: 5m0s
    cacheUnauthorizedTTL: 30s
cgroupDriver: systemd
cgroupsPerQOS: true
clusterDNS:
- 10.96.0.10
clusterDomain: cluster.local
containerLogMaxFiles: 5
containerLogMaxSize: 10Mi
contentType: application/vnd.kubernetes.protobuf
cpuCFSQuota: true
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
enableControllerAttachDetach: true
enableDebuggingHandlers: true
enforceNodeAllocatable:
- pods
eventBurst: 10
eventRecordQPS: 5
evictionHard:
  imagefs.available: 15%
  memory.available: 100Mi
  nodefs.available: 10%
  nodefs.inodesFree: 5%
evictionPressureTransitionPeriod: 5m0s
failSwapOn: true
fileCheckFrequency: 20s
hairpinMode: promiscuous-bridge
healthzBindAddress: 127.0.0.1
healthzPort: 10248
httpCheckFrequency: 20s
imageGCHighThresholdPercent: 85
imageGCLowThresholdPercent: 80
imageMinimumGCAge: 2m0s
iptablesDropBit: 15
iptablesMasqueradeBit: 14
kubeAPIBurst: 10
kubeAPIQPS: 5
makeIPTablesUtilChains: true
maxOpenFiles: 1000000
maxPods: 110
nodeStatusUpdateFrequency: 10s
oomScoreAdj: -999
podPidsLimit: -1
registryBurst: 10
registryPullQPS: 5
resolvConf: /etc/resolv.conf
rotateCertificates: true
runtimeRequestTimeout: 2m0s
serializeImagePulls: true
staticPodPath: /etc/kubernetes/manifests
streamingConnectionIdleTimeout: 4h0m0s
syncFrequency: 1m0s
volumeStatsAggPeriod: 1m0s
2. 组件概览
组件概览 |
PodManager
主要职责如下:
PodManager
负责存储和管理对Pod
的访问,维护Static Pod
以及Mirror Pod
之间的映射。Kubelet
从3
个来源发现Pod
的更新,分别是:HTTP, File, Apiserver
。所有来自非Apiserver
源的Pod
就称为Static Pod
。Apiserver
并不知道Static Pod
的存在。因此,为了能够监视Static Pod
,Kubelet
会通过Apiserver
为每个Static Pod
创建一个Mirror Pod。
Mirror Pod
和Static Pod
具有相同的Pod Fullname(name + namespace)
,尽管Mirror Pod
和Static Pod
的元数据不同,譬如UID
。通过利用Kubelet
使用Pod
的Fullname
报告Pod
状态这个特征,Mirror Pod
的状态始终反映Static Pod
的实际状态,当Static Pod
被删除的时候,关联的孤立Mirror Pod
也将被删除
- 注意:
PodManager
并非仅仅维护StaticPod
和MirrorPod
之间的映射关系,PodManager
还缓存了当前节点的所有Pod
3. 源码分析
Manager |
我们先来看看PodManager
的接口定义,如下:
只要理解了组件概览中对于PodManager
工作职责的介绍,我们再来看PodManager
的接口定义就不难理解了
// pkg/kubelet/pod/pod_manager.go
// Manager stores and manages access to pods, and maintains the mappings
// between static pods and their mirror pods.
type Manager interface {
// GetPods returns the regular (non-mirror) pods bound to the kubelet and
// their spec.
GetPods() []*v1.Pod
// GetPodByFullName returns the (non-mirror) pod that matches full name, as well as
// whether the pod was found.
GetPodByFullName(podFullName string) (*v1.Pod, bool)
// GetPodByName provides the (non-mirror) pod that matches namespace and
// name, as well as whether the pod was found.
GetPodByName(namespace, name string) (*v1.Pod, bool)
// GetPodByUID provides the (non-mirror) pod that matches pod UID, as well as
// whether the pod is found.
GetPodByUID(types.UID) (*v1.Pod, bool)
// GetPodByMirrorPod returns the static pod for the given mirror pod and
// whether it was known to the pod manager.
GetPodByMirrorPod(*v1.Pod) (*v1.Pod, bool)
// GetMirrorPodByPod returns the mirror pod for the given static pod and
// whether it was known to the pod manager.
GetMirrorPodByPod(*v1.Pod) (*v1.Pod, bool)
// GetPodsAndMirrorPods returns both the regular pods (first result) and
// the mirror pods (second result).
GetPodsAndMirrorPods() ([]*v1.Pod, []*v1.Pod)
// SetPods replaces the internal pods with the new pods.
// It is currently only used for testing.
SetPods(pods []*v1.Pod)
// AddPod adds the given pod to the manager.
AddPod(pod *v1.Pod)
// UpdatePod updates the given pod in the manager.
UpdatePod(pod *v1.Pod)
// DeletePod deletes the given pod from the manager. For mirror pods,
// this means deleting the mappings related to mirror pods. For non-
// mirror pods, this means deleting from indexes for all non-mirror pods.
DeletePod(pod *v1.Pod)
// GetOrphanedMirrorPodNames returns the full names of orphaned mirror
// pods, i.e. mirror pods for which the manager holds no non-mirror pod
// with the same full name.
GetOrphanedMirrorPodNames() []string
// TranslatePodUID returns the actual UID of a pod. If the UID belongs to
// a mirror pod, returns the UID of its static pod. Otherwise, returns the
// original UID.
//
// All public-facing functions should perform this translation for UIDs
// because user may provide a mirror pod UID, which is not recognized by
// internal Kubelet functions.
TranslatePodUID(uid types.UID) kubetypes.ResolvedPodUID
// GetUIDTranslations returns the mappings of static pod UIDs to mirror pod
// UIDs and mirror pod UIDs to static pod UIDs.
GetUIDTranslations() (podToMirror map[kubetypes.ResolvedPodUID]kubetypes.MirrorPodUID, mirrorToPod map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID)
// IsMirrorPodOf returns true if mirrorPod is a correct representation of
// pod; false otherwise.
IsMirrorPodOf(mirrorPod, pod *v1.Pod) bool
// MirrorClient is embedded, so its method set is part of Manager as well.
MirrorClient
}
既然是管理StaticPod
与Mirror Pod
的一一映射,那么其中必然是通过Map
来实现的。MirrorPod
和Static Pod
之间沟通的桥梁就是Fullname
,也就是metadata.name + metadata.namespace
。PodManager
的所有方法实现比较简单,我们在这里就不再一一分析了。
// pkg/kubelet/pod/pod_manager.go
// basicManager holds the pod manager's state: regular and mirror pods are
// kept in separate indices, keyed both by UID and by pod full name, plus a
// mirror-UID to pod-UID translation table. All maps are guarded by lock.
type basicManager struct {
// Protects all internal maps.
lock sync.RWMutex
// Regular pods indexed by UID.
podByUID map[kubetypes.ResolvedPodUID]*v1.Pod
// Mirror pods indexed by UID.
mirrorPodByUID map[kubetypes.MirrorPodUID]*v1.Pod
// Pods indexed by full name for easy access. Regular pods and mirror pods
// are kept in separate maps because a static pod and its mirror pod share
// the same full name.
podByFullName map[string]*v1.Pod
mirrorPodByFullName map[string]*v1.Pod
// Mirror pod UID to pod UID map.
translationByUID map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID
// basicManager is keeping secretManager and configMapManager up-to-date.
// NOTE(review): no use of these two fields is visible in this excerpt —
// confirm against the full file.
secretManager secret.Manager
configMapManager configmap.Manager
// A mirror pod client to create/delete mirror pods.
MirrorClient
}
所有接口实现如下:
// pkg/kubelet/pod/pod_manager.go
// SetPods throws away every index held by the manager and rebuilds them
// from newPods. The write lock is held for the whole operation.
func (pm *basicManager) SetPods(newPods []*v1.Pod) {
	pm.lock.Lock()
	defer pm.lock.Unlock()

	// Start from empty indices so pods that are absent from newPods vanish.
	pm.podByUID = map[kubetypes.ResolvedPodUID]*v1.Pod{}
	pm.podByFullName = map[string]*v1.Pod{}
	pm.mirrorPodByUID = map[kubetypes.MirrorPodUID]*v1.Pod{}
	pm.mirrorPodByFullName = map[string]*v1.Pod{}
	pm.translationByUID = map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID{}

	pm.updatePodsInternal(newPods...)
}
// AddPod registers pod with the manager. Adding and updating are the same
// operation: the pod is written into the indices while the write lock is
// held.
func (pm *basicManager) AddPod(pod *v1.Pod) {
	pm.lock.Lock()
	defer pm.lock.Unlock()

	pm.updatePodsInternal(pod)
}
// UpdatePod writes pod into the manager's indices, replacing any entry that
// is stored under the same keys. The write lock is held while the indices
// are modified.
func (pm *basicManager) UpdatePod(pod *v1.Pod) {
	pm.lock.Lock()
	defer pm.lock.Unlock()

	changed := []*v1.Pod{pod}
	pm.updatePodsInternal(changed...)
}
// updateMetrics adjusts the ManagedEphemeralContainers metric by the
// difference between the number of ephemeral containers in newPod and in
// oldPod. Either argument may be nil (creation or deletion respectively), in
// which case it contributes zero containers. Nothing is recorded when the
// difference is zero.
func updateMetrics(oldPod, newPod *v1.Pod) {
	before, after := 0, 0
	if oldPod != nil {
		before = len(oldPod.Spec.EphemeralContainers)
	}
	if newPod != nil {
		after = len(newPod.Spec.EphemeralContainers)
	}

	if delta := after - before; delta != 0 {
		metrics.ManagedEphemeralContainers.Add(float64(delta))
	}
}
// updatePodsInternal stores each of the given pods in the manager's indices,
// overwriting whatever was stored under the same UID / full name, and keeps
// the mirror-UID to pod-UID translation table in step. The caller must hold
// the write lock.
func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
	for i := range pods {
		current := pods[i]
		// A static pod and its mirror pod share the same full name; that
		// shared name is what links the two sets of indices together.
		fullName := kubecontainer.GetPodFullName(current)

		if !kubetypes.IsMirrorPod(current) {
			// Not a mirror pod, so its UID is already the resolved UID.
			uid := kubetypes.ResolvedPodUID(current.UID)
			updateMetrics(pm.podByUID[uid], current)
			pm.podByUID[uid] = current
			pm.podByFullName[fullName] = current
			if mirror, found := pm.mirrorPodByFullName[fullName]; found {
				pm.translationByUID[kubetypes.MirrorPodUID(mirror.UID)] = uid
			}
			continue
		}

		// Mirror pod: the IsMirrorPod check above makes this conversion safe.
		mirrorUID := kubetypes.MirrorPodUID(current.UID)
		pm.mirrorPodByUID[mirrorUID] = current
		pm.mirrorPodByFullName[fullName] = current
		if regular, found := pm.podByFullName[fullName]; found {
			pm.translationByUID[mirrorUID] = kubetypes.ResolvedPodUID(regular.UID)
		}
	}
}
// DeletePod removes pod from the manager's indices. A mirror pod is removed
// from the mirror indices together with its UID translation entry; any other
// pod is removed from the regular pod indices only (translation entries that
// point at it are left in place).
//
// The ephemeral-container metric is decremented before the lock is taken,
// based on the pod passed in, and for mirror and regular pods alike.
// NOTE(review): updatePodsInternal only counts regular pods — confirm the
// asymmetry is intended.
func (pm *basicManager) DeletePod(pod *v1.Pod) {
	updateMetrics(pod, nil)

	pm.lock.Lock()
	defer pm.lock.Unlock()

	fullName := kubecontainer.GetPodFullName(pod)
	if !kubetypes.IsMirrorPod(pod) {
		delete(pm.podByUID, kubetypes.ResolvedPodUID(pod.UID))
		delete(pm.podByFullName, fullName)
		return
	}

	// Mirror pod: the IsMirrorPod check above makes this conversion safe.
	mirrorUID := kubetypes.MirrorPodUID(pod.UID)
	delete(pm.mirrorPodByUID, mirrorUID)
	delete(pm.mirrorPodByFullName, fullName)
	delete(pm.translationByUID, mirrorUID)
}
// GetPods returns a freshly allocated slice holding every regular
// (non-mirror) pod known to the manager, in no particular order.
func (pm *basicManager) GetPods() []*v1.Pod {
	pm.lock.RLock()
	defer pm.lock.RUnlock()

	regular := make([]*v1.Pod, 0, len(pm.podByUID))
	for _, p := range pm.podByUID {
		regular = append(regular, p)
	}
	return regular
}
// GetPodsAndMirrorPods returns the regular pods as the first result and the
// mirror pods as the second, both read under a single read lock.
func (pm *basicManager) GetPodsAndMirrorPods() ([]*v1.Pod, []*v1.Pod) {
	pm.lock.RLock()
	defer pm.lock.RUnlock()

	return podsMapToPods(pm.podByUID), mirrorPodsMapToMirrorPods(pm.mirrorPodByUID)
}
// GetPodByUID looks up a regular (non-mirror) pod by UID and reports whether
// it was found.
func (pm *basicManager) GetPodByUID(uid types.UID) (*v1.Pod, bool) {
	pm.lock.RLock()
	defer pm.lock.RUnlock()

	// podByUID only ever holds non-mirror pods, so treating uid as a
	// resolved UID for the lookup is safe.
	key := kubetypes.ResolvedPodUID(uid)
	match, found := pm.podByUID[key]
	return match, found
}
// GetPodByName looks up a regular (non-mirror) pod by namespace and name. It
// builds the pod's full name and delegates to GetPodByFullName.
func (pm *basicManager) GetPodByName(namespace, name string) (*v1.Pod, bool) {
	return pm.GetPodByFullName(kubecontainer.BuildPodFullName(name, namespace))
}
// GetPodByFullName looks up a regular (non-mirror) pod by its full name and
// reports whether it was found.
func (pm *basicManager) GetPodByFullName(podFullName string) (*v1.Pod, bool) {
	pm.lock.RLock()
	defer pm.lock.RUnlock()

	match, found := pm.podByFullName[podFullName]
	return match, found
}
// TranslatePodUID maps uid to the UID the manager's regular-pod indices use:
// if uid is a known mirror pod UID, the UID recorded for it in the
// translation table is returned; otherwise uid is returned unchanged. An
// empty uid is returned as-is without taking the lock.
func (pm *basicManager) TranslatePodUID(uid types.UID) kubetypes.ResolvedPodUID {
	// Converting to a resolved UID is idempotent, so this is a safe default.
	resolved := kubetypes.ResolvedPodUID(uid)
	if uid == "" {
		return resolved
	}

	pm.lock.RLock()
	defer pm.lock.RUnlock()

	if target, isMirror := pm.translationByUID[kubetypes.MirrorPodUID(uid)]; isMirror {
		resolved = target
	}
	return resolved
}
// GetUIDTranslations returns two freshly built maps: podToMirror (pod UID to
// mirror pod UID) and mirrorToPod (mirror pod UID to pod UID).
//
// Every static pod appears in podToMirror. A static pod without a mirror pod
// maps to the empty string, which lets callers tell "no mirror pod" apart
// from a real translation.
func (pm *basicManager) GetUIDTranslations() (podToMirror map[kubetypes.ResolvedPodUID]kubetypes.MirrorPodUID,
	mirrorToPod map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID) {
	pm.lock.RLock()
	defer pm.lock.RUnlock()

	sizeHint := len(pm.translationByUID)
	podToMirror = make(map[kubetypes.ResolvedPodUID]kubetypes.MirrorPodUID, sizeHint)
	mirrorToPod = make(map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID, sizeHint)

	// Seed every static pod with an empty translation first...
	for podUID, pod := range pm.podByUID {
		if kubetypes.IsStaticPod(pod) {
			podToMirror[podUID] = ""
		}
	}

	// ...then overwrite the seeds with the translations that actually exist.
	for mirrorUID, podUID := range pm.translationByUID {
		mirrorToPod[mirrorUID] = podUID
		podToMirror[podUID] = mirrorUID
	}

	return podToMirror, mirrorToPod
}
// GetOrphanedMirrorPodNames returns the full names of mirror pods for which
// the manager holds no regular pod with the same full name. The result is
// nil when there are none.
func (pm *basicManager) GetOrphanedMirrorPodNames() []string {
	pm.lock.RLock()
	defer pm.lock.RUnlock()

	var orphans []string
	for fullName := range pm.mirrorPodByFullName {
		if _, hasPod := pm.podByFullName[fullName]; hasPod {
			continue
		}
		orphans = append(orphans, fullName)
	}
	return orphans
}
// IsMirrorPodOf reports whether mirrorPod represents pod: both must have the
// same name and namespace, mirrorPod must carry a hash, and that hash must
// equal the hash computed for pod.
func (pm *basicManager) IsMirrorPodOf(mirrorPod, pod *v1.Pod) bool {
	// Cheap identity comparison first; hashes are only consulted on a match.
	sameIdentity := pod.Name == mirrorPod.Name && pod.Namespace == mirrorPod.Namespace
	if !sameIdentity {
		return false
	}

	mirrorHash, found := getHashFromMirrorPod(mirrorPod)
	return found && mirrorHash == getPodHash(pod)
}
// podsMapToPods returns the values of UIDMap as a newly allocated slice. The
// order follows map iteration and is therefore unspecified.
func podsMapToPods(UIDMap map[kubetypes.ResolvedPodUID]*v1.Pod) []*v1.Pod {
	result := make([]*v1.Pod, len(UIDMap))
	next := 0
	for _, p := range UIDMap {
		result[next] = p
		next++
	}
	return result
}
// mirrorPodsMapToMirrorPods returns the values of UIDMap as a newly
// allocated slice. The order follows map iteration and is therefore
// unspecified.
func mirrorPodsMapToMirrorPods(UIDMap map[kubetypes.MirrorPodUID]*v1.Pod) []*v1.Pod {
	result := make([]*v1.Pod, len(UIDMap))
	next := 0
	for _, mirror := range UIDMap {
		result[next] = mirror
		next++
	}
	return result
}
// GetMirrorPodByPod returns the mirror pod stored under the same full name
// as pod, and whether one was found.
func (pm *basicManager) GetMirrorPodByPod(pod *v1.Pod) (*v1.Pod, bool) {
	pm.lock.RLock()
	defer pm.lock.RUnlock()

	fullName := kubecontainer.GetPodFullName(pod)
	mirror, found := pm.mirrorPodByFullName[fullName]
	return mirror, found
}
// GetPodByMirrorPod returns the regular pod stored under the same full name
// as mirrorPod, and whether one was found.
func (pm *basicManager) GetPodByMirrorPod(mirrorPod *v1.Pod) (*v1.Pod, bool) {
	pm.lock.RLock()
	defer pm.lock.RUnlock()

	fullName := kubecontainer.GetPodFullName(mirrorPod)
	regular, found := pm.podByFullName[fullName]
	return regular, found
}