From the previous articles we know that kube-scheduler pops pods from the scheduling queue and schedules them. So how do pods get into that queue in the first place, and how is the node list obtained? The answer, obviously, is the informer mechanism: addAllEventHandlers registers event handlers for the following kinds of API resources:
a. newly created, not-yet-scheduled pods, filtered with a FilteringResourceEventHandler
b. successfully scheduled pods, also filtered with a FilteringResourceEventHandler
c. add/update/delete of Node resources
d. add/update/delete of PV/PVC and other resources
During scheduler creation, New calls addAllEventHandlers to register these event handlers:
// New returns a Scheduler
func New(client clientset.Interface,
informerFactory informers.SharedInformerFactory,
recorderFactory profile.RecorderFactory,
stopCh <-chan struct{},
opts ...Option) (*Scheduler, error) {
...
//map that records which plugins care about which cluster events
clusterEventMap := make(map[framework.ClusterEvent]sets.String)
configurator := &Configurator{
...
clusterEventMap: clusterEventMap,
}
var sched *Scheduler
if options.legacyPolicySource == nil {
// Create the config from component config
//profile.NewMap->newProfile->NewFramework populates clusterEventMap from the plugins when the framework is created, see the code analysis below
sc, err := configurator.create()
if err != nil {
return nil, fmt.Errorf("couldn't create scheduler: %v", err)
}
sched = sc
}
...
addAllEventHandlers(sched, informerFactory, dynInformerFactory, unionedGVKs(clusterEventMap))
}
clusterEventMap
When NewFramework initializes the plugins it calls fillEventToPluginMap to update options.clusterEventMap. The key of this map is a ClusterEvent and the value is a set of plugin names, i.e. one event may affect multiple plugins. clusterEventMap is also passed to the scheduling queue, where MoveAllToActiveOrBackoffQueue uses it to decide whether a resource-change event can affect the scheduling of a pod; this is described in detail later.
// NewFramework initializes plugins given the configuration and the registry.
func NewFramework(r Registry, profile *config.KubeSchedulerProfile, opts ...Option) (framework.Framework, error) {
options := defaultFrameworkOptions()
...
for name, factory := range r {
// initialize only needed plugins.
if _, ok := pg[name]; !ok {
continue
}
...
p, err := factory(args, f)
...
// Update ClusterEventMap in place.
fillEventToPluginMap(p, options.clusterEventMap)
}
}
var allClusterEvents = []framework.ClusterEvent{
{Resource: framework.Pod, ActionType: framework.All},
{Resource: framework.Node, ActionType: framework.All},
{Resource: framework.CSINode, ActionType: framework.All},
{Resource: framework.PersistentVolume, ActionType: framework.All},
{Resource: framework.PersistentVolumeClaim, ActionType: framework.All},
{Resource: framework.StorageClass, ActionType: framework.All},
}
func fillEventToPluginMap(p framework.Plugin, eventToPlugins map[framework.ClusterEvent]sets.String) {
//if the plugin does not implement the EnqueueExtensions interface, register all events in allClusterEvents for it, meaning any of those events could affect the plugin's scheduling decision for a pod
ext, ok := p.(framework.EnqueueExtensions)
if !ok {
// If interface EnqueueExtensions is not implemented, register the default events
// to the plugin. This is to ensure backward compatibility.
registerClusterEvents(p.Name(), eventToPlugins, allClusterEvents)
return
}
//get the events the plugin declares
events := ext.EventsToRegister()
// It's rare that a plugin implements EnqueueExtensions but returns nil.
// We treat it as: the plugin is not interested in any event, and hence pod failed by that plugin
// cannot be moved by any regular cluster event.
if len(events) == 0 {
klog.InfoS("Plugin's EventsToRegister() returned nil", "plugin", p.Name())
return
}
// The most common case: a plugin implements EnqueueExtensions and returns non-nil result.
registerClusterEvents(p.Name(), eventToPlugins, events)
}
//iterate over evts and merge them into eventToPlugins: key is the event, value is the set of plugin names, i.e. one event may affect multiple plugins
func registerClusterEvents(name string, eventToPlugins map[framework.ClusterEvent]sets.String, evts []framework.ClusterEvent) {
for _, evt := range evts {
if eventToPlugins[evt] == nil {
eventToPlugins[evt] = sets.NewString(name)
} else {
eventToPlugins[evt].Insert(name)
}
}
}
The events declared by the NodeName plugin say that a Node Add event may change its scheduling result for a pod:
func (pl *NodeName) EventsToRegister() []framework.ClusterEvent {
return []framework.ClusterEvent{
{Resource: framework.Node, ActionType: framework.Add},
}
}
The events declared by the ServiceAffinity plugin say that Pod Update/Delete events, Node Add/UpdateNodeLabel events and Service Update/Delete events may affect pod scheduling, i.e. they may turn an unschedulable pod into a schedulable one:
// EventsToRegister returns the possible events that may make a Pod
// failed by this plugin schedulable.
func (pl *ServiceAffinity) EventsToRegister() []framework.ClusterEvent {
if len(pl.args.AffinityLabels) == 0 {
return nil
}
return []framework.ClusterEvent{
// Suppose there is a running Pod backs a Service, and the unschedulable Pod subjects
// to the same Service, but failed because of mis-matched affinity labels.
// - if the running Pod's labels get updated, it may not back the Service anymore, and
// hence make the unschedulable Pod schedulable.
// - if the running Pod gets deleted, the unschedulable Pod may also become schedulable.
{Resource: framework.Pod, ActionType: framework.Update | framework.Delete},
// A new Node or updating a Node's labels may make a Pod schedulable.
{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel},
// Update or delete of a Service may break the correlation of the Pods that previously
// backed it, and hence make a Pod schedulable.
{Resource: framework.Service, ActionType: framework.Update | framework.Delete},
}
}
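To make the shape of clusterEventMap concrete, here is a hedged sketch of what it might contain if only the two plugins above were registered (illustrative values, not dumped from a real cluster):
// Illustrative only. Note that NodeName's {Node, Add} and ServiceAffinity's
// {Node, Add | UpdateNodeLabel} remain separate keys, because registerClusterEvents
// keys the map by the exact ClusterEvent value.
clusterEventMap := map[framework.ClusterEvent]sets.String{
	{Resource: framework.Node, ActionType: framework.Add}:                             sets.NewString("NodeName"),
	{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel}: sets.NewString("ServiceAffinity"),
	{Resource: framework.Pod, ActionType: framework.Update | framework.Delete}:        sets.NewString("ServiceAffinity"),
	{Resource: framework.Service, ActionType: framework.Update | framework.Delete}:    sets.NewString("ServiceAffinity"),
}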
unionedGVKs
unionedGVKs iterates over the keys of clusterEventMap and collapses them into gvkMap (key: resource type, value: union of action types). gvkMap is then passed to addAllEventHandlers, which walks it and registers handlers for changes to those resources.
func unionedGVKs(m map[framework.ClusterEvent]sets.String) map[framework.GVK]framework.ActionType {
gvkMap := make(map[framework.GVK]framework.ActionType)
for evt := range m {
if _, ok := gvkMap[evt.Resource]; ok {
gvkMap[evt.Resource] |= evt.ActionType
} else {
gvkMap[evt.Resource] = evt.ActionType
}
}
return gvkMap
}
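Continuing the illustrative clusterEventMap sketched above, unionedGVKs would OR the action types per resource and return roughly:
// Illustrative result: one entry per resource, with all of its events' ActionTypes ORed together.
gvkMap := map[framework.GVK]framework.ActionType{
	framework.Node:    framework.Add | framework.UpdateNodeLabel,
	framework.Pod:     framework.Update | framework.Delete,
	framework.Service: framework.Update | framework.Delete,
}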
addAllEventHandlers
addAllEventHandlers registers the handlers for resource-change events.
func addAllEventHandlers(
sched *Scheduler,
informerFactory informers.SharedInformerFactory,
dynInformerFactory dynamicinformer.DynamicSharedInformerFactory,
gvkMap map[framework.GVK]framework.ActionType,
) {
//add successfully scheduled pods to the cache
informerFactory.Core().V1().Pods().Informer().AddEventHandler(
//a filter is applied here: only when it returns true is the object considered a target event and the handler functions invoked
cache.FilteringResourceEventHandler{
FilterFunc: func(obj interface{}) bool {
switch t := obj.(type) {
case *v1.Pod:
//assignedPod is implemented as: len(pod.Spec.NodeName) != 0
//we only care about pods whose pod.Spec.NodeName is not empty; there are two kinds:
//pods that were successfully scheduled by kube-scheduler and bound to a node,
//and pods that explicitly specified NodeName when they were created
//(a paraphrased sketch of assignedPod and responsibleForPod follows addAllEventHandlers below)
return assignedPod(t)
case cache.DeletedFinalStateUnknown:
if pod, ok := t.Obj.(*v1.Pod); ok {
return assignedPod(pod)
}
utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
return false
default:
utilruntime.HandleError(fmt.Errorf("unable to handle object in %T: %T", sched, obj))
return false
}
},
//add/update/delete handlers
Handler: cache.ResourceEventHandlerFuncs{
AddFunc: sched.addPodToCache,
UpdateFunc: sched.updatePodInCache,
DeleteFunc: sched.deletePodFromCache,
},
},
)
//add not-yet-scheduled pods to the scheduling queue
informerFactory.Core().V1().Pods().Informer().AddEventHandler(
cache.FilteringResourceEventHandler{
FilterFunc: func(obj interface{}) bool {
switch t := obj.(type) {
case *v1.Pod:
//two conditions, and both must hold:
//a. pod.Spec.NodeName is empty
//b. responsibleForPod is implemented as profiles.HandlesSchedulerName(pod.Spec.SchedulerName),
//i.e. the scheduler name the pod asks for exists in the profiles
return !assignedPod(t) && responsibleForPod(t, sched.Profiles)
case cache.DeletedFinalStateUnknown:
if pod, ok := t.Obj.(*v1.Pod); ok {
return !assignedPod(pod) && responsibleForPod(pod, sched.Profiles)
}
utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
return false
default:
utilruntime.HandleError(fmt.Errorf("unable to handle object in %T: %T", sched, obj))
return false
}
},
//add/update/delete handlers
Handler: cache.ResourceEventHandlerFuncs{
AddFunc: sched.addPodToSchedulingQueue,
UpdateFunc: sched.updatePodInSchedulingQueue,
DeleteFunc: sched.deletePodFromSchedulingQueue,
},
},
)
//add/update/delete handlers for node events; no filtering is needed here
informerFactory.Core().V1().Nodes().Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
AddFunc: sched.addNodeToCache,
UpdateFunc: sched.updateNodeInCache,
DeleteFunc: sched.deleteNodeFromCache,
},
)
buildEvtResHandler := func(at framework.ActionType, gvk framework.GVK, shortGVK string) cache.ResourceEventHandlerFuncs {
funcs := cache.ResourceEventHandlerFuncs{}
if at&framework.Add != 0 {
evt := framework.ClusterEvent{Resource: gvk, ActionType: framework.Add, Label: fmt.Sprintf("%vAdd", shortGVK)}
funcs.AddFunc = func(_ interface{}) {
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(evt, nil)
}
}
if at&framework.Update != 0 {
evt := framework.ClusterEvent{Resource: gvk, ActionType: framework.Update, Label: fmt.Sprintf("%vUpdate", shortGVK)}
funcs.UpdateFunc = func(_, _ interface{}) {
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(evt, nil)
}
}
if at&framework.Delete != 0 {
evt := framework.ClusterEvent{Resource: gvk, ActionType: framework.Delete, Label: fmt.Sprintf("%vDelete", shortGVK)}
funcs.DeleteFunc = func(_ interface{}) {
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(evt, nil)
}
}
return funcs
}
//iterate over gvkMap and register handlers for PV/PVC and other resources; whichever of add/update/delete happens, it may allow a previously failed pod to be scheduled, so every handler calls
//MoveAllToActiveOrBackoffQueue to move pods from the unschedulable queue to the active or backoff queue
for gvk, at := range gvkMap {
switch gvk {
case framework.Node, framework.Pod:
// Do nothing.
case framework.CSINode:
informerFactory.Storage().V1().CSINodes().Informer().AddEventHandler(
buildEvtResHandler(at, framework.CSINode, "CSINode"),
)
case framework.CSIDriver:
informerFactory.Storage().V1().CSIDrivers().Informer().AddEventHandler(
buildEvtResHandler(at, framework.CSIDriver, "CSIDriver"),
)
case framework.CSIStorageCapacity:
informerFactory.Storage().V1beta1().CSIStorageCapacities().Informer().AddEventHandler(
buildEvtResHandler(at, framework.CSIStorageCapacity, "CSIStorageCapacity"),
)
case framework.PersistentVolume:
// MaxPDVolumeCountPredicate: since it relies on the counts of PV.
//
// PvAdd: Pods created when there are no PVs available will be stuck in
// unschedulable queue. But unbound PVs created for static provisioning and
// delay binding storage class are skipped in PV controller dynamic
// provisioning and binding process, will not trigger events to schedule pod
// again. So we need to move pods to active queue on PV add for this
// scenario.
//
// PvUpdate: Scheduler.bindVolumesWorker may fail to update assumed pod volume
// bindings due to conflicts if PVs are updated by PV controller or other
// parties, then scheduler will add pod back to unschedulable queue. We
// need to move pods to active queue on PV update for this scenario.
informerFactory.Core().V1().PersistentVolumes().Informer().AddEventHandler(
buildEvtResHandler(at, framework.PersistentVolume, "Pv"),
)
case framework.PersistentVolumeClaim:
// MaxPDVolumeCountPredicate: add/update PVC will affect counts of PV when it is bound.
informerFactory.Core().V1().PersistentVolumeClaims().Informer().AddEventHandler(
buildEvtResHandler(at, framework.PersistentVolumeClaim, "Pvc"),
)
case framework.StorageClass:
if at&framework.Add != 0 {
informerFactory.Storage().V1().StorageClasses().Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
AddFunc: sched.onStorageClassAdd,
},
)
}
if at&framework.Update != 0 {
informerFactory.Storage().V1().StorageClasses().Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
UpdateFunc: func(_, _ interface{}) {
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(queue.StorageClassUpdate, nil)
},
},
)
}
case framework.Service:
// ServiceAffinity: affected by the selector of the service is updated.
// Also, if new service is added, equivalence cache will also become invalid since
// existing pods may be "captured" by this service and change this predicate result.
informerFactory.Core().V1().Services().Informer().AddEventHandler(
buildEvtResHandler(at, framework.Service, "Service"),
)
default:
// Tests may not instantiate dynInformerFactory.
if dynInformerFactory == nil {
continue
}
// GVK is expected to be at least 3-folded, separated by dots.
// <kind in plural>.<version>.<group>
// Valid examples:
// - foos.v1.example.com
// - bars.v1beta1.a.b.c
// Invalid examples:
// - foos.v1 (2 sections)
// - foo.v1.example.com (the first section should be plural)
if strings.Count(string(gvk), ".") < 2 {
klog.ErrorS(nil, "incorrect event registration", "gvk", gvk)
continue
}
// Fall back to try dynamic informers.
gvr, _ := schema.ParseResourceArg(string(gvk))
dynInformer := dynInformerFactory.ForResource(*gvr).Informer()
dynInformer.AddEventHandler(
buildEvtResHandler(at, gvk, strings.Title(gvr.Resource)),
)
go dynInformer.Run(sched.StopEverything)
}
}
}
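For reference, the two filter helpers used above are tiny; in pkg/scheduler/eventhandlers.go they look roughly like this (paraphrased sketch, not a verbatim copy):
// assignedPod selects pods that already have a node assigned.
func assignedPod(pod *v1.Pod) bool {
	return len(pod.Spec.NodeName) != 0
}

// responsibleForPod returns true if the pod asks for a scheduler name handled by one
// of this scheduler's profiles.
func responsibleForPod(pod *v1.Pod, profiles profile.Map) bool {
	return profiles.HandlesSchedulerName(pod.Spec.SchedulerName)
}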
For the filtered handlers, an "Add" event is not necessarily the Add event the informer received from the apiserver; it may be the result of the OnUpdate function below. Take addPodToCache, the Add handler for scheduled pods: such a pod is clearly not newly created, because a newly created pod's Add event is handled by addPodToSchedulingQueue. When a pod is scheduled and bound successfully, what arrives from the apiserver is an Update event, handled by OnUpdate. In that function r.FilterFunc checks that pod.Spec.NodeName is not empty, which holds for newObj but not for oldObj, so the case newer && !older is taken and the r.Handler.OnAdd that gets called is addPodToCache.
// OnUpdate ensures the proper handler is called depending on whether the filter matches
func (r FilteringResourceEventHandler) OnUpdate(oldObj, newObj interface{}) {
newer := r.FilterFunc(newObj)
older := r.FilterFunc(oldObj)
switch {
case newer && older:
r.Handler.OnUpdate(oldObj, newObj)
case newer && !older:
r.Handler.OnAdd(newObj)
case !newer && older:
r.Handler.OnDelete(oldObj)
default:
// do nothing
}
}
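Viewed from both handlers at once: when the scheduler binds a pod, the same apiserver Update event makes the cache handler's filter flip from false to true, so its OnAdd (addPodToCache) runs, while the queue handler's filter flips from true to false, so its OnDelete (deletePodFromSchedulingQueue) runs. One watch event thus moves the pod from the "queue view" to the "cache view".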
Let's now look at a few important event handlers.
addPodToCache
addPodToCache adds a successfully scheduled pod to the cache. If the pod is still in the assumed state (a normally scheduled pod should be), it is removed from the assumed set; if it is no longer assumed, it has expired and the pod has to be added back. It also moves pods in the unschedulable queue that have an affinity to this pod into the active or backoff queue.
func (sched *Scheduler) addPodToCache(obj interface{}) {
pod, ok := obj.(*v1.Pod)
if !ok {
klog.ErrorS(nil, "Cannot convert to *v1.Pod", "obj", obj)
return
}
klog.V(3).InfoS("Add event for scheduled pod", "pod", klog.KObj(pod))
//if the pod is still assumed (a normally scheduled pod should be), remove it from the assumed set; if not, it has already expired and the pod needs to be added back
if err := sched.SchedulerCache.AddPod(pod); err != nil {
klog.ErrorS(err, "Scheduler cache AddPod failed", "pod", klog.KObj(pod))
}
//move pods in the unschedulable queue that have an affinity to this pod to the active or backoff queue
sched.SchedulingQueue.AssignedPodAdded(pod)
}
deletePodFromCache
deletePodFromCache removes the pod from the cache and then signals an AssignedPodDelete event to move unschedulable pods (for example pods that were blocked by an anti-affinity with the deleted pod) to the active or backoff queue.
func (sched *Scheduler) deletePodFromCache(obj interface{}) {
var pod *v1.Pod
switch t := obj.(type) {
case *v1.Pod:
pod = t
case cache.DeletedFinalStateUnknown:
var ok bool
pod, ok = t.Obj.(*v1.Pod)
if !ok {
klog.ErrorS(nil, "Cannot convert to *v1.Pod", "obj", t.Obj)
return
}
default:
klog.ErrorS(nil, "Cannot convert to *v1.Pod", "obj", t)
return
}
klog.V(3).InfoS("Delete event for scheduled pod", "pod", klog.KObj(pod))
// NOTE: Updates must be written to scheduler cache before invalidating
// equivalence cache, because we could snapshot equivalence cache after the
// invalidation and then snapshot the cache itself. If the cache is
// snapshotted before updates are written, we would update equivalence
// cache with stale information which is based on snapshot of old cache.
if err := sched.SchedulerCache.RemovePod(pod); err != nil {
klog.ErrorS(err, "Scheduler cache RemovePod failed", "pod", klog.KObj(pod))
}
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(queue.AssignedPodDelete, nil)
}
addPodToSchedulingQueue
addPodToSchedulingQueue adds a newly created pod to the scheduling queue to wait for scheduling.
func (sched *Scheduler) addPodToSchedulingQueue(obj interface{}) {
pod := obj.(*v1.Pod)
klog.V(3).InfoS("Add event for unscheduled pod", "pod", klog.KObj(pod))
if err := sched.SchedulingQueue.Add(pod); err != nil {
utilruntime.HandleError(fmt.Errorf("unable to queue %T: %v", obj, err))
}
}
deletePodFromSchedulingQueue
deletePodFromSchedulingQueue removes a pod from the scheduling queue, e.g. a freshly created pod that was deleted before it ever got scheduled.
func (sched *Scheduler) deletePodFromSchedulingQueue(obj interface{}) {
var pod *v1.Pod
switch t := obj.(type) {
case *v1.Pod:
pod = obj.(*v1.Pod)
case cache.DeletedFinalStateUnknown:
var ok bool
pod, ok = t.Obj.(*v1.Pod)
if !ok {
utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched))
return
}
default:
utilruntime.HandleError(fmt.Errorf("unable to handle object in %T: %T", sched, obj))
return
}
klog.V(3).InfoS("Delete event for unscheduled pod", "pod", klog.KObj(pod))
if err := sched.SchedulingQueue.Delete(pod); err != nil {
utilruntime.HandleError(fmt.Errorf("unable to dequeue %T: %v", obj, err))
}
fwk, err := sched.frameworkForPod(pod)
if err != nil {
// This shouldn't happen, because we only accept for scheduling the pods
// which specify a scheduler name that matches one of the profiles.
klog.ErrorS(err, "Unable to get profile", "pod", klog.KObj(pod))
return
}
// If a waiting pod is rejected, it indicates it's previously assumed and we're
// removing it from the scheduler cache. In this case, signal a AssignedPodDelete
// event to immediately retry some unscheduled Pods.
if fwk.RejectWaitingPod(pod.UID) {
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(queue.AssignedPodDelete, nil)
}
}
addNodeToCache
addNodeToCache adds the node to the cache and also moves pods from the unschedulable queue to the active or backoff queue, since the new node may make them schedulable (preCheckForNode is discussed after the code).
func (sched *Scheduler) addNodeToCache(obj interface{}) {
node, ok := obj.(*v1.Node)
if !ok {
klog.ErrorS(nil, "Cannot convert to *v1.Node", "obj", obj)
return
}
nodeInfo := sched.SchedulerCache.AddNode(node)
klog.V(3).InfoS("Add event for node", "node", klog.KObj(node))
sched.SchedulingQueue.MoveAllToActiveOrBackoffQueue(queue.NodeAdd, preCheckForNode(nodeInfo))
}
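preCheckForNode (not listed here) builds the optional PreEnqueueCheck that MoveAllToActiveOrBackoffQueue applies below: for a node add it roughly asks whether the pod could plausibly fit the new node (resource requests, node affinity, node name, host ports). Assuming the type defined in pkg/scheduler/internal/queue, the check is just a per-pod predicate:
// PreEnqueueCheck is a per-pod predicate: only pods for which it returns true are
// considered for the move out of the unschedulable queue.
type PreEnqueueCheck func(pod *v1.Pod) bool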
deleteNodeFromCache
deleteNodeFromCache removes the node from the cache.
func (sched *Scheduler) deleteNodeFromCache(obj interface{}) {
var node *v1.Node
switch t := obj.(type) {
case *v1.Node:
node = t
case cache.DeletedFinalStateUnknown:
var ok bool
node, ok = t.Obj.(*v1.Node)
if !ok {
klog.ErrorS(nil, "Cannot convert to *v1.Node", "obj", t.Obj)
return
}
default:
klog.ErrorS(nil, "Cannot convert to *v1.Node", "obj", t)
return
}
klog.V(3).InfoS("Delete event for node", "node", klog.KObj(node))
// NOTE: Updates must be written to scheduler cache before invalidating
// equivalence cache, because we could snapshot equivalence cache after the
// invalidation and then snapshot the cache itself. If the cache is
// snapshotted before updates are written, we would update equivalence
// cache with stale information which is based on snapshot of old cache.
if err := sched.SchedulerCache.RemoveNode(node); err != nil {
klog.ErrorS(err, "Scheduler cache RemoveNode failed")
}
}
MoveAllToActiveOrBackoffQueue
MoveAllToActiveOrBackoffQueue moves pods from the unschedulable queue to activeQ or backoffQ. The move is not unconditional; a pod is moved only if it satisfies both:
a. it passes the preCheck
b. it passes the event check (podMatchesEvent)
// MoveAllToActiveOrBackoffQueue moves all pods from unschedulableQ to activeQ or backoffQ.
// This function adds all pods and then signals the condition variable to ensure that
// if Pop() is waiting for an item, it receives the signal after all the pods are in the
// queue and the head is the highest priority pod.
func (p *PriorityQueue) MoveAllToActiveOrBackoffQueue(event framework.ClusterEvent, preCheck PreEnqueueCheck) {
p.lock.Lock()
defer p.lock.Unlock()
unschedulablePods := make([]*framework.QueuedPodInfo, 0, len(p.unschedulableQ.podInfoMap))
for _, pInfo := range p.unschedulableQ.podInfoMap {
if preCheck == nil || preCheck(pInfo.Pod) {
unschedulablePods = append(unschedulablePods, pInfo)
}
}
p.movePodsToActiveOrBackoffQueue(unschedulablePods, event)
}
// NOTE: this function assumes lock has been acquired in caller
func (p *PriorityQueue) movePodsToActiveOrBackoffQueue(podInfoList []*framework.QueuedPodInfo, event framework.ClusterEvent) {
moved := false
for _, pInfo := range podInfoList {
// If the event doesn't help making the Pod schedulable, continue.
// Note: we don't run the check if pInfo.UnschedulablePlugins is nil, which denotes
// either there is some abnormal error, or scheduling the pod failed by plugins other than PreFilter, Filter and Permit.
// In that case, it's desired to move it anyways.
//if the plugins that caused the scheduling failure (UnschedulablePlugins) are recorded and the event does not match any of them (podMatchesEvent returns false), skip this pod; otherwise move it to activeQ or backoffQ
if len(pInfo.UnschedulablePlugins) != 0 && !p.podMatchesEvent(pInfo, event) {
continue
}
moved = true
pod := pInfo.Pod
if p.isPodBackingoff(pInfo) {
if err := p.podBackoffQ.Add(pInfo); err != nil {
klog.ErrorS(err, "Error adding pod to the backoff queue", "pod", klog.KObj(pod))
} else {
metrics.SchedulerQueueIncomingPods.WithLabelValues("backoff", event.Label).Inc()
p.unschedulableQ.delete(pod)
}
} else {
if err := p.activeQ.Add(pInfo); err != nil {
klog.ErrorS(err, "Error adding pod to the scheduling queue", "pod", klog.KObj(pod))
} else {
metrics.SchedulerQueueIncomingPods.WithLabelValues("active", event.Label).Inc()
p.unschedulableQ.delete(pod)
}
}
}
p.moveRequestCycle = p.schedulingCycle
if moved {
p.cond.Broadcast()
}
}
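isPodBackingoff (not shown) roughly checks whether the pod is still inside its backoff window, which grows with the number of failed scheduling attempts; pods still backing off land in backoffQ and are flushed into activeQ once the backoff expires, while the others go straight to activeQ.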
podMatchesEvent decides whether clusterEvent could turn this unschedulable pod into a schedulable one.
func (p *PriorityQueue) podMatchesEvent(podInfo *framework.QueuedPodInfo, clusterEvent framework.ClusterEvent) bool {
//clusterEvent.IsWildCard is implemented as: ce.Resource == WildCard && ce.ActionType == All,
//i.e. it covers all actions on all resources, so return true directly
if clusterEvent.IsWildCard() {
return true
}
//iterate over clusterEventMap, which maps events to plugins; given the incoming clusterEvent, check whether any of the plugins that caused this pod's scheduling failure is registered for it;
//if so, clusterEvent may make the pod schedulable again, so return true
for evt, nameSet := range p.clusterEventMap {
// Firstly verify if the two ClusterEvents match:
// - either the registered event from plugin side is a WildCardEvent,
// - or the two events have identical Resource fields and *compatible* ActionType.
// Note the ActionTypes don't need to be *identical*. We check if the ANDed value
// is zero or not. In this way, it's easy to tell Update&Delete is not compatible,
// but Update&All is.
//
evtMatch := evt.IsWildCard() ||
(evt.Resource == clusterEvent.Resource && evt.ActionType&clusterEvent.ActionType != 0)
// Secondly verify the plugin name matches.
// Note that if it doesn't match, we shouldn't continue to search.
if evtMatch && intersect(nameSet, podInfo.UnschedulablePlugins) {
return true
}
}
return false
}
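The intersect helper used above simply checks whether the two string sets share at least one element; a paraphrased sketch:
// intersect reports whether x and y have any element in common; iterating the
// smaller set keeps the check cheap.
func intersect(x, y sets.String) bool {
	if len(x) > len(y) {
		x, y = y, x
	}
	for v := range x {
		if y.Has(v) {
			return true
		}
	}
	return false
}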