研究一下kube-scheduler是如何构建Scheduler对象以及如何进行算法插件式注入
代码入口kubernetes\cmd\kube-scheduler\scheduler.go
首先构建SchedulerCommand对象
command := app.NewSchedulerCommand()
接下来执行runCommand
runCommand(cmd, args, opts)
根据配置信息启动Scheduler
Run(cc, stopCh)
构建scheduler对象
// Create the scheduler.
sched, err := scheduler.New(cc.Client,
cc.InformerFactory.Core().V1().Nodes(),
cc.PodInformer,
cc.InformerFactory.Core().V1().PersistentVolumes(),
cc.InformerFactory.Core().V1().PersistentVolumeClaims(),
cc.InformerFactory.Core().V1().ReplicationControllers(),
cc.InformerFactory.Apps().V1().ReplicaSets(),
cc.InformerFactory.Apps().V1().StatefulSets(),
cc.InformerFactory.Core().V1().Services(),
cc.InformerFactory.Policy().V1beta1().PodDisruptionBudgets(),
storageClassInformer,
cc.Recorder,
cc.ComponentConfig.AlgorithmSource,
stopCh,
scheduler.WithName(cc.ComponentConfig.SchedulerName),
scheduler.WithHardPodAffinitySymmetricWeight(cc.ComponentConfig.HardPodAffinitySymmetricWeight),
scheduler.WithEquivalenceClassCacheEnabled(cc.ComponentConfig.EnableContentionProfiling),
scheduler.WithPreemptionDisabled(cc.ComponentConfig.DisablePreemption),
scheduler.WithPercentageOfNodesToScore(cc.ComponentConfig.PercentageOfNodesToScore),
scheduler.WithBindTimeoutSeconds(*cc.ComponentConfig.BindTimeoutSeconds))
执行run方法
// Prepare a reusable runCommand function.
run := func(ctx context.Context) {
sched.Run()
<-ctx.Done()
}
接下来就是Scheduler的启动函数
func (sched *Scheduler) Run() {
if !sched.config.WaitForCacheSync() {
return
}
go wait.Until(sched.scheduleOne, 0, sched.config.StopEverything)
}
研究一下Run(cc, stopCh)中配置参数cc的由来:
func runCommand(cmd *cobra.Command, args []string, opts *options.Options) error
由Command对象以及命令行参数构建Config对象
c, err := opts.Config()
// Config return a scheduler config object
func (o *Options) Config() (*schedulerappconfig.Config, error) {
if o.SecureServing != nil {
if err := o.SecureServing.MaybeDefaultWithSelfSignedCerts("localhost", nil, []net.IP{net.ParseIP("127.0.0.1")}); err != nil {
return nil, fmt.Errorf("error creating self-signed certificates: %v", err)
}
}
c := &schedulerappconfig.Config{}
if err := o.ApplyTo(c); err != nil {
return nil, err
}
// Prepare kube clients.
client, leaderElectionClient, eventClient, err := createClients(c.ComponentConfig.ClientConnection, o.Master, c.ComponentConfig.LeaderElection.RenewDeadline.Duration)
if err != nil {
return nil, err
}
// Prepare event clients.
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(legacyscheme.Scheme, corev1.EventSource{Component: c.ComponentConfig.SchedulerName})
// Set up leader election if enabled.
var leaderElectionConfig *leaderelection.LeaderElectionConfig
if c.ComponentConfig.LeaderElection.LeaderElect {
leaderElectionConfig, err = makeLeaderElectionConfig(c.ComponentConfig.LeaderElection, leaderElectionClient, recorder)
if err != nil {
return nil, err
}
}
c.Client = client
c.InformerFactory = informers.NewSharedInformerFactory(client, 0)
c.PodInformer = factory.NewPodInformer(client, 0)
c.EventClient = eventClient
c.Recorder = recorder
c.Broadcaster = eventBroadcaster
c.LeaderElection = leaderElectionConfig
return c, nil
}
cc := c.Complete()
// Complete fills in any fields not set that are required to have valid data. It's mutating the receiver.
func (c *Config) Complete() CompletedConfig {
cc := completedConfig{c}
if c.InsecureServing != nil {
c.InsecureServing.Name = "healthz"
}
if c.InsecureMetricsServing != nil {
c.InsecureMetricsServing.Name = "metrics"
}
apiserver.AuthorizeClientBearerToken(c.LoopbackClientConfig, &c.Authentication, &c.Authorization)
return CompletedConfig{&cc}
}
algorithmprovider.ApplyFeatureGates()
Run(cc, stopCh)
Config对象
Scheduler上下文配置
// Config has all the context to run a Scheduler
type Config struct {
// config is the scheduler server's configuration object.
ComponentConfig kubeschedulerconfig.KubeSchedulerConfiguration
// LoopbackClientConfig is a config for a privileged loopback connection
LoopbackClientConfig *restclient.Config
InsecureServing *apiserver.DeprecatedInsecureServingInfo // nil will disable serving on an insecure port
InsecureMetricsServing *apiserver.DeprecatedInsecureServingInfo // non-nil if metrics should be served independently
Authentication apiserver.AuthenticationInfo
Authorization apiserver.AuthorizationInfo
SecureServing *apiserver.SecureServingInfo
Client clientset.Interface
InformerFactory informers.SharedInformerFactory
PodInformer coreinformers.PodInformer
EventClient v1core.EventsGetter
Recorder record.EventRecorder
Broadcaster record.EventBroadcaster
// LeaderElection is optional.
LeaderElection *leaderelection.LeaderElectionConfig
}
scheduler server配置信息
// KubeSchedulerConfiguration configures a scheduler
type KubeSchedulerConfiguration struct {
metav1.TypeMeta
// SchedulerName is name of the scheduler, used to select which pods
// will be processed by this scheduler, based on pod's "spec.SchedulerName".
SchedulerName string
// AlgorithmSource specifies the scheduler algorithm source.
AlgorithmSource SchedulerAlgorithmSource
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
// corresponding to every RequiredDuringScheduling affinity rule.
// HardPodAffinitySymmetricWeight represents the weight of implicit PreferredDuringScheduling affinity rule, in the range 0-100.
HardPodAffinitySymmetricWeight int32
// LeaderElection defines the configuration of leader election client.
LeaderElection KubeSchedulerLeaderElectionConfiguration
// ClientConnection specifies the kubeconfig file and client connection
// settings for the proxy server to use when communicating with the apiserver.
ClientConnection apimachineryconfig.ClientConnectionConfiguration
// HealthzBindAddress is the IP address and port for the health check server to serve on,
// defaulting to 0.0.0.0:10251
HealthzBindAddress string
// MetricsBindAddress is the IP address and port for the metrics server to
// serve on, defaulting to 0.0.0.0:10251.
MetricsBindAddress string
// DebuggingConfiguration holds configuration for Debugging related features
// TODO: We might wanna make this a substruct like Debugging apiserverconfig.DebuggingConfiguration
apiserverconfig.DebuggingConfiguration
// DisablePreemption disables the pod preemption feature.
DisablePreemption bool
// PercentageOfNodeToScore is the percentage of all nodes that once found feasible
// for running a pod, the scheduler stops its search for more feasible nodes in
// the cluster. This helps improve scheduler's performance. Scheduler always tries to find
// at least "minFeasibleNodesToFind" feasible nodes no matter what the value of this flag is.
// Example: if the cluster size is 500 nodes and the value of this flag is 30,
// then scheduler stops finding further feasible nodes once it finds 150 feasible ones.
// When the value is 0, default percentage (50%) of the nodes will be scored.
PercentageOfNodesToScore int32
// DEPRECATED.
// Indicate the "all topologies" set for empty topologyKey when it's used for PreferredDuringScheduling pod anti-affinity.
FailureDomains string
// Duration to wait for a binding operation to complete before timing out
// Value must be non-negative integer. The value zero indicates no waiting.
// If this value is nil, the default value will be used.
BindTimeoutSeconds *int64
}
默认算法的设定过程:kubernetes\pkg\scheduler\apis\config\v1alpha1\register.go
func init() {
// We only register manually written functions here. The registration of the
// generated functions takes place in the generated files. The separation
// makes the code compile even when the generated files are missing.
localSchemeBuilder.Register(addDefaultingFuncs)
}
func addDefaultingFuncs(scheme *runtime.Scheme) error {
return RegisterDefaults(scheme)
}
func RegisterDefaults(scheme *runtime.Scheme) error {
scheme.AddTypeDefaultingFunc(&v1alpha1.KubeSchedulerConfiguration{}, func(obj interface{}) {
SetObjectDefaults_KubeSchedulerConfiguration(obj.(*v1alpha1.KubeSchedulerConfiguration))
})
return nil
}
func SetObjectDefaults_KubeSchedulerConfiguration(in *v1alpha1.KubeSchedulerConfiguration) {
SetDefaults_KubeSchedulerConfiguration(in)
}
// SetDefaults_KubeSchedulerConfiguration sets additional defaults
func SetDefaults_KubeSchedulerConfiguration(obj *kubescedulerconfigv1alpha1.KubeSchedulerConfiguration) {
if len(obj.SchedulerName) == 0 {
obj.SchedulerName = api.DefaultSchedulerName
}
if obj.HardPodAffinitySymmetricWeight == 0 {
obj.HardPodAffinitySymmetricWeight = api.DefaultHardPodAffinitySymmetricWeight
}
if obj.AlgorithmSource.Policy == nil &&
(obj.AlgorithmSource.Provider == nil || len(*obj.AlgorithmSource.Provider) == 0) {
val := kubescedulerconfigv1alpha1.SchedulerDefaultProviderName
obj.AlgorithmSource.Provider = &val
}
if policy := obj.AlgorithmSource.Policy; policy != nil {
if policy.ConfigMap != nil && len(policy.ConfigMap.Namespace) == 0 {
obj.AlgorithmSource.Policy.ConfigMap.Namespace = api.NamespaceSystem
}
}
if host, port, err := net.SplitHostPort(obj.HealthzBindAddress); err == nil {
if len(host) == 0 {
host = "0.0.0.0"
}
obj.HealthzBindAddress = net.JoinHostPort(host, port)
} else {
obj.HealthzBindAddress = net.JoinHostPort("0.0.0.0", strconv.Itoa(ports.InsecureSchedulerPort))
}
if host, port, err := net.SplitHostPort(obj.MetricsBindAddress); err == nil {
if len(host) == 0 {
host = "0.0.0.0"
}
obj.MetricsBindAddress = net.JoinHostPort(host, port)
} else {
obj.MetricsBindAddress = net.JoinHostPort("0.0.0.0", strconv.Itoa(ports.InsecureSchedulerPort))
}
if len(obj.LeaderElection.LockObjectNamespace) == 0 {
obj.LeaderElection.LockObjectNamespace = kubescedulerconfigv1alpha1.SchedulerDefaultLockObjectNamespace
}
if len(obj.LeaderElection.LockObjectName) == 0 {
obj.LeaderElection.LockObjectName = kubescedulerconfigv1alpha1.SchedulerDefaultLockObjectName
}
if obj.PercentageOfNodesToScore == 0 {
// by default, stop finding feasible nodes once the number of feasible nodes is 50% of the cluster.
obj.PercentageOfNodesToScore = 50
}
if len(obj.FailureDomains) == 0 {
obj.FailureDomains = kubeletapis.DefaultFailureDomains
}
if len(obj.ClientConnection.ContentType) == 0 {
obj.ClientConnection.ContentType = "application/vnd.kubernetes.protobuf"
}
// Scheduler has an opinion about QPS/Burst, setting specific defaults for itself, instead of generic settings.
if obj.ClientConnection.QPS == 0.0 {
obj.ClientConnection.QPS = 50.0
}
if obj.ClientConnection.Burst == 0 {
obj.ClientConnection.Burst = 100
}
// Use the default LeaderElectionConfiguration options
apiserverconfigv1alpha1.RecommendedDefaultLeaderElectionConfiguration(&obj.LeaderElection.LeaderElectionConfiguration)
if obj.BindTimeoutSeconds == nil {
defaultBindTimeoutSeconds := int64(600)
obj.BindTimeoutSeconds = &defaultBindTimeoutSeconds
}
}
可知默认调度算法是由register.go初始化完成的。
调度器名字
if len(obj.SchedulerName) == 0 {
obj.SchedulerName = api.DefaultSchedulerName
}
const (
// "default-scheduler" is the name of default scheduler.
DefaultSchedulerName = "default-scheduler"
// RequiredDuringScheduling affinity is not symmetric, but there is an implicit PreferredDuringScheduling affinity rule
// corresponding to every RequiredDuringScheduling affinity rule.
// When the --hard-pod-affinity-weight scheduler flag is not specified,
// DefaultHardPodAffinityWeight defines the weight of the implicit PreferredDuringScheduling affinity rule.
DefaultHardPodAffinitySymmetricWeight int32 = 1
)
Provider提供者
if obj.AlgorithmSource.Policy == nil &&
(obj.AlgorithmSource.Provider == nil || len(*obj.AlgorithmSource.Provider) == 0) {
val := kubescedulerconfigv1alpha1.SchedulerDefaultProviderName
obj.AlgorithmSource.Provider = &val
}
const (
// SchedulerDefaultLockObjectNamespace defines default scheduler lock object namespace ("kube-system")
SchedulerDefaultLockObjectNamespace string = metav1.NamespaceSystem
// SchedulerDefaultLockObjectName defines default scheduler lock object name ("kube-scheduler")
SchedulerDefaultLockObjectName = "kube-scheduler"
// SchedulerDefaultProviderName defines the default provider names
SchedulerDefaultProviderName = "DefaultProvider"
)