Deployment controller source code reading (1.18.x)
Deployment is the most common workload type. The official docs list the typical Deployment use cases, covering creation, declarative updates, rollback, scaling, and cleanup:
- Create a Deployment to roll out a ReplicaSet. The ReplicaSet creates Pods in the background. Check the status of the rollout to see whether it succeeded.
- Declare the new state of the Pods by updating the PodTemplateSpec of the Deployment. A new ReplicaSet is created, and the Deployment moves Pods from the old ReplicaSet to the new one at a controlled rate. Each new ReplicaSet updates the Deployment's revision.
- Roll back to an earlier Deployment revision if the current state of the Deployment is not stable. Each rollback updates the Deployment's revision.
- Scale up the Deployment to handle more load.
- Pause the Deployment to apply multiple fixes to its PodTemplateSpec, then resume it to start a new rollout.
- Use the Deployment status to determine whether a rollout has stalled.
- Clean up older ReplicaSets that are no longer needed.
---- quoted from the Kubernetes official docs
All that verbiage is really just to say Deployments are important — then again, is there any module that isn't? Ha. The relationship between deployment, replicaSet and pod is straightforward: a deployment controls pods by controlling replicaSets, roughly Deployment → ReplicaSet → Pods.

The replicaSet does the concrete work, while DeploymentInformer, ReplicaSetInformer and PodInformer do the watching: when a resource changes, the registered event handlers trigger the DeploymentController's sync loop to take the corresponding action.
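To make the listen-and-react part concrete, here's a minimal standalone sketch of the same informer pattern using client-go — the kubeconfig path and the print statements are my own placeholders, not anything from the controller:
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Assumption: a local kubeconfig; inside a pod you'd use rest.InClusterConfig().
	cfg, err := clientcmd.BuildConfigFromFlags("", "/root/.kube/config")
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(cfg)

	// Shared informer factory with a 30s resync period.
	factory := informers.NewSharedInformerFactory(client, 30*time.Second)
	dInformer := factory.Apps().V1().Deployments()

	// The same AddEventHandler shape as DeploymentController's registration.
	dInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			key, _ := cache.MetaNamespaceKeyFunc(obj)
			fmt.Println("deployment added:", key)
		},
		UpdateFunc: func(oldObj, newObj interface{}) {
			key, _ := cache.MetaNamespaceKeyFunc(newObj)
			fmt.Println("deployment updated:", key)
		},
		DeleteFunc: func(obj interface{}) {
			key, _ := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
			fmt.Println("deployment deleted:", key)
		},
	})

	stopCh := make(chan struct{})
	factory.Start(stopCh)
	// Just like dc.Run: don't act before the caches have synced.
	cache.WaitForCacheSync(stopCh, dInformer.Informer().HasSynced)
	<-stopCh
}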
As usual, first find the code. The DeploymentController is started from cmd/kube-controller-manager/app/controllermanager.go:
func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error {
// Log the version
klog.Infof("Version: %+v", version.Get())
if cfgz, err := configz.New(ConfigzName); err == nil {
cfgz.Set(c.ComponentConfig)
} else {
klog.Errorf("unable to register configz: %v", err)
}
// Set up all the healthz checks that will be used
var checks []healthz.HealthChecker
var electionChecker *leaderelection.HealthzAdaptor
if c.ComponentConfig.Generic.LeaderElection.LeaderElect {
electionChecker = leaderelection.NewLeaderHealthzAdaptor(time.Second * 20)
checks = append(checks, electionChecker)
}
// Start the controller-manager HTTP server
var unsecuredMux *mux.PathRecorderMux
if c.SecureServing != nil {
unsecuredMux = genericcontrollermanager.NewBaseHandler(&c.ComponentConfig.Generic.Debugging, checks...)
handler := genericcontrollermanager.BuildHandlerChain(unsecuredMux, &c.Authorization, &c.Authentication)
// Handle the stoppedCh returned by c.SecureServing.Serve
if _, err := c.SecureServing.Serve(handler, 0, stopCh); err != nil {
return err
}
}
if c.InsecureServing != nil {
unsecuredMux = genericcontrollermanager.NewBaseHandler(&c.ComponentConfig.Generic.Debugging, checks...)
insecureSuperuserAuthn := server.AuthenticationInfo{Authenticator: &server.InsecureSuperuser{}}
handler := genericcontrollermanager.BuildHandlerChain(unsecuredMux, nil, &insecureSuperuserAuthn)
if err := c.InsecureServing.Serve(handler, 0, stopCh); err != nil {
return err
}
}
run := func(ctx context.Context) {
rootClientBuilder := clientbuilder.SimpleControllerClientBuilder{
ClientConfig: c.Kubeconfig,
}
var clientBuilder clientbuilder.ControllerClientBuilder
if c.ComponentConfig.KubeCloudShared.UseServiceAccountCredentials {
if len(c.ComponentConfig.SAController.ServiceAccountKeyFile) == 0 {
// It's possible another controller process is creating the tokens for us.
// If one isn't, we'll time out and exit when our client builder is unable to create the tokens.
klog.Warningf("--use-service-account-credentials was specified without providing a --service-account-private-key-file")
}
if shouldTurnOnDynamicClient(c.Client) {
klog.V(1).Infof("using dynamic client builder")
// The dynamic builder will use the TokenRequest feature and refresh service account tokens periodically
clientBuilder = controller.NewDynamicClientBuilder(
restclient.AnonymousClientConfig(c.Kubeconfig),
c.Client.CoreV1(),
"kube-system")
} else {
klog.V(1).Infof("using legacy client builder")
clientBuilder = clientbuilder.SAControllerClientBuilder{
ClientConfig: restclient.AnonymousClientConfig(c.Kubeconfig),
CoreClient: c.Client.CoreV1(),
AuthenticationClient: c.Client.AuthenticationV1(),
Namespace: "kube-system",
}
}
} else {
clientBuilder = rootClientBuilder
}
controllerContext, err := CreateControllerContext(c, rootClientBuilder, clientBuilder, ctx.Done())
if err != nil {
klog.Fatalf("error building controller context: %v", err)
}
saTokenControllerInitFunc := serviceAccountTokenControllerStarter{rootClientBuilder: rootClientBuilder}.startServiceAccountTokenController
// NewControllerInitializers builds the controller init funcs; StartControllers starts them
if err := StartControllers(controllerContext, saTokenControllerInitFunc, NewControllerInitializers(controllerContext.LoopMode), unsecuredMux); err != nil {
klog.Fatalf("error starting controllers: %v", err)
}
controllerContext.InformerFactory.Start(controllerContext.Stop)
controllerContext.ObjectOrMetadataInformerFactory.Start(controllerContext.Stop)
close(controllerContext.InformersStarted)
select {}
}
if !c.ComponentConfig.Generic.LeaderElection.LeaderElect {
run(context.TODO())
panic("unreachable")
}
id, err := os.Hostname()
if err != nil {
return err
}
// Add a uniquifier so that two processes on the same host don't accidentally both become active
id = id + "_" + string(uuid.NewUUID())
rl, err := resourcelock.New(c.ComponentConfig.Generic.LeaderElection.ResourceLock,
c.ComponentConfig.Generic.LeaderElection.ResourceNamespace,
c.ComponentConfig.Generic.LeaderElection.ResourceName,
c.LeaderElectionClient.CoreV1(),
c.LeaderElectionClient.CoordinationV1(),
resourcelock.ResourceLockConfig{
Identity: id,
EventRecorder: c.EventRecorder,
})
if err != nil {
klog.Fatalf("error creating lock: %v", err)
}
leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{
Lock: rl,
LeaseDuration: c.ComponentConfig.Generic.LeaderElection.LeaseDuration.Duration,
RenewDeadline: c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration,
RetryPeriod: c.ComponentConfig.Generic.LeaderElection.RetryPeriod.Duration,
Callbacks: leaderelection.LeaderCallbacks{
OnStartedLeading: run,
OnStoppedLeading: func() {
klog.Fatalf("leaderelection lost")
},
},
WatchDog: electionChecker,
Name: "kube-controller-manager",
})
panic("unreachable")
}
Next, let's look inside NewControllerInitializers and StartControllers.
//This big pile of controllers all gets initialized here: after the map is built, each entry's start function performs the actual initialization. For Deployment that's startDeploymentController — let's go find it
func NewControllerInitializers(loopMode ControllerLoopMode) map[string]InitFunc {
controllers := map[string]InitFunc{}
controllers["endpoint"] = startEndpointController
controllers["endpointslice"] = startEndpointSliceController
controllers["endpointslicemirroring"] = startEndpointSliceMirroringController
controllers["replicationcontroller"] = startReplicationController
controllers["podgc"] = startPodGCController
controllers["resourcequota"] = startResourceQuotaController
controllers["namespace"] = startNamespaceController
controllers["serviceaccount"] = startServiceAccountController
controllers["garbagecollector"] = startGarbageCollectorController
controllers["daemonset"] = startDaemonSetController
controllers["job"] = startJobController
controllers["deployment"] = startDeploymentController
controllers["replicaset"] = startReplicaSetController
controllers["horizontalpodautoscaling"] = startHPAController
controllers["disruption"] = startDisruptionController
controllers["statefulset"] = startStatefulSetController
controllers["cronjob"] = startCronJobController
controllers["csrsigning"] = startCSRSigningController
controllers["csrapproving"] = startCSRApprovingController
controllers["csrcleaner"] = startCSRCleanerController
controllers["ttl"] = startTTLController
controllers["bootstrapsigner"] = startBootstrapSignerController
controllers["tokencleaner"] = startTokenCleanerController
controllers["nodeipam"] = startNodeIpamController
controllers["nodelifecycle"] = startNodeLifecycleController
if loopMode == IncludeCloudLoops {
controllers["service"] = startServiceController
controllers["route"] = startRouteController
controllers["cloud-node-lifecycle"] = startCloudNodeLifecycleController
// TODO: volume controller into the IncludeCloudLoops only set.
}
controllers["persistentvolume-binder"] = startPersistentVolumeBinderController
controllers["attachdetach"] = startAttachDetachController
controllers["persistentvolume-expander"] = startVolumeExpandController
controllers["clusterrole-aggregation"] = startClusterRoleAggregrationController
controllers["pvc-protection"] = startPVCProtectionController
controllers["pv-protection"] = startPVProtectionController
controllers["ttl-after-finished"] = startTTLAfterFinishedController
controllers["root-ca-cert-publisher"] = startRootCACertPublisher
controllers["ephemeral-volume"] = startEphemeralVolumeController
return controllers
}
//This function lives in apps.go in the same directory, alongside the init functions for the DaemonSet and StatefulSet controllers — more on those another time
func startDeploymentController(ctx ControllerContext) (http.Handler, bool, error) {
if !ctx.AvailableResources[schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "deployments"}] {
return nil, false, nil
}
//Initialize the controller
dc, err := deployment.NewDeploymentController(
ctx.InformerFactory.Apps().V1().Deployments(),
ctx.InformerFactory.Apps().V1().ReplicaSets(),
ctx.InformerFactory.Core().V1().Pods(),
ctx.ClientBuilder.ClientOrDie("deployment-controller"),
)
if err != nil {
return nil, true, fmt.Errorf("error creating Deployment controller: %v", err)
}
//Start the controller
go dc.Run(int(ctx.ComponentConfig.DeploymentController.ConcurrentDeploymentSyncs), ctx.Stop)
return nil, true, nil
}
//Run begins watching and syncing
func (dc *DeploymentController) Run(workers int, stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
defer dc.queue.ShutDown()
klog.Infof("Starting deployment controller")
defer klog.Infof("Shutting down deployment controller")
//Wait for the informer caches to sync
if !cache.WaitForNamedCacheSync("deployment", stopCh, dc.dListerSynced, dc.rsListerSynced, dc.podListerSynced) {
return
}
//Start `workers` goroutines, each running dc.worker
for i := 0; i < workers; i++ {
go wait.Until(dc.worker, time.Second, stopCh)
}
<-stopCh
}
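A quick aside on wait.Until, since it carries the worker loop: it re-invokes the given function every period until stopCh is closed, so even if dc.worker ever returned, it would be restarted a second later. A standalone toy to show the behavior (not controller code):
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	stopCh := make(chan struct{})
	go wait.Until(func() {
		// This function returns immediately, but wait.Until calls it again
		// one second later, over and over, until stopCh is closed.
		fmt.Println("worker tick")
	}, time.Second, stopCh)

	time.Sleep(3 * time.Second)
	close(stopCh)
}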
//worker in turn just loops over processNextWorkItem
func (dc *DeploymentController) worker() {
for dc.processNextWorkItem() {
}
}
//processNextWorkItem finally calls the star of the show, syncHandler
func (dc *DeploymentController) processNextWorkItem() bool {
key, quit := dc.queue.Get()
if quit {
return false
}
defer dc.queue.Done(key)
err := dc.syncHandler(key.(string))
dc.handleErr(err, key)
return true
}
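handleErr isn't shown above; the usual pattern with a rate-limiting workqueue — and, as far as I can tell, roughly what the deployment controller does — is to requeue the key with backoff up to a retry cap, then give up. A self-contained sketch (maxRetries and the sync function are my stand-ins):
package main

import (
	"errors"
	"fmt"

	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/client-go/util/workqueue"
)

const maxRetries = 5 // stand-in; the real controller has its own cap

func sync(key string) error {
	fmt.Println("syncing", key)
	return errors.New("simulated failure") // always fail, to exercise the retry path
}

func main() {
	queue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "demo")
	queue.Add("default/nginx")

	for i := 0; i < maxRetries+1; i++ {
		key, quit := queue.Get()
		if quit {
			return
		}
		err := sync(key.(string)) // stand-in for dc.syncHandler
		switch {
		case err == nil:
			queue.Forget(key) // success: clear the backoff history for this key
		case queue.NumRequeues(key) < maxRetries:
			queue.AddRateLimited(key) // failure: requeue with exponential backoff
		default:
			utilruntime.HandleError(err) // too many failures: log and drop the key
			queue.Forget(key)
		}
		queue.Done(key)
	}
}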
And here the trail gloriously went cold. Everyone else's write-ups jump straight to syncDeployment, and on closer inspection I found why: NewDeploymentController contains dc.syncHandler = dc.syncDeployment. How did I miss that... So, put bluntly, syncHandler is syncDeployment, and we can go read syncDeployment directly.
// NewDeploymentController creates a new DeploymentController.
func NewDeploymentController(dInformer appsinformers.DeploymentInformer, rsInformer appsinformers.ReplicaSetInformer, podInformer coreinformers.PodInformer, client clientset.Interface) (*DeploymentController, error) {
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartStructuredLogging(0)
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: client.CoreV1().Events("")})
if client != nil && client.CoreV1().RESTClient().GetRateLimiter() != nil {
if err := ratelimiter.RegisterMetricAndTrackRateLimiterUsage("deployment_controller", client.CoreV1().RESTClient().GetRateLimiter()); err != nil {
return nil, err
}
}
dc := &DeploymentController{
client: client,
eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "deployment-controller"}),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "deployment"),
}
dc.rsControl = controller.RealRSControl{
KubeClient: client,
Recorder: dc.eventRecorder,
}
dInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: dc.addDeployment,
UpdateFunc: dc.updateDeployment,
// This will enter the sync loop and no-op, because the deployment has been deleted from the store.
DeleteFunc: dc.deleteDeployment,
})
rsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: dc.addReplicaSet,
UpdateFunc: dc.updateReplicaSet,
DeleteFunc: dc.deleteReplicaSet,
})
podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
DeleteFunc: dc.deletePod,
})
//here it is: syncHandler is syncDeployment
dc.syncHandler = dc.syncDeployment
dc.enqueueDeployment = dc.enqueue
dc.dLister = dInformer.Lister()
dc.rsLister = rsInformer.Lister()
dc.podLister = podInformer.Lister()
dc.dListerSynced = dInformer.Informer().HasSynced
dc.rsListerSynced = rsInformer.Informer().HasSynced
dc.podListerSynced = podInformer.Informer().HasSynced
return dc, nil
}
// syncDeployment will sync the deployment with the given key.
func (dc *DeploymentController) syncDeployment(key string) error {
startTime := time.Now()
klog.V(4).Infof("Started syncing deployment %q (%v)", key, startTime)
defer func() {
klog.V(4).Infof("Finished syncing deployment %q (%v)", key, time.Since(startTime))
}()
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
return err
}
//Fetch the deployment object
deployment, err := dc.dLister.Deployments(namespace).Get(name)
if errors.IsNotFound(err) {
klog.V(2).Infof("Deployment %v has been deleted", key)
return nil
}
if err != nil {
return err
}
//Deep-copy, otherwise we'd be mutating the informer cache
d := deployment.DeepCopy()
//Check whether the selector is empty
everything := metav1.LabelSelector{}
if reflect.DeepEqual(d.Spec.Selector, &everything) {
dc.eventRecorder.Eventf(d, v1.EventTypeWarning, "SelectingAll", "This deployment is selecting all pods. A non-empty selector is required.")
if d.Status.ObservedGeneration < d.Generation {
d.Status.ObservedGeneration = d.Generation
dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(context.TODO(), d, metav1.UpdateOptions{})
}
return nil
}
//List the ReplicaSets this deployment owns, reconciling ControllerRef through adoption/orphaning along the way.
//In practice the matching is done via the LabelSelector
rsList, err := dc.getReplicaSetsForDeployment(d)
if err != nil {
return err
}
//List all Pods this Deployment owns, grouped by their ReplicaSet (keyed by rs.UID). podMap is currently used to:
//* check that a Pod is labeled correctly with the pod-template-hash label
//* check that no old Pods are running in the middle of a Recreate deployment
podMap, err := dc.getPodMapForDeployment(d, rsList)
if err != nil {
return err
}
//If this deployment is being deleted, only update its status
if d.DeletionTimestamp != nil {
return dc.syncStatusOnly(d, rsList)
}
//Handle pause/resume conditions so that resuming a Deployment with progressDeadlineSeconds set doesn't immediately time out
if err = dc.checkPausedConditions(d); err != nil {
return err
}
if d.Spec.Paused {
return dc.sync(d, rsList)
}
//Check for a rollback. Rollback is not re-entrant once the underlying replica sets have been updated with a new revision,
//so we must not proceed to update replica sets until the deployment has cleared its rollback spec in subsequent enqueues
if getRollbackTo(d) != nil {
return dc.rollback(d, rsList)
}
//Check whether this sync is a scaling event
scalingEvent, err := dc.isScalingEvent(d, rsList)
if err != nil {
return err
}
if scalingEvent {
return dc.sync(d, rsList)
}
//Otherwise, roll out the update
switch d.Spec.Strategy.Type {
case apps.RecreateDeploymentStrategyType:
return dc.rolloutRecreate(d, rsList, podMap)
case apps.RollingUpdateDeploymentStrategyType:
return dc.rolloutRolling(d, rsList)
}
return fmt.Errorf("unexpected deployment strategy type: %s", d.Spec.Strategy.Type)
}
syncDeployment concentrates a lot of the handling. Executing top to bottom, it first checks deletion (syncStatusOnly), then pause (checkPausedConditions plus the d.Spec.Paused branch into sync), then rollback (rollback), then scaling (isScalingEvent, again into sync), and finally the rollout update itself (rolloutRecreate or rolloutRolling).
//In order, deletion first: a non-nil DeletionTimestamp means a delete has been issued
//Deletion still needs the replica sets — the new RS and the old RSs alike, all present in rsList
if d.DeletionTimestamp != nil {
return dc.syncStatusOnly(d, rsList)
}
// rsList was already fetched by getReplicaSetsForDeployment before the DeletionTimestamp check
rsList, err := dc.getReplicaSetsForDeployment(d)
The rest of the deletion path is handled by syncStatusOnly, which lives in kubernetes/pkg/controller/deployment/sync.go.
//syncStatusOnly only updates the Deployment's status and doesn't take any mutating actions
func (dc *DeploymentController) syncStatusOnly(d *apps.Deployment, rsList []*apps.ReplicaSet) error {
newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, false)
if err != nil {
return err
}
allRSs := append(oldRSs, newRS)
//syncDeploymentStatus compares allRSs against newRS to compute what the status should now be, and updates it
return dc.syncDeploymentStatus(allRSs, newRS, d)
}
// syncDeploymentStatus checks whether the status is up-to-date and syncs it if necessary
func (dc *DeploymentController) syncDeploymentStatus(allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, d *apps.Deployment) error {
//calculateStatus computes the new status
newStatus := calculateStatus(allRSs, newRS, d)
if reflect.DeepEqual(d.Status, newStatus) {
return nil
}
newDeployment := d
newDeployment.Status = newStatus
_, err := dc.client.AppsV1().Deployments(newDeployment.Namespace).UpdateStatus(context.TODO(), newDeployment, metav1.UpdateOptions{})
return err
}
// calculateStatus calculates the latest status for the provided deployment by looking at the provided replica sets.
func calculateStatus(allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, deployment *apps.Deployment) apps.DeploymentStatus {
availableReplicas := deploymentutil.GetAvailableReplicaCountForReplicaSets(allRSs)
totalReplicas := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
unavailableReplicas := totalReplicas - availableReplicas
//A negative unavailableReplicas means the Deployment has more available replicas running than desired,
//e.g. right after a sharp scale-down or a misoperation. The field must never go negative, so clamp it at zero
if unavailableReplicas < 0 {
unavailableReplicas = 0
}
status := apps.DeploymentStatus{
//Make sure a retried status update doesn't pick up a newer Generation value
ObservedGeneration: deployment.Generation,
Replicas: deploymentutil.GetActualReplicaCountForReplicaSets(allRSs),
UpdatedReplicas: deploymentutil.GetActualReplicaCountForReplicaSets([]*apps.ReplicaSet{newRS}),
ReadyReplicas: deploymentutil.GetReadyReplicaCountForReplicaSets(allRSs),
AvailableReplicas: availableReplicas,
UnavailableReplicas: unavailableReplicas,
CollisionCount: deployment.Status.CollisionCount,
}
// Copy conditions one by one so we won't mutate the original object
conditions := deployment.Status.Conditions
for i := range conditions {
status.Conditions = append(status.Conditions, conditions[i])
}
if availableReplicas >= *(deployment.Spec.Replicas)-deploymentutil.MaxUnavailable(*deployment) {
minAvailability := deploymentutil.NewDeploymentCondition(apps.DeploymentAvailable, v1.ConditionTrue, deploymentutil.MinimumReplicasAvailable, "Deployment has minimum availability.")
deploymentutil.SetDeploymentCondition(&status, *minAvailability)
} else {
noMinAvailability := deploymentutil.NewDeploymentCondition(apps.DeploymentAvailable, v1.ConditionFalse, deploymentutil.MinimumReplicasUnavailable, "Deployment does not have minimum availability.")
deploymentutil.SetDeploymentCondition(&status, *noMinAvailability)
}
return status
}
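To ground the availability condition at the end: suppose spec.replicas=10 and rollingUpdate.maxUnavailable=25% — percentages round down for maxUnavailable, so the threshold is 10-2=8. A tiny worked example (the numbers are mine):
package main

import "fmt"

func main() {
	replicas := int32(10)
	// maxUnavailable = 25% of 10, rounded down => 2
	// (deploymentutil.MaxUnavailable does this via intstr helpers)
	maxUnavailable := replicas * 25 / 100
	minRequired := replicas - maxUnavailable // 8

	for _, available := range []int32{8, 7} {
		if available >= minRequired {
			fmt.Printf("available=%d: Available=True (MinimumReplicasAvailable)\n", available)
		} else {
			fmt.Printf("available=%d: Available=False (MinimumReplicasUnavailable)\n", available)
		}
	}
}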
And that's the end of the deletion story as far as the Deployment is concerned. For a Deployment, deleting Pods is just one more status change to sync: the controller does not delete Pods itself. Once the status is in place, the actual cascading deletion of ReplicaSets and Pods is carried out elsewhere in kube-controller-manager (the garbage collector, following ownerReferences). What the Deployment controller cares about is state changes and updates. Next up, spec.paused.
// Check for pause; rollback is checked right after — the pure pause path only applies when no rollback is pending
if d.Spec.Paused {
return dc.sync(d, rsList)
}
if getRollbackTo(d) != nil {
return dc.rollback(d, rsList)
}
// sync is responsible for reconciling deployments on scaling events or when they are paused
func (dc *DeploymentController) sync(d *apps.Deployment, rsList []*apps.ReplicaSet) error {
//Get newRS and oldRSs
newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, false)
if err != nil {
return err
}
//Decide from newRS and oldRSs whether scaling is needed
if err := dc.scale(d, newRS, oldRSs); err != nil {
//If we hit an error while trying to scale, the deployment is requeued and this resync is aborted
return err
}
//Clean up the deployment when it's paused and no rollback is in flight
if d.Spec.Paused && getRollbackTo(d) == nil {
if err := dc.cleanupDeployment(oldRSs, d); err != nil {
return err
}
}
allRSs := append(oldRSs, newRS)
//Finally, syncDeploymentStatus updates the status
return dc.syncDeploymentStatus(allRSs, newRS, d)
}
Pause and resume are used as a pair, which is why scaling also lives in sync — and note that dc.scale runs before the paused cleanup, so scaling takes priority over Paused. In other words, if you push an update and then immediately pause, that change doesn't freeze instantly; the scaling part is still reconciled first and the pause takes hold afterwards. So be careful with that. One more note on RSs: a Deployment's .spec.revisionHistoryLimit says how many old revisions to keep, and each revision is an old ReplicaSet — a rough sketch of that cleanup follows below, and then on to rollback.
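Here's a simplified sketch of what that cleanupDeployment step does, as I read it: keep the newest revisionHistoryLimit old RSs, delete the rest, and never touch an RS that still has pods. The type and names below are mine, and the real code sorts by creation timestamp rather than revision:
package main

import (
	"fmt"
	"sort"
)

// rsInfo is a stand-in for *apps.ReplicaSet with just the fields we need.
type rsInfo struct {
	name     string
	replicas int32
	revision int
}

func main() {
	historyLimit := 2 // stands in for *deployment.Spec.RevisionHistoryLimit
	oldRSs := []rsInfo{
		{"nginx-111", 0, 1},
		{"nginx-222", 0, 2},
		{"nginx-333", 0, 3},
		{"nginx-444", 3, 4}, // still has pods: skipped, never deleted
	}

	diff := len(oldRSs) - historyLimit // how many RSs are over the limit
	sort.Slice(oldRSs, func(i, j int) bool { return oldRSs[i].revision < oldRSs[j].revision })
	for i := 0; i < diff; i++ {
		if oldRSs[i].replicas != 0 {
			continue // avoid deleting a replica set that still has pods
		}
		fmt.Println("would delete", oldRSs[i].name) // the real code issues a Delete API call
	}
}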
//The rollback entry point. Rollback also relies on the replica sets, hence rsList is passed in — if the old rs has been deleted from the cluster, there's nothing to roll back to
if getRollbackTo(d) != nil {
return dc.rollback(d, rsList)
}
//The rollback function is in kubernetes/pkg/controller/deployment/rollback.go
//rollback the deployment to the specified revision
func (dc *DeploymentController) rollback(d *apps.Deployment, rsList []*apps.ReplicaSet) error {
//Get newRS and allOldRSs
newRS, allOldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, true)
if err != nil {
return err
}
allRSs := append(allOldRSs, newRS)
//getRollbackTo extracts the target revision of this rollback
rollbackTo := getRollbackTo(d)
//Revision 0 means roll back to the last revision
if rollbackTo.Revision == 0 {
if rollbackTo.Revision = deploymentutil.LastRevision(allRSs); rollbackTo.Revision == 0 {
//If we can't even find the last revision, give up the rollback
dc.emitRollbackWarningEvent(d, deploymentutil.RollbackRevisionNotFound, "Unable to find last revision.")
//Abort the rollback
return dc.updateDeploymentAndClearRollbackTo(d)
}
}
for _, rs := range allRSs {
v, err := deploymentutil.Revision(rs)
if err != nil {
klog.V(4).Infof("Unable to extract revision from deployment's replica set %q: %v", rs.Name, err)
continue
}
if v == rollbackTo.Revision {
klog.V(4).Infof("Found replica set %q with desired revision %d", rs.Name, v)
//Roll back by copying podTemplate.Spec from the rs; the revision number will be incremented
//during the next getAllReplicaSetsAndSyncRevision call (a no-op if the specs already match)
performedRollback, err := dc.rollbackToTemplate(d, rs)
//rollbackToTemplate performs the actual rollback
if performedRollback && err == nil {
dc.emitRollbackNormalEvent(d, fmt.Sprintf("Rolled back deployment %q to revision %d", d.Name, rollbackTo.Revision))
}
return err
}
}
dc.emitRollbackWarningEvent(d, deploymentutil.RollbackRevisionNotFound, "Unable to find the revision to rollback to.")
return dc.updateDeploymentAndClearRollbackTo(d)
}
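Where does getRollbackTo find the target revision? apps/v1 dropped the old spec.rollbackTo field, so as far as I can tell from the 1.18 code, it is round-tripped through the deprecated.deployment.rollback.to annotation set via the deprecated rollback API. A sketch of the idea (the helper name is mine):
import (
	"strconv"

	apps "k8s.io/api/apps/v1"
)

// getRollbackRevision is a hypothetical stand-in for getRollbackTo: it reads the
// annotation used to round-trip the deprecated RollbackTo field; 0 means
// "roll back to the last revision".
func getRollbackRevision(d *apps.Deployment) (revision int64, requested bool) {
	v, ok := d.Annotations["deprecated.deployment.rollback.to"]
	if !ok {
		return 0, false // no rollback requested
	}
	n, err := strconv.ParseInt(v, 10, 64)
	if err != nil {
		return 0, false // malformed annotation: treat as no rollback
	}
	return n, true
}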
//rollbackToTemplate compares the given deployment's template with the replica set's and, if they differ,
//updates the deployment with the replica set's template. It also clears the rollback spec, so subsequent
//requeues of the deployment won't re-trigger the rollback.
func (dc *DeploymentController) rollbackToTemplate(d *apps.Deployment, rs *apps.ReplicaSet) (bool, error) {
performedRollback := false
//Compare d.Spec.Template and rs.Spec.Template for equality (ignoring the hash label)
if !deploymentutil.EqualIgnoreHash(&d.Spec.Template, &rs.Spec.Template) {
klog.V(4).Infof("Rolling back deployment %q to template spec %+v", d.Name, rs.Spec.Template.Spec)
//Replace d.Spec.Template
deploymentutil.SetFromReplicaSetTemplate(d, rs.Spec.Template)
//Set the rs's (the old rs we roll back to) annotations back on the deployment; otherwise the deployment's
//current annotations (which should match the current new rs) would be copied to the rs after the rollback.
//For example: the deployment has old rs1 annotated {change-cause:create} and new rs2 annotated {change-cause:edit};
//both were copied from the deployment, and the deployment itself is annotated {change-cause:edit}.
//When rolling back to rs1, we update not only the deployment's pod template but also copy rs1's annotations.
//Set the annotations
deploymentutil.SetDeploymentAnnotationsTo(d, rs)
performedRollback = true
} else {
klog.V(4).Infof("Rolling back to a revision that contains the same template as current deployment %q, skipping rollback...", d.Name)
eventMsg := fmt.Sprintf("The rollback revision contains the same template as current deployment %q", d.Name)
dc.emitRollbackWarningEvent(d, deploymentutil.RollbackTemplateUnchanged, eventMsg)
}
//Update the deployment and clear its rollback spec
return performedRollback, dc.updateDeploymentAndClearRollbackTo(d)
}
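The EqualIgnoreHash comparison above deserves a note: the replica set's template carries an extra pod-template-hash label that the deployment's template doesn't have, so a plain DeepEqual would never match. My sketch of what it does (the real helper lives in deploymentutil):
import (
	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
)

// equalIgnoreHash compares two pod templates after stripping the
// pod-template-hash label, which only the replica set's copy carries.
func equalIgnoreHash(t1, t2 *v1.PodTemplateSpec) bool {
	t1Copy := t1.DeepCopy()
	t2Copy := t2.DeepCopy()
	delete(t1Copy.Labels, "pod-template-hash") // apps.DefaultDeploymentUniqueLabelKey
	delete(t2Copy.Labels, "pod-template-hash")
	return apiequality.Semantic.DeepEqual(t1Copy, t2Copy)
}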
So rollback uses the requested revision to find the matching RS, then swaps that RS's rs.Spec.Template into deployment.Spec.Template. Once the Deployment's desired state has changed, the ReplicaSet and Pods are driven to match it by the usual machinery, and the rollback is complete — once again, the Deployment itself only manages state changes.
On scaling
//syncDeployment again, trimmed to the scaling path (the full function is quoted above)
func (dc *DeploymentController) syncDeployment(key string) error {
......
//isScalingEvent decides whether this sync is a scaling event
scalingEvent, err := dc.isScalingEvent(d, rsList)
if err != nil {
return err
}
if scalingEvent {
return dc.sync(d, rsList)
}
......
}
The isScalingEvent function:
// isScalingEvent checks whether the provided deployment has been updated with a scaling event,
// by looking at the desired-replicas annotation on its active replica sets
func (dc *DeploymentController) isScalingEvent(d *apps.Deployment, rsList []*apps.ReplicaSet) (bool, error) {
//Get all RSs
newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, false)
if err != nil {
return false, err
}
allRSs := append(oldRSs, newRS)
//Filter out the active RSs and compare
for _, rs := range controller.FilterActiveReplicaSets(allRSs) {
//Read the deployment.kubernetes.io/desired-replicas annotation from the rs
desired, ok := deploymentutil.GetDesiredReplicasAnnotation(rs)
if !ok {
continue
}
//Any active rs whose annotation differs from spec.replicas means a scale operation is needed
if desired != *(d.Spec.Replicas) {
return true, nil
}
}
return false, nil
}
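That desired-replicas lookup is just an annotation read. A sketch of the idea — the helper name below is mine; the real one is deploymentutil.GetDesiredReplicasAnnotation:
import (
	"strconv"

	apps "k8s.io/api/apps/v1"
)

// desiredReplicas reads deployment.kubernetes.io/desired-replicas, which the
// controller stamps onto each replica set with the deployment's spec.replicas
// at the time of the last non-scaling change.
func desiredReplicas(rs *apps.ReplicaSet) (int32, bool) {
	v, ok := rs.Annotations["deployment.kubernetes.io/desired-replicas"]
	if !ok {
		return 0, false
	}
	n, err := strconv.ParseInt(v, 10, 32)
	if err != nil {
		return 0, false
	}
	return int32(n), true
}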
When isScalingEvent decides a scale is needed, sync is called, and inside sync the scale method performs the actual scaling:
func (dc *DeploymentController) scale(deployment *apps.Deployment, newRS *apps.ReplicaSet, oldRSs []*apps.ReplicaSet) error {
//If there is only one active replica set, scale it up to the full deployment size; if there is no active replica set, scale up the latest replica set.
if activeOrLatest := deploymentutil.FindActiveOrLatest(newRS, oldRSs); activeOrLatest != nil {
if *(activeOrLatest.Spec.Replicas) == *(deployment.Spec.Replicas) {
return nil
}
//scaleReplicaSetAndRecordEvent updates the rs (replicas and annotations) and records an event on the deployment
_, _, err := dc.scaleReplicaSetAndRecordEvent(activeOrLatest, *(deployment.Spec.Replicas), deployment)
return err
}
//If the new replica set is saturated, old replica sets should be fully scaled down.
if deploymentutil.IsSaturated(deployment, newRS) {
for _, old := range controller.FilterActiveReplicaSets(oldRSs) {
if _, _, err := dc.scaleReplicaSetAndRecordEvent(old, 0, deployment); err != nil {
return err
}
}
return nil
}
//There are old replica sets with pods and the new replica set is not saturated.
//With a RollingUpdate strategy, all active replica sets need to be scaled up or down proportionally
if deploymentutil.IsRollingUpdate(deployment) {
allRSs := controller.FilterActiveReplicaSets(append(oldRSs, newRS))
allRSsReplicas := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
allowedSize := int32(0)
if *(deployment.Spec.Replicas) > 0 {
//Compute the maximum number of pods allowed
allowedSize = *(deployment.Spec.Replicas) + deploymentutil.MaxSurge(*deployment)
}
//Compute how many pods we can still add
deploymentReplicasToAdd := allowedSize - allRSsReplicas
//The additional replicas should be distributed proportionally among the active replica sets, biggest first.
//When scaling up, ties prefer the newer replica set; when scaling down, ties prefer the older one.
var scalingOperation string
switch {
case deploymentReplicasToAdd > 0:
sort.Sort(controller.ReplicaSetsBySizeNewer(allRSs))
scalingOperation = "up"
case deploymentReplicasToAdd < 0:
sort.Sort(controller.ReplicaSetsBySizeOlder(allRSs))
scalingOperation = "down"
}
//Iterate over all active replica sets and estimate each one's proportion.
//The absolute value of deploymentReplicasAdded must never exceed that of deploymentReplicasToAdd.
deploymentReplicasAdded := int32(0)
nameToSize := make(map[string]int32)
for i := range allRSs {
rs := allRSs[i]
//Estimate proportions only when replicas are being added or removed; otherwise just fill nameToSize with each replica set's current size
if deploymentReplicasToAdd != 0 {
proportion := deploymentutil.GetProportion(rs, *deployment, deploymentReplicasToAdd, deploymentReplicasAdded)
nameToSize[rs.Name] = *(rs.Spec.Replicas) + proportion
deploymentReplicasAdded += proportion
} else {
nameToSize[rs.Name] = *(rs.Spec.Replicas)
}
}
// Update all replica sets
for i := range allRSs {
rs := allRSs[i]
// Add/remove any leftover to the largest replica set.
if i == 0 && deploymentReplicasToAdd != 0 {
leftover := deploymentReplicasToAdd - deploymentReplicasAdded
nameToSize[rs.Name] = nameToSize[rs.Name] + leftover
if nameToSize[rs.Name] < 0 {
nameToSize[rs.Name] = 0
}
}
if _, _, err := dc.scaleReplicaSet(rs, nameToSize[rs.Name], deployment, scalingOperation); err != nil {
// Return immediately on failure; the deployment will be requeued
return err
}
}
}
return nil
}
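A worked example of that proportional math, with numbers I made up: two active RSs at old=6/new=4 (total 10), the deployment scaled to 15 with maxSurge=0, so deploymentReplicasToAdd=5:
package main

import "fmt"

func main() {
	names := []string{"rs-old", "rs-new"} // sorted largest first, as when scaling up
	sizes := map[string]int32{"rs-old": 6, "rs-new": 4}
	total := int32(10) // current total replicas across active RSs
	toAdd := int32(5)  // allowedSize(15) - total(10)

	added := int32(0)
	for _, name := range names {
		// Integer proportion, like GetProportion (simplified: no min/max clamping).
		proportion := toAdd * sizes[name] / total
		sizes[name] += proportion
		added += proportion
	}
	// Rounding leftovers go to the largest replica set (index 0 in sorted order).
	sizes[names[0]] += toAdd - added

	fmt.Println(sizes) // map[rs-new:6 rs-old:9] -- 9+6 = 15, as desired
}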
Finally, syncDeployment dispatches on the strategy: rolloutRecreate or rolloutRolling. If the strategy is RollingUpdate:
func (dc *DeploymentController) syncDeployment(key string) error {
......
switch d.Spec.Strategy.Type {
case apps.RecreateDeploymentStrategyType:
//rolloutRecreate performs the update
return dc.rolloutRecreate(d, rsList, podMap)
case apps.RollingUpdateDeploymentStrategyType:
// rolloutRolling performs the update
return dc.rolloutRolling(d, rsList)
}
......
}
The rolloutRolling function:
func (dc *DeploymentController) rolloutRolling(d *apps.Deployment, rsList []*apps.ReplicaSet) error {
//Get all the RSs; create a newRS if one doesn't exist yet
newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, true)
if err != nil {
return err
}
allRSs := append(oldRSs, newRS)
// Try to scale up
scaledUp, err := dc.reconcileNewReplicaSet(allRSs, newRS, d)
if err != nil {
return err
}
if scaledUp {
return dc.syncRolloutStatus(allRSs, newRS, d)
}
// Try to scale down
scaledDown, err := dc.reconcileOldReplicaSets(allRSs, controller.FilterActiveReplicaSets(oldRSs), newRS, d)
if err != nil {
return err
}
if scaledDown {
return dc.syncRolloutStatus(allRSs, newRS, d)
}
//Clean up old RSs beyond the revision history limit
if deploymentutil.DeploymentComplete(d, &d.Status) {
if err := dc.cleanupDeployment(oldRSs, d); err != nil {
return err
}
}
//Sync the deployment status
return dc.syncRolloutStatus(allRSs, newRS, d)
}
The scale-up step is done by reconcileNewReplicaSet:
func (dc *DeploymentController) reconcileNewReplicaSet(allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, deployment *apps.Deployment) (bool, error) {
// Check whether the replica count already matches the desired value
if *(newRS.Spec.Replicas) == *(deployment.Spec.Replicas) {
// Scaling not required.
return false, nil
}
//Check whether a scale-down is needed
if *(newRS.Spec.Replicas) > *(deployment.Spec.Replicas) {
// Scale down.
scaled, _, err := dc.scaleReplicaSetAndRecordEvent(newRS, *(deployment.Spec.Replicas), deployment)
return scaled, err
}
//Compute how many replicas newRS should have
newReplicasCount, err := deploymentutil.NewRSNewReplicas(deployment, allRSs, newRS)
if err != nil {
return false, err
}
//If scaling is needed, update the rs annotations and rs.Spec.Replicas
scaled, _, err := dc.scaleReplicaSetAndRecordEvent(newRS, newReplicasCount, deployment)
return scaled, err
}
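NewRSNewReplicas is where maxSurge bites. A worked example with my own numbers — spec.replicas=10, maxSurge=25% (which rounds up, unlike maxUnavailable), old RS at 8 and new RS at 4:
package main

import "fmt"

func main() {
	deploymentReplicas := int32(10)
	maxSurge := (deploymentReplicas*25 + 99) / 100 // ceil(10 * 25%) = 3
	maxTotalPods := deploymentReplicas + maxSurge  // 13

	currentPodCount := int32(8 + 4) // old RS + new RS
	newRSReplicas := int32(4)

	scaleUp := maxTotalPods - currentPodCount // room for 1 more pod
	if rest := deploymentReplicas - newRSReplicas; rest < scaleUp {
		scaleUp = rest // never push the new RS past spec.replicas
	}
	fmt.Println("new RS target:", newRSReplicas+scaleUp) // 5
}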
The rolloutRecreate update strategy:
// rolloutRecreate implements the Recreate-style update
func (dc *DeploymentController) rolloutRecreate(d *apps.Deployment, rsList []*apps.ReplicaSet, podMap map[types.UID][]*v1.Pod) error {
//Get the RSs; don't create a new RS yet if one doesn't exist, so the scale-down accounting isn't skewed — it gets created later
newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, false)
if err != nil {
return err
}
allRSs := append(oldRSs, newRS)
activeOldRSs := controller.FilterActiveReplicaSets(oldRSs)
// Scale down the old RSs
scaledDown, err := dc.scaleDownOldReplicaSetsForRecreate(activeOldRSs, d)
if err != nil {
return err
}
if scaledDown {
// Update DeploymentStatus.
return dc.syncRolloutStatus(allRSs, newRS, d)
}
//Wait until all old pods have stopped running; if some are still running, just sync the status and come back later
if oldPodsRunning(newRS, oldRSs, podMap) {
return dc.syncRolloutStatus(allRSs, newRS, d)
}
// If there is no newRS, now is the time to create one
if newRS == nil {
newRS, oldRSs, err = dc.getAllReplicaSetsAndSyncRevision(d, rsList, true)
if err != nil {
return err
}
allRSs = append(oldRSs, newRS)
}
// Scale up the newRS
if _, err := dc.scaleUpNewReplicaSetForRecreate(newRS, d); err != nil {
return err
}
//Clean up old RSs beyond the revision history limit
if util.DeploymentComplete(d, &d.Status) {
if err := dc.cleanupDeployment(oldRSs, d); err != nil {
return err
}
}
//Sync the deployment status
return dc.syncRolloutStatus(allRSs, newRS, d)
}
Wrap-up
This post walked through how the Kubernetes Deployment controller works: managing Pods by controlling ReplicaSets, and handling creation, updates, rollback, and scaling. Going through the source showed how a Deployment syncs its status and how the different paths are dispatched — pause, rollback, and the rolling and recreate updates.
