初始化
文件位置:cmd/kube-controller-manager/app/controllermanager.go
// NewControllerInitializers is where every controller managed by the
// kube-controller-manager, including the HPA controller, is registered.
func NewControllerInitializers(loopMode ControllerLoopMode) map[string]InitFunc {
...
// The HPA controller is registered under this key; its InitFunc is startHPAController.
controllers["horizontalpodautoscaling"] = startHPAController
...
}
HPA Controller和其他的Controller一样,都在NewControllerInitializers方法中进行注册,然后通过startHPAController来启动。
startHPAController
|-> startHPAControllerWithRESTClient
|-> startHPAControllerWithMetricsClient
|-> NewHorizontalController
文件位置:/pkg/controller/podautoscaler/horizontal.go
// NewHorizontalController creates a new HorizontalController.
// It sets up an event recorder and registers informer event handlers for HPA
// objects; the handlers feed object keys into the controller's work queue.
func NewHorizontalController(
evtNamespacer v1core.EventsGetter,
scaleNamespacer scaleclient.ScalesGetter,
hpaNamespacer autoscalingclient.HorizontalPodAutoscalersGetter,
mapper apimeta.RESTMapper,
metricsClient metricsclient.MetricsClient,
hpaInformer autoscalinginformers.HorizontalPodAutoscalerInformer,
podInformer coreinformers.PodInformer,
resyncPeriod time.Duration,
downscaleStabilisationWindow time.Duration,
tolerance float64,
cpuInitializationPeriod,
delayOfInitialReadinessStatus time.Duration,
) *HorizontalController {
// Events emitted by this controller are recorded under the
// "horizontal-pod-autoscaler" component name.
broadcaster := record.NewBroadcaster()
broadcaster.StartStructuredLogging(0)
broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: evtNamespacer.Events("")})
recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "horizontal-pod-autoscaler"})
...
// Watch HPA objects: add/update/delete events all funnel keys into the work
// queue via enqueueHPA/updateHPA/deleteHPA respectively.
hpaInformer.Informer().AddEventHandlerWithResyncPeriod(
cache.ResourceEventHandlerFuncs{
AddFunc: hpaController.enqueueHPA,
UpdateFunc: hpaController.updateHPA,
DeleteFunc: hpaController.deleteHPA,
},
resyncPeriod,
)
...
return hpaController
}
核心逻辑是监听hpa对象的事件,分别对应hpaController.enqueueHPA,hpaController.updateHPA和hpaController.deleteHPA。enqueueHPA本质上就是把hpa对象的key加入HorizontalController的工作队列;updateHPA在hpa对象更新时将其重新入队;deleteHPA则把对应的key从队列中移除(Forget)。hpa对象的key存在hpaController的workqueue中。
代码见下文
文件位置:/pkg/controller/podautoscaler/horizontal.go
// updateHPA re-enqueues the HPA whenever it changes. cur could be an
// *v1.HorizontalPodAutoscaler, or a DeletionFinalStateUnknown marker item.
// The old object is ignored; only the current state needs reconciling.
func (a *HorizontalController) updateHPA(old, cur interface{}) {
a.enqueueHPA(cur)
}
// enqueueHPA derives the work-queue key for obj and adds it to the queue.
// obj could be an *v1.HorizontalPodAutoscaler, or a DeletionFinalStateUnknown
// marker item. Keys always enter the queue with the resyncPeriod delay; if a
// request for the same HPA is already queued the new one is dropped, so each
// HPA spends one resync interval in the queue and is processed once per
// resync interval.
func (a *HorizontalController) enqueueHPA(obj interface{}) {
	if key, err := controller.KeyFunc(obj); err != nil {
		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
	} else {
		a.queue.AddRateLimited(key)
	}
}
// deleteHPA drops the object's key from the work queue when the HPA is
// deleted, so it is no longer reconciled.
func (a *HorizontalController) deleteHPA(obj interface{}) {
	// TODO: could we leak if we fail to get the key?
	if key, err := controller.KeyFunc(obj); err != nil {
		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
	} else {
		a.queue.Forget(key)
	}
}
startHPAController
文件位置:cmd/kube-controller-manager/app/autoscaling.go
最后会调用到startHPAControllerWithMetricsClient方法,该方法先调用NewHorizontalController方法初始化一个HPA Controller,然后启动一个goroutine执行它的Run方法。
// startHPAController is the InitFunc registered in NewControllerInitializers.
// It delegates to startHPAControllerWithLegacyClient below.
func startHPAController(ctx ControllerContext) (http.Handler, bool, error) {
...
return startHPAControllerWithLegacyClient(ctx)
}
// startHPAControllerWithLegacyClient wires up a Heapster-backed metrics
// client and hands it to startHPAControllerWithMetricsClient.
func startHPAControllerWithLegacyClient(ctx ControllerContext) (http.Handler, bool, error) {
	hpaClient := ctx.ClientBuilder.ClientOrDie("horizontal-pod-autoscaler")

	heapsterClient := metrics.NewHeapsterMetricsClient(
		hpaClient,
		metrics.DefaultHeapsterNamespace,
		metrics.DefaultHeapsterScheme,
		metrics.DefaultHeapsterService,
		metrics.DefaultHeapsterPort,
	)

	return startHPAControllerWithMetricsClient(ctx, heapsterClient)
}
// startHPAControllerWithMetricsClient builds the clients the HPA controller
// needs, constructs the controller, and runs it in its own goroutine.
func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient metrics.MetricsClient) (http.Handler, bool, error) {
	hpaClient := ctx.ClientBuilder.ClientOrDie("horizontal-pod-autoscaler")
	hpaClientConfig := ctx.ClientBuilder.ConfigOrDie("horizontal-pod-autoscaler")

	// The scale client is what actually resizes the target workload.
	scaleKindResolver := scale.NewDiscoveryScaleKindResolver(hpaClient.Discovery())
	scaleClient, err := scale.NewForConfig(hpaClientConfig, ctx.RESTMapper, dynamic.LegacyAPIPathResolverFunc, scaleKindResolver)
	if err != nil {
		return nil, false, err
	}

	// Construct the controller, then run it until ctx.Stop closes.
	hpaCfg := ctx.ComponentConfig.HPAController
	hpaController := podautoscaler.NewHorizontalController(
		hpaClient.CoreV1(),
		scaleClient,
		hpaClient.AutoscalingV1(),
		ctx.RESTMapper,
		metricsClient,
		ctx.InformerFactory.Autoscaling().V1().HorizontalPodAutoscalers(),
		ctx.InformerFactory.Core().V1().Pods(),
		hpaCfg.HorizontalPodAutoscalerSyncPeriod.Duration,
		hpaCfg.HorizontalPodAutoscalerDownscaleStabilizationWindow.Duration,
		hpaCfg.HorizontalPodAutoscalerTolerance,
		hpaCfg.HorizontalPodAutoscalerCPUInitializationPeriod.Duration,
		hpaCfg.HorizontalPodAutoscalerInitialReadinessDelay.Duration,
	)
	go hpaController.Run(ctx.Stop)

	return nil, true, nil
}
Run
文件位置:pkg/controller/podautoscaler/horizontal.go
// Run starts the HPA controller and blocks until stopCh is closed.
// It waits for the HPA and pod informer caches to sync, then runs a single
// worker in a goroutine.
func (a *HorizontalController) Run(stopCh <-chan struct{}) {
	defer utilruntime.HandleCrash()
	defer a.queue.ShutDown()

	klog.Infof("Starting HPA controller")
	defer klog.Infof("Shutting down HPA controller")

	synced := cache.WaitForNamedCacheSync("HPA", stopCh, a.hpaListerSynced, a.podListerSynced)
	if !synced {
		return
	}

	// Launch the worker; wait.Until re-invokes it every second until stopCh closes.
	go wait.Until(a.worker, time.Second, stopCh)

	<-stopCh
}
这里会调用worker执行具体的扩缩容的逻辑。
hpa逻辑入口:定时执行worker
go wait.Until(a.worker, time.Second, stopCh)
核心代码分析
processNextWorkItem: 遍历所有hpa对象
// worker drains the work queue until processNextWorkItem reports shutdown.
func (a *HorizontalController) worker() {
	for {
		if !a.processNextWorkItem() {
			break
		}
	}
	klog.Infof("horizontal pod autoscaler controller worker shutting down")
}
// processNextWorkItem pulls one HPA key off the queue and reconciles it.
// It returns false only when the queue has been shut down.
func (a *HorizontalController) processNextWorkItem() bool {
	key, quit := a.queue.Get()
	if quit {
		return false
	}
	defer a.queue.Done(key)

	deleted, err := a.reconcileKey(key.(string))
	if err != nil {
		utilruntime.HandleError(err)
	}

	// HPAs that still exist are re-queued so they get reconciled again after
	// the resync interval; deleted ones are simply dropped.
	if deleted {
		return true
	}
	a.queue.AddRateLimited(key)
	return true
}
hpa对象存储在HorizontalController的队列中,遍历每个hpa对象进行处理。
processNextWorkItem->
|-> reconcileKey
|-> reconcileAutoscaler
worker里面一路执行下来会走到reconcileAutoscaler方法里面,这里是HPA的核心。下面我们专注看看这部分。
reconcileAutoscaler:计算副本数
// reconcileAutoscaler computes the desired replica count for one HPA object
// and rescales the target when that count differs from the current one.
func (a *HorizontalController) reconcileAutoscaler(hpav1Shared *autoscalingv1.HorizontalPodAutoscaler, key string) error {
...
// A target with zero replicas has autoscaling disabled entirely.
if scale.Spec.Replicas == 0 && minReplicas != 0 {
// Autoscaling is disabled for this resource
desiredReplicas = 0
rescale = false
setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "ScalingDisabled", "scaling is disabled since the replica count of the target is zero")
// If the current replica count exceeds Spec.MaxReplicas, clamp the desired
// count down to the maximum.
} else if currentReplicas > hpa.Spec.MaxReplicas {
rescaleReason = "Current number of replicas above Spec.MaxReplicas"
desiredReplicas = hpa.Spec.MaxReplicas
// Symmetric case: clamp up to the configured minimum.
} else if currentReplicas < minReplicas {
rescaleReason = "Current number of replicas below Spec.MinReplicas"
desiredReplicas = minReplicas
} else {
var metricTimestamp time.Time
// Compute the metric-driven replica proposal.
metricDesiredReplicas, metricName, metricStatuses, metricTimestamp, err = a.computeReplicasForMetrics(hpa, scale, hpa.Spec.Metrics)
if err != nil {
...
}
klog.V(4).Infof("proposing %v desired replicas (based on %s from %s) for %s", metricDesiredReplicas, metricName, metricTimestamp, reference)
rescaleMetric := ""
if metricDesiredReplicas > desiredReplicas {
desiredReplicas = metricDesiredReplicas
rescaleMetric = metricName
}
if desiredReplicas > currentReplicas {
rescaleReason = fmt.Sprintf("%s above target", rescaleMetric)
}
if desiredReplicas < currentReplicas {
rescaleReason = "All metrics below target"
}
// A stabilization window can be configured to damp replica-count flapping.
// doc: https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#support-for-configurable-scaling-behavior
if hpa.Spec.Behavior == nil {
desiredReplicas = a.normalizeDesiredReplicas(hpa, key, currentReplicas, desiredReplicas, minReplicas)
} else {
desiredReplicas = a.normalizeDesiredReplicasWithBehaviors(hpa, key, currentReplicas, desiredReplicas, minReplicas)
}
rescale = desiredReplicas != currentReplicas
}
...
}
这一段代码是reconcileAutoscaler里面的核心代码,在这里会确定一个区间,首先根据当前的scale对象和当前hpa里面配置的对应的参数的值,决策当前的副本数量,其中针对于超过设定的maxReplicas和小于minReplicas两种情况,只需要简单地修正为对应的值,直接更新对应的scale对象即可;而scale副本为0的对象,hpa不会再对其进行任何操作。
对于当前副本数在maxReplicas和minReplicas之间的时候,则需要计算是否需要扩缩容,计算则是调用computeReplicasForMetrics方法来实现。
computeReplicasForMetrics 遍历度量目标
func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.HorizontalPodAutoscaler, scale *autoscalingv1.Scale, metricSpecs []autoscalingv2.MetricSpec) (replicas int32, metric string, statuses []autoscalingv2.MetricStatus, timestamp time.Time, err error) { ... //这里的度量目标可以是一个列表,所以遍历之后取最大的需要扩缩容的数量 for i, metricSpec := range metricSpecs { //根据type类型计算需要扩缩容的数量 replicaCountProposal, metricNameProposal, timestampProposal, condition, err := a.computeReplicasForMetric(hpa, metricSpec, specReplicas, statusReplicas, selector, &statuses[i]) if err != nil { if invalidMetricsCount <= 0 { invalidMetricCondition = condition invalidMetricError = err } invalidMetricsCount++ } //记录最大的需要扩缩容的数量 if err == nil && (replicas == 0 || replicaCountProposal > replicas) { timestamp = timestampProposal replicas = replicaCountProposal metric = metricNameProposal } } ... return replicas, metric, statuses, timestamp, nil }
在上面的代码中遍历所有的metrics,然后选取返回副本数最大的那个。主要计算逻辑都在computeReplicasForMetric中,下面我们看看这个方法。
computeReplicasForMetric:根据type计算副本数
func (a *HorizontalController) computeReplicasForMetric(hpa *autoscalingv2.HorizontalPodAutoscaler, spec autoscalingv2.MetricSpec, specReplicas, statusReplicas int32, selector labels.Selector, status *autoscalingv2.MetricStatus) (replicaCountProposal int32, metricNameProposal string, timestampProposal time.Time, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) { //根据不同的类型来进行计量 switch spec.Type { //表示如果是一个k8s对象,如Ingress对象 case autoscalingv2.ObjectMetricSourceType: ... // 表示pod度量类型 case autoscalingv2.PodsMetricSourceType: metricSelector, err := metav1.LabelSelectorAsSelector(spec.Pods.Metric.Selector) if err != nil { condition := a.getUnableComputeReplicaCountCondition(hpa, "FailedGetPodsMetric", err) return 0, "", time.Time{}, condition, fmt.Errorf("failed to get pods metric value: %v", err) } //仅支持AverageValue度量目标,计算需要扩缩容的数量 replicaCountProposal, timestampProposal, metricNameProposal, condition, err = a.computeStatusForPodsMetric(specReplicas, spec, hpa, selector, status, metricSelector) if err != nil { return 0, "", time.Time{}, condition, fmt.Errorf("failed to get pods metric value: %v", err) } // 表示Resource度量类型 case autoscalingv2.ResourceMetricSourceType: ... case autoscalingv2.ExternalMetricSourceType: ... default: errMsg := fmt.Sprintf("unknown metric source type %q", string(spec.Type)) err = fmt.Errorf(errMsg) condition := a.getUnableComputeReplicaCountCondition(hpa, "InvalidMetricSourceType", err) return 0, "", time.Time{}, condition, err } return replicaCountProposal, metricNameProposal, timestampProposal, autoscalingv2.HorizontalPodAutoscalerCondition{}, nil }
这里会根据不同的度量类型来进行统计,目前度量类型有四种,分别是Pods、Object、Resource、External。
computeStatusForPodsMetric&GetMetricReplicas:计算需要扩缩容的数量
文件位置:pkg/controller/podautoscaler/replica_calculator.go
func (a *HorizontalController) computeStatusForPodsMetric(currentReplicas int32, metricSpec autoscalingv2.MetricSpec, hpa *autoscalingv2.HorizontalPodAutoscaler, selector labels.Selector, status *autoscalingv2.MetricStatus, metricSelector labels.Selector) (replicaCountProposal int32, timestampProposal time.Time, metricNameProposal string, condition autoscalingv2.HorizontalPodAutoscalerCondition, err error) { //计算需要扩缩容的数量 replicaCountProposal, utilizationProposal, timestampProposal, err := a.replicaCalc.GetMetricReplicas(currentReplicas, metricSpec.Pods.Target.AverageValue.MilliValue(), metricSpec.Pods.Metric.Name, hpa.Namespace, selector, metricSelector) if err != nil { condition = a.getUnableComputeReplicaCountCondition(hpa, "FailedGetPodsMetric", err) return 0, timestampProposal, "", condition, err } ... return replicaCountProposal, timestampProposal, fmt.Sprintf("pods metric %s", metricSpec.Pods.Metric.Name), autoscalingv2.HorizontalPodAutoscalerCondition{}, nil } func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUtilization int64, metricName string, namespace string, selector labels.Selector, metricSelector labels.Selector) (replicaCount int32, utilization int64, timestamp time.Time, err error) { //获取pod中度量数据 metrics, timestamp, err := c.metricsClient.GetRawMetric(metricName, namespace, selector, metricSelector) if err != nil { return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v", metricName, err) } //通过结合度量数据来计算希望扩缩容的数量是多少 replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector, v1.ResourceName("")) return replicaCount, utilization, timestamp, err }
这里会调用GetRawMetric方法来获取pod对应的度量数据,然后再调用calcPlainMetricReplicas方法结合度量数据与目标期望来计算希望扩缩容的数量是多少。
calcPlainMetricReplicas:计算副本数具体实现
func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUtilization int64, namespace string, selector labels.Selector, resource v1.ResourceName) (replicaCount int32, utilization int64, err error) { podList, err := c.podLister.Pods(namespace).List(selector) ... //将pod分成三类进行统计,得到ready的pod数量、ignored Pod集合、missing Pod集合 readyPodCount, ignoredPods, missingPods := groupPods(podList, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus) //在度量的数据里移除ignored Pods集合的数据 removeMetricsForPods(metrics, ignoredPods) //计算pod中container request 设置的资源之和 requests, err := calculatePodRequests(podList, resource) ... //获取资源使用率 usageRatio, utilization := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization) ... }
这里会调用groupPods将pod列表的进行一个分类统计。ignoredPods集合里面包含了pod状态为PodPending的数据;missingPods列表里面包含了在度量数据里面根据pod名找不到的数据。
因为missingPods的度量数据已经在metrics里是找不到的,然后只需要剔除掉ignored Pods集合中度量的资源就好了。接下来调用calculatePodRequests方法统计pod中container request 设置的资源之和。
总结
hpa整个逻辑流程图: