[sofamosn源码]健康检查源码

1.配置

"cluster_manager": {
  "clusters": [
    {
      "name": "serverCluster",
      "type": "SIMPLE",
      "lb_type": "LB_RANDOM",
      "max_request_per_conn": 1024,
      "conn_buffer_limit_bytes": 32768,
      "hosts": [
         {"address":"127.0.0.1:8080"}
      ]
    },
    {
      "name": "clientCluster",
      "health_check": {
        "protocol":"Http1",
        "timeout":"10s",
        "interval":"1m",
        "interval_jitter":"1m",
        "healthy_threshold":1,
        "service_name":"test"
      },
      "type": "SIMPLE",
      "lb_type": "LB_RANDOM",
      "max_request_per_conn": 1024,
      "conn_buffer_limit_bytes": 32768,
      "hosts": [
         {"address":"127.0.0.1:8080"}
      ]
    }
  ]
}

 

2.源码

1.启动后,会去初始化cluster

2.初始化时,根据配置的health_check字段(代码里实际判断的是其中的service_name是否非空),给cluster设置healthChecker.

3.初始化后的一个步骤,就是UpdateHosts.这个方法里面会把最新的hostSet交给healthChecker,由它先停止再重新启动健康检查.

  • 3.1主要就是对每个host,启动一个协程去检测端口是否通.
  • 3.2如果不通,设置host为不健康,接着继续健康检查;通的话,设置为healthy
// simpleCluster is an implementation of types.Cluster
type simpleCluster struct {
   // info holds the cluster settings assembled by newSimpleCluster
   // (name, type, load balancer type, limits, TLS manager, ...).
   info          *clusterInfo
   // NOTE(review): presumably guards host updates; no lock/unlock call is
   // visible in this excerpt - confirm against the full file.
   mutex         sync.Mutex
   // healthChecker stays nil unless newSimpleCluster finds a health check
   // configured; UpdateHosts checks for nil before using it.
   healthChecker types.HealthChecker
   lbInstance    types.LoadBalancer // load balancer used for this cluster
   // NOTE(review): not assigned anywhere in the visible code - confirm usage.
   hostSet       *hostSet
   // snapshot stores a *clusterSnapshot (info, hostSet, lb); newSimpleCluster
   // seeds it with an empty host set.
   snapshot      atomic.Value
}
// newSimpleCluster builds a simpleCluster from its v2.Cluster configuration.
//
// It assembles the cluster info, creates the TLS client context manager,
// stores an initial snapshot backed by an empty host set, and attaches a
// health checker when the health check config carries a service name.
func newSimpleCluster(clusterConfig v2.Cluster) types.Cluster {
	// TODO support original dst cluster
	if clusterConfig.ClusterType == v2.ORIGINALDST_CLUSTER {
		clusterConfig.LbType = v2.LB_ORIGINAL_DST
	}

	ci := &clusterInfo{
		name:                 clusterConfig.Name,
		clusterType:          clusterConfig.ClusterType,
		maxRequestsPerConn:   clusterConfig.MaxRequestPerConn,
		connBufferLimitBytes: clusterConfig.ConnBufferLimitBytes,
		stats:                newClusterStats(clusterConfig.Name),
		lbSubsetInfo:         NewLBSubsetInfo(&clusterConfig.LBSubSetConfig), // new subset load balancer info
		lbOriDstInfo:         NewLBOriDstInfo(&clusterConfig.LBOriDstConfig), // new oridst load balancer info
		lbType:               types.LoadBalancerType(clusterConfig.LbType),
		resourceManager:      NewResourceManager(clusterConfig.CirBreThresholds),
	}

	// Start from the default connect timeout; a configured value overrides it.
	ci.connectTimeout = network.DefaultConnectTimeout
	if clusterConfig.ConnectTimeout != nil {
		ci.connectTimeout = clusterConfig.ConnectTimeout.Duration
	}

	// TLS manager: a creation failure is only reported, and whatever manager
	// value was returned is still stored on the info.
	tlsMgr, tlsErr := mtls.NewTLSClientContextManager(&clusterConfig.TLS)
	if tlsErr != nil {
		log.DefaultLogger.Alertf("cluster.config", "[upstream] [cluster] [new cluster] create tls context manager failed, %v", tlsErr)
	}
	ci.tlsMng = tlsMgr

	sc := &simpleCluster{info: ci}

	// Seed the snapshot with an empty host set.
	emptyHosts := &hostSet{}
	initial := &clusterSnapshot{
		info:    ci,
		hostSet: emptyHosts,
		lb:      NewLoadBalancer(ci, emptyHosts),
	}
	sc.snapshot.Store(initial)

	// Health checking is enabled only when a service name is configured.
	if clusterConfig.HealthCheck.ServiceName == "" {
		return sc
	}
	log.DefaultLogger.Infof("[upstream] [cluster] [new cluster] cluster %s have health check", clusterConfig.Name)
	sc.healthChecker = healthcheck.CreateHealthCheck(clusterConfig.HealthCheck)
	return sc
}
// UpdateHosts is shown here only as an excerpt: the article omits most of the
// body. The visible tail hands hostSet to the health checker when the cluster
// has one.
// NOTE(review): hostSet is defined in the omitted part - presumably built
// from newHosts; confirm against the full source.
func (sc *simpleCluster) UpdateHosts(newHosts []types.Host) {
   // ... (the rest of the method body is omitted in this excerpt)
   if sc.healthChecker != nil {
      sc.healthChecker.SetHealthCheckerHostSet(hostSet)
   }
}
// SetHealthCheckerHostSet resets the hosts watched by the health checker:
// it stops the checker, takes the host list from hostSet, and starts the
// checker again.
//
// It takes no lock itself; per the original note it is only called from the
// cluster, which does the locking.
func (hc *healthChecker) SetHealthCheckerHostSet(hostSet types.HostSet) {
	hc.stop()
	hosts := hostSet.Hosts()
	hc.hosts = hosts
	hc.start()
}
//对每一个host进行健康检查
func (hc *healthChecker) startCheck(host types.Host) {
   addr := host.AddressString()
   if _, ok := hc.checkers[addr]; !ok {
      //创建一个session对象,代表一次健康检查的会话.
      s := hc.sessionFactory.NewSession(hc.sessionConfig, host)
      if s == nil {
         log.DefaultLogger.Alertf("healthcheck.session", "[upstream] [health check] Create Health Check Session Error, Remote Address = %s", addr)
         return
      }
      c := newChecker(s, host, hc)
      hc.checkers[addr] = c
      utils.GoWithRecover(func() {
         //=开启一个协程,去做健康检查=
         c.Start()
      }, nil)
      atomic.AddInt64(&hc.localProcessHealthy, 1) // default host is healthy
   }
}

 

下面是关键逻辑了

 

// Start runs the check loop for this sessionChecker and returns once c.stop
// is signalled. It arms a timer with c.OnCheck as its callback, then waits
// for either a check response on c.resp or a timeout signal on c.timeout,
// records the outcome, and arms the timer for the next round.
// NOTE(review): the original comment said there is one sessionChecker per
// cluster, but startCheck creates one checker per host address.
func (c *sessionChecker) Start() {
   // 1. Create the first check timer; it is given firstInterval and c.OnCheck as its callback.
   c.checkTimer = utils.NewTimer(firstInterval, c.OnCheck)
   for {
      select {
      case <-c.stop:
         return
      default:
         // prepare a check: each round gets a fresh ID so a response can be matched to it
         currentID := atomic.AddUint64(&c.checkID, 1)
         select {
         case <-c.stop:
            return
         // ==== health check result ====
         case resp := <-c.resp:
            // if the ID is not equal, means we receive a timeout for this ID, ignore the response
            if resp.ID == currentID {
               c.checkTimeout.Stop()
               // a healthy response marks the check as a success
               if resp.Healthy {
                  c.HandleSuccess()
               } else {
                  // otherwise record an active-check failure
                  c.HandleFailure(types.FailureActive)
               }
               // next health checker
               c.checkTimer = utils.NewTimer(c.HealthChecker.getCheckInterval(), c.OnCheck)
               if log.DefaultLogger.GetLogLevel() >= log.DEBUG {
                  log.DefaultLogger.Debugf("[upstream] [health check] [session checker] receive a response id: %d", resp.ID)
               }
            } else {
               if log.DefaultLogger.GetLogLevel() >= log.DEBUG {
                  log.DefaultLogger.Debugf("[upstream] [health check] [session checker] receive a expired id response, response id: %d, currentID: %d", resp.ID, currentID)
               }
            }
         case <-c.timeout:
            // on timeout, treat the check as failed
            c.checkTimer.Stop()
            c.Session.OnTimeout() // session timeout callbacks
            c.HandleFailure(types.FailureNetwork)
            // next health checker
            // schedule the next health check
            c.checkTimer = utils.NewTimer(c.HealthChecker.getCheckInterval(), c.OnCheck)
            if log.DefaultLogger.GetLogLevel() >= log.DEBUG {
               log.DefaultLogger.Debugf("[upstream] [health check] [session checker] receive a timeout response at id: %d", currentID)
            }
         }
      }
   }
}
/**
健康检查
 */
func (c *sessionChecker) OnCheck() {
   // record current id
   id := atomic.LoadUint64(&c.checkID)
   c.HealthChecker.stats.attempt.Inc(1)
   // start a timeout before check health
   c.checkTimeout.Stop()
   //设置超时定时器.超时后,就会往timeout这个channel放个true
   c.checkTimeout = utils.NewTimer(c.HealthChecker.timeout, c.OnTimeout)
   c.resp <- checkResponse{
      ID:      id,
      Healthy: c.Session.CheckHealth(),
   }
}
// CheckHealth is the actual health check: it dials s.addr over TCP and
// reports whether the connection could be established. A successful
// connection is closed again right away.
func (s *TCPDialSession) CheckHealth() bool {
	// default dial timeout, maybe already timeout by checker
	const dialTimeout = 30 * time.Second

	conn, dialErr := net.DialTimeout("tcp", s.addr, dialTimeout)
	if dialErr == nil {
		conn.Close()
		return true
	}
	log.DefaultLogger.Infof("[upstream] [health check] [tcpdial session] dial tcp for host %s error: %v", s.addr, dialErr)
	return false
}

注意,支持的健康检查类型

// ProtocolsSupported is the set of protocol names for which health checking
// is supported. Keys are the string forms of the protocol constants, and
// every listed key maps to true.
// NOTE(review): how this map is consulted is not shown in this excerpt.
var ProtocolsSupported = map[string]bool{
   string(protocol.Auto):      true,
   string(protocol.HTTP1):     true,
   string(protocol.HTTP2):     true,
   string(protocol.Xprotocol): true,
}

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值