go grpc 健康检查

最新推荐文章于 2023-11-30 21:52:43 发布

Smartershining

最新推荐文章于 2023-11-30 21:52:43 发布

阅读量1.6k

点赞数

分类专栏：源码

本文链接：https://blog.csdn.net/Smartershining/article/details/112726823

版权

源码专栏收录该内容

2 篇文章 0 订阅

订阅专栏

背景

官方链接：https://github.com/grpc/grpc/blob/master/doc/health-checking.md。

客户端在超时或者收到unhealthy的回复时可以认为服务端异常。

健康检查的proto文件定义如下：

syntax = "proto3";

package grpc.health.v1;

// HealthCheckRequest names the service whose health is being queried.
message HealthCheckRequest {
  string service = 1;
}

// HealthCheckResponse carries the serving status reported for the
// requested service.
message HealthCheckResponse {
  // ServingStatus enumerates the states that can be reported in `status`.
  enum ServingStatus {
    UNKNOWN = 0;
    SERVING = 1;
    NOT_SERVING = 2;
    SERVICE_UNKNOWN = 3;  // Used only by the Watch method.
  }
  ServingStatus status = 1;
}

// Health is the health-checking service exposed by a server.
service Health {
  // Check is a unary RPC: one request in, one response out.
  rpc Check(HealthCheckRequest) returns (HealthCheckResponse);

  // Watch is a server-streaming RPC: one request in, a stream of
  // responses out.
  rpc Watch(HealthCheckRequest) returns (stream HealthCheckResponse);
}

客户端可以通过调用 Check 方法（需要设置 deadline）检查服务端的健康状况，建议 service 名称采用 package_names.ServiceName 的形式，例如 grpc.health.v1.Health。Watch 方法用于流式健康检查：server 会立即响应当前的服务状态，之后每当服务状态发生改变时也会再发送一条消息。

使用

服务端注册一个叫xxxx.Health的服务，用于grpc的健康检查。

// Create the health server that will answer the health-check RPCs.
hsrv := health.NewServer() 
// Report the service named "xxxx.Health" as SERVING.
hsrv.SetServingStatus("xxxx.Health",healthpb.HealthCheckResponse_SERVING)
// Register the health server on s. s is defined outside this snippet
// (presumably the *grpc.Server being set up — confirm against the caller).
healthpb.RegisterHealthServer(s, hsrv)

客户端

    // Dial address with the insecure dial option and a default service config
    // that selects the roundrobin.Name load-balancing policy and requests
    // health checking for the service named "xxxx.Health".
    // NOTE(review): err is not checked inside this snippet — handle it before
    // using conn.
    conn, err := grpc.Dial(address,grpc.WithInsecure(),
    		grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"LoadBalancingPolicy": "%s", "HealthCheckConfig": {"ServiceName": "xxxx.Health"}}`, roundrobin.Name)))

在 grpc.WithDefaultServiceConfig 中配置 HealthCheckConfig 的同时，还需要匿名导入 _ "google.golang.org/grpc/health"：该包在初始化时会注册客户端的健康检查函数，缺少这个导入时客户端不会真正发起健康检查。

如果需要自定义健康检查，只需要按照proto文件实现Check和Watch方法即可。

源码分析

grpc.Dial --> grpc.DialContext ,ClientConn里面默认dialOptions里面healthCheckFunc是grpc health/client.go里面的clientHealthCheck方法：

// clientHealthCheck drives the client side of the health-check protocol for a
// single connection. In a loop it opens a stream with
// newStream(healthCheckMethod), sends one HealthCheckRequest for service, and
// then reports every received status through setConnectivityState.
//
// Parameters:
//   - ctx: when ctx.Err() is non-nil at the top of an attempt, the function
//     stops and returns nil.
//   - newStream: opens a stream for the given method; the result is expected
//     to be a grpc.ClientStream.
//   - setConnectivityState: callback used to publish Connecting, Ready or
//     TransientFailure together with an optional error.
//   - service: the service name placed in the HealthCheckRequest.
//
// Returns nil when ctx is done or backoffFunc returns false. Returns a
// non-nil error when newStream yields something that is not a
// grpc.ClientStream, or when the server answers with codes.Unimplemented; in
// both of those cases the state is set to Ready first.
func clientHealthCheck(ctx context.Context, newStream func(string) (interface{}, error), setConnectivityState func(connectivity.State, error), service string) error {
	// Number of attempts made since the last successfully received message.
	tryCnt := 0

retryConnection:
	for {
		// Backs off if the connection has failed in some way without receiving a message in the previous retry.
		// The first attempt (tryCnt == 0) skips the backoff entirely.
		// NOTE(review): backoffFunc is defined elsewhere; presumably it returns
		// false when ctx ends during the wait — confirm.
		if tryCnt > 0 && !backoffFunc(ctx, tryCnt-1) {
			return nil
		}
		tryCnt++

		if ctx.Err() != nil {
			return nil
		}
		setConnectivityState(connectivity.Connecting, nil)
		rawS, err := newStream(healthCheckMethod)
		if err != nil {
			// tryCnt was already incremented, so the next iteration backs off.
			continue retryConnection
		}

		s, ok := rawS.(grpc.ClientStream)
		// Ideally, this should never happen. But if it happens, the server is marked as healthy for LBing purposes.
		if !ok {
			setConnectivityState(connectivity.Ready, nil)
			return fmt.Errorf("newStream returned %v (type %T); want grpc.ClientStream", rawS, rawS)
		}

		// An io.EOF from SendMsg is not treated as a send failure here; the
		// code falls through to RecvMsg below.
		if err = s.SendMsg(&healthpb.HealthCheckRequest{Service: service}); err != nil && err != io.EOF {
			// Stream should have been closed, so we can safely continue to create a new stream.
			continue retryConnection
		}
		// Only one request is sent per stream; the return value of CloseSend is ignored.
		s.CloseSend()

		// resp is allocated once and reused for every RecvMsg on this stream.
		resp := new(healthpb.HealthCheckResponse)
		// This inner loop has no break: it is left only by the return or the
		// labelled continue below.
		for {
			err = s.RecvMsg(resp)

			// Reports healthy for the LBing purposes if health check is not implemented in the server.
			if status.Code(err) == codes.Unimplemented {
				setConnectivityState(connectivity.Ready, nil)
				return err
			}

			// Reports unhealthy if server's Watch method gives an error other than UNIMPLEMENTED.
			if err != nil {
				setConnectivityState(connectivity.TransientFailure, fmt.Errorf("connection active but received health check RPC error: %v", err))
				continue retryConnection
			}

			// As a message has been received, removes the need for backoff for the next retry by resetting the try count.
			tryCnt = 0
			// Only SERVING maps to Ready; every other status is reported as TransientFailure.
			if resp.Status == healthpb.HealthCheckResponse_SERVING {
				setConnectivityState(connectivity.Ready, nil)
			} else {
				setConnectivityState(connectivity.TransientFailure, fmt.Errorf("connection active but health check failed. status=%s", resp.Status))
			}
		}
	}
}

health check调用方法是在addrConn 的connect方法里面

addrConn.connect() --> 起了一个goroutine调用addrConn的resetTransport方法。resetTransport里面会遍历addrConn里面所有的地址，直到连接上第一个可用的地址为止；连接成功后会起一个协程调用addrConn里面的startHealthCheck方法。

// Start the health checking stream.
	// The configured healthCheckFunc is run in its own goroutine, so the code
	// that launches it does not wait for it to return.
	go func() {
		err := ac.cc.dopts.healthCheckFunc(ctx, newStream, setConnectivityState, healthCheckConfig.ServiceName)
		if err != nil {
			// The error is only logged here; the message depends on whether
			// the status code is Unimplemented.
			if status.Code(err) == codes.Unimplemented {
				channelz.Error(logger, ac.channelzID, "Subchannel health check is unimplemented at server side, thus health check is disabled")
			} else {
				channelz.Errorf(logger, ac.channelzID, "HealthCheckFunc exits with unexpected error %v", err)
			}
		}
	}()

其中setConnectivityState的实现是调用addrConn的updateConnectivityState