上一篇https://blog.csdn.net/a1023934860/article/details/125855242?spm=1001.2014.3001.5502我们讲解了pilot-discovery是如何进行创建的,这一节让我们进入启动去看看它是如何对服务进行注册的以及如何对上游(envoy)进行配置推送的.
前序
在开始之前,我们先将上一篇遗留的几个问题进行解释.
- s.initDiscoveryService(args) 该方法有什么用
- serviceEntryHandler 外部服务注册后触发的事件
- workloadEntryHandler内部服务注册后触发的事件
- initMulticluster 对集群原生资源的管理
接下来让我们一一解答
s.initDiscoveryService(args)
它的功能是提供了envoy与istiod的通讯,具体流程是
- 创建一个grpc服务器.
- 为其添加一个handler,istio-proxy调用该handler进行服务注册功能.
- 将上游建立的连接存储到XDS中.
了解了功能后就让我们开始进入源码吧
func (s *Server) initDiscoveryService(args *PilotArgs) {
// 这里会在启动的时候运行,下文会重点讲解xdsserver的start
s.addStartFunc(func(stop <-chan struct{}) error {
log.Infof("Starting ADS server")
s.XDSServer.Start(stop)
return nil
})
// 创建grpc服务并注册handler
s.initGrpcServer(args.KeepaliveOptions)
if args.ServerOptions.GRPCAddr != "" {
s.grpcAddress = args.ServerOptions.GRPCAddr
} else {
// This happens only if the GRPC port (15010) is disabled. We will multiplex
// it on the HTTP port. Does not impact the HTTPS gRPC or HTTPS.
log.Info("multiplexing gRPC on http addr ", args.ServerOptions.HTTPAddr)
s.MultiplexGRPC = true
}
}
func (s *Server) initGrpcServer(options *istiokeepalive.Options) {
interceptors := []grpc.UnaryServerInterceptor{
// setup server prometheus monitoring (as final interceptor in chain)
prometheus.UnaryServerInterceptor,
}
grpcOptions := istiogrpc.ServerOptions(options, interceptors...)
// 创建一个grpc服务
s.grpcServer = grpc.NewServer(grpcOptions...)
// 注册handler
s.XDSServer.Register(s.grpcServer)
// 创建grpc反射服务,在 Server 端添加后可以通过该服务获取所有服务的信息,包括服务定义,方法,属性等;
// 主要为了审计工作
reflection.Register(s.grpcServer)
}
func (s *DiscoveryServer) Register(rpcs *grpc.Server) {
// Register v3 server
discovery.RegisterAggregatedDiscoveryServiceServer(rpcs, s)
}
func RegisterAggregatedDiscoveryServiceServer(s *grpc.Server, srv AggregatedDiscoveryServiceServer) {
s.RegisterService(&_AggregatedDiscoveryService_serviceDesc, srv)
}
// handler具体信息
var _AggregatedDiscoveryService_serviceDesc = grpc.ServiceDesc{
ServiceName: "envoy.service.discovery.v3.AggregatedDiscoveryService",
HandlerType: (*AggregatedDiscoveryServiceServer)(nil),
Methods: []grpc.MethodDesc{},
Streams: []grpc.StreamDesc{
// 全量连接
{
StreamName: "StreamAggregatedResources",
Handler: _AggregatedDiscoveryService_StreamAggregatedResources_Handler,
ServerStreams: true,
ClientStreams: true,
},
// 增量连接
{
StreamName: "DeltaAggregatedResources",
Handler: _AggregatedDiscoveryService_DeltaAggregatedResources_Handler,
ServerStreams: true,
ClientStreams: true,
},
},
Metadata: "envoy/service/discovery/v3/ads.proto",
}
从上面的代码我们可以看到,grpc最终会注册两个方法,一个是全量注册,一个是增量注册.本文不讲解增量注册(它复杂)这里只讲解全量注册.
上游服务在注册时会向grpc服务器发送注册请求,调用上面的handler方法,具体流程为
- 上游请求注册grpc触发handler方法
- grpc服务器首先对其身份认证
- 获取最新的PushContext(上一篇有介绍其作用)
- 将con添加到XDS中
- 如果有WorkloadEntryGroup则调用WorkloadEntryController根据当前连接信息,创建WorkloadEntry资源
让我们先看一下Connection结构体里有什么,看看它有什么魅力能让xds进行存储
type Connection struct {
// 源地址,服务地址
peerAddr string
// 连接时间用于调试使用
connectedAt time.Time
// 使用 pod.namespaces+int(64)随机的,用于存储到xds时的key值
conID string
// 建立该连接的客户端,istio-proxy
proxy *model.Proxy
// istiod向该服务推送消息时所用到的管道
pushChannel chan *Event
// 因为是流连接,这里就是流数据
stream DiscoveryStream
// 这里是增量流数据
deltaStream DeltaDiscoveryStream
// Original node metadata, to avoid unmarshal/marshal.
// This is included in internal events.
node *core.Node
// initialized channel will be closed when proxy is initialized. Pushes, or anything accessing
// the proxy, should not be started until this channel is closed.
initialized chan struct{}
stop chan struct{}
// envoy向istiod发送消息所用到的管道
reqChan chan *discovery.DiscoveryRequest
deltaReqChan chan *discovery.DeltaDiscoveryRequest
errorChan chan error
}
那么接下来让我们走进源码看一看它究竟是如何工作的.
func (s *DiscoveryServer) Stream(stream DiscoveryStream) error {
//这里是对knative第一次创建连接时的优化
//原因是knative的第一个请求时加载请求,完成请求后会开启并发性,但是xds使用的长连接的性质
// 这时候的第一个请求还一致存在导致其他请求被阻塞
// 所以需要第一个请求加载完成后立即退出,让客户端进行重试
if knativeEnv != "" && firstRequest.Load() {
firstRequest.Store(false)
return status.Error(codes.Unavailable, "server warmup not complete; try again")
}
// istiod是否已经准备就绪,比如informer创建的缓存是否已经同步完成等操作
if !s.IsServerReady() {
return status.Error(codes.Unavailable, "server is not ready to serve discovery information")
}
// 获取当前流的上下文
ctx := stream.Context()
peerAddr := "0.0.0.0"
// 获取服务IP地址
if peerInfo, ok := peer.FromContext(ctx); ok {
peerAddr = peerInfo.Addr.String()
}
if err := s.WaitForRequestLimit(stream.Context()); err != nil {
log.Warnf("ADS: %q exceeded rate limit: %v", peerAddr, err)
return status.Errorf(codes.ResourceExhausted, "request rate limit exceeded: %v", err)
}
// 这里会调用身份认证插件进行认证,默认使用jwt进行认证,有一个认证成功即可
ids, err := s.authenticate(ctx)
if err != nil {
return status.Error(codes.Unauthenticated, err.Error())
}
if ids != nil {
log.Debugf("Authenticated XDS: %v with identity %v", peerAddr, ids)
} else {
log.Debugf("Unauthenticated XDS: %s", peerAddr)
}
// 初始化上下文信息,根据注册中心获取所有的需要填充的资源然后填充到属性中
// 比如使用了k8s作为注册中心,要填充该上下文中的virtualService,那么就会调用client-go的list获取当前集群中的所有virtualService信息填充到上下文中.
if err = s.globalPushContext().InitContext(s.Env, nil, nil); err != nil {
// Error accessing the data - log and close, maybe a different pilot replica
// has more luck
log.Warnf("Error reading config %v", err)
return status.Error(codes.Unavailable, "error reading config")
}
// 封装成Connection结构体
con := newConnection(peerAddr, stream)
// 对当前连接进行初始化,比如创建workloadentry资源
// 接受服务传来的数据,放入reqChan,然后触发下面的for-select循环
go s.receive(con, ids)
<-con.initialized
// 这里是envoy->istiod istiod->envoy双方互发数据的流程
for {
select {
// 这里是envoy->istiod
case req, ok := <-con.reqChan:
if ok {
if err := s.processRequest(req, con); err != nil {
return err
}
} else {
// Remote side closed connection or error processing the request.
return <-con.errorChan
}
// 这里是istiod->envoy
case pushEv := <-con.pushChannel:
err := s.pushConnection(con, pushEv)
pushEv.done()
if err != nil {
return err
}
case <-con.stop:
return nil
}
}
}
这里我们重点关注s.receive(con, ids)方法,至于下面的数据互发流程,会在本文正文(是的现在是前序)讲到
func (s *DiscoveryServer) receive(con *Connection, identities []string) {
defer func() {
close(con.errorChan)
close(con.reqChan)
// Close the initialized channel, if its not already closed, to prevent blocking the stream.
select {
case <-con.initialized:
default:
close(con.initialized)
}
}()
firstRequest := true
for {
// 接受请求信息
req, err := con.stream.Recv()
if err != nil {
if istiogrpc.IsExpectedGRPCError(err) {
log.Infof("ADS: %q %s terminated", con.peerAddr, con.conID)
return
}
con.errorChan <- err
log.Errorf("ADS: %q %s terminated with error: %v", con.peerAddr, con.conID, err)
totalXDSInternalErrors.Increment()
return
}
// This should be only set for the first request. The node id may not be set - for example malicious clients.
//第一次请求,会对其进行初始化操作
if firstRequest {
// probe happens before envoy sends first xDS request
if req.TypeUrl == v3.HealthInfoType {
log.Warnf("ADS: %q %s send health check probe before normal xDS request", con.peerAddr, con.conID)
continue
}
firstRequest = false
if req.Node == nil || req.Node.Id == "" {
con.errorChan <- status.New(codes.InvalidArgument, "missing node information").Err()
return
}
// 将con添加到xds当中,并创建workloadentry资源
if err := s.initConnection(req.Node, con, identities); err != nil {
con.errorChan <- err
return
}
defer s.closeConnection(con)
log.Infof("ADS: new connection for node:%s", con.conID)
}
// 将请求信息传入reqChan管道,然后由监听事件进行处理
select {
case con.reqChan <- req:
case <-con.stream.Context().Done():
log.Infof("ADS: %q %s terminated with stream closed", con.peerAddr, con.conID)
return
}
}
}
// 进入initConnection
func (s *DiscoveryServer) initConnection(node *core.Node, con *Connection, identities []string) error {
// Setup the initial proxy metadata
proxy, err := s.initProxyMetadata(node)
if err != nil {
return err
}
// proxy所标识的集群如果有别名,则使用别名
if alias, exists := s.ClusterAliases[proxy.Metadata.ClusterID]; exists {
proxy.Metadata.ClusterID = alias
}
// 获取最新的上下文信息
proxy.LastPushContext = s.globalPushContext()
con.conID = connectionID(proxy.ID)
con.node = node
con.proxy = proxy
// 这里会进行判断如果没有进行认证,则直接返回
if err := s.authorize(con, identities); err != nil {
return err
}
// 将con添加到xds当中
s.addCon(con.conID, con)
defer close(con.initialized)
// 初始化代理,这里会调用workloadentry控制器对该服务创建workloadentry
if err := s.initializeProxy(node, con); err != nil {
s.closeConnection(con)
return err
}
if s.StatusGen != nil {
s.StatusGen.OnConnect(con)
}
return nil
}
func (s *DiscoveryServer) initializeProxy(node *core.Node, con *Connection) error {
proxy := con.proxy
// this should be done before we look for service instances, but after we load metadata
// TODO fix check in kubecontroller treat echo VMs like there isn't a pod
// 创建WorkloadEntry
if err := s.WorkloadEntryController.RegisterWorkload(proxy, con.connectedAt); err != nil {
return err
}
// 设置SidecarScope
// SidecarScope 是在 Istio 1.1 版本中引入的,
//它并不是一个直接面向用户的配置项,而是 Sidecar 资源的包装器,
//具体来说就是 Sidecar 资源中的 egress 选项。通过该配置可以减少 Istio 向 Sidecar 下发的数据量,
//例如只向某个命名空间中的某些服务下发某些 hosts 的访问配置,从而提高应用提高性能。
s.computeProxyState(proxy, nil)
//获取当前连接所在的区域,区域可以进行设置,比如目的规则里面的Region
if len(proxy.ServiceInstances) > 0 {
proxy.Locality = util.ConvertLocality(proxy.ServiceInstances[0].Endpoint.Locality.Label)
}
// If there is no locality in the registry then use the one sent as part of the discovery request.
// This is not preferable as only the connected Pilot is aware of this proxies location, but it
// can still help provide some client-side Envoy context when load balancing based on location.
if util.IsLocalityEmpty(proxy.Locality) {
proxy.Locality = &core.Locality{
Region: node.Locality.GetRegion(),
Zone: node.Locality.GetZone(),
SubZone: node.Locality.GetSubZone(),
}
}
locality := util.LocalityToString(proxy.Locality)
// add topology labels to proxy metadata labels
proxy.Metadata.Labels = labelutil.AugmentLabels(proxy.Metadata.Labels, proxy.Metadata.ClusterID, locality, proxy.Metadata.Network)
// Discover supported IP Versions of proxy so that appropriate config can be delivered.
proxy.DiscoverIPMode()
proxy.WatchedResources = map[string]*model.WatchedResource{}
// Based on node metadata and version, we can associate a different generator.
if proxy.Metadata.Generator != "" {
proxy.XdsResourceGenerator = s.Generators[proxy.Metadata.Generator]
}
return nil
}
至此服务发现功能介绍完毕
serviceEntryHandler
serviceEntry资源主要将外部服务注册到中心中,当我们更新serviceEntry资源时会触发serviceEntryHandler方法,该方法根据事件的类型对缓存更新或者删除,调用XDS将当前资源进行推送
serviceEntry的流程:
- 判断事件类型(增删改),将serviceEntry转换成serviceInstances
- cache更新
- xds推送
这里我们需要注意在serviceEntry->serviceInstances的转换过程中,会根据hosts,address,ports的乘机创建Instances,比如hosts[“zhangsan.com”,“lisi.com”],address[“127.0.0.2”,“127.0.0.3”],ports[80,8080]那么就会生成8个ServiceInstance.
//主要提供了外部服务注册时的一系列操作
func (s *Controller) serviceEntryHandler(_, curr config.Config, event model.Event) {
currentServiceEntry := curr.Spec.(*networking.ServiceEntry)
//将当前ServiceEntry 转换为Serivce资源,将host与adress进行一一对应两个集合之间做积运算
// 得出 []mode.service
cs := convertServices(curr)
configsUpdated := map[model.ConfigKey]struct{}{}
key := types.NamespacedName{Namespace: curr.Namespace, Name: curr.Name}
s.mutex.Lock()
// If it is add/delete event we should always do a full push. If it is update event, we should do full push,
// only when services have changed - otherwise, just push endpoint updates.
var addedSvcs, deletedSvcs, updatedSvcs, unchangedSvcs []*model.Service
switch event {
case model.EventUpdate:
//如果是更新,首先会获取所有的mdoe.service,然后与上面的cs进行比较,不存在就放到add里面等
addedSvcs, deletedSvcs, updatedSvcs, unchangedSvcs = servicesDiff(s.services.getServices(key), cs)
//更新缓存中的所有mdoe.service
s.services.updateServices(key, cs)
case model.EventDelete:
deletedSvcs = cs
s.services.deleteServices(key)
case model.EventAdd:
addedSvcs = cs
s.services.updateServices(key, cs)
default:
// 默认没有改变
unchangedSvcs = cs
}
//根据mode.service构建 serviceInstances 这是istio中的一个重要的结构体,它不存在于k8s中
serviceInstancesByConfig, serviceInstances := s.buildServiceInstances(curr, cs)
//获取原本存在的SerivceInstance进行新旧交替
oldInstances := s.serviceInstances.getServiceEntryInstances(key)
for configKey, old := range oldInstances {
s.serviceInstances.deleteInstances(configKey, old)
}
if event == model.EventDelete {
s.serviceInstances.deleteAllServiceEntryInstances(key)
} else {
// 从多个维度存储 serviceInstance ,IP地址维度,namespaces/hosts 维度
for ckey, value := range serviceInstancesByConfig {
s.serviceInstances.addInstances(ckey, value)
}
s.serviceInstances.updateServiceEntryInstances(key, serviceInstancesByConfig)
}
shard := model.ShardKeyFromRegistry(s)
for _, svc := range addedSvcs {
// 发送更新统计
s.XdsUpdater.SvcUpdate(shard, string(svc.Hostname), svc.Attributes.Namespace, model.EventAdd)
configsUpdated[makeConfigKey(svc)] = struct{}{}
}
for _, svc := range updatedSvcs {
s.XdsUpdater.SvcUpdate(shard, string(svc.Hostname), svc.Attributes.Namespace, model.EventUpdate)
configsUpdated[makeConfigKey(svc)] = struct{}{}
}
// If service entry is deleted, call SvcUpdate to cleanup endpoint shards for services.
for _, svc := range deletedSvcs {
instanceKey := instancesKey{namespace: svc.Attributes.Namespace, hostname: svc.Hostname}
// There can be multiple service entries of same host reside in same namespace.
// Delete endpoint shards only if there are no service instances.
if len(s.serviceInstances.getByKey(instanceKey)) == 0 {
s.XdsUpdater.SvcUpdate(shard, string(svc.Hostname), svc.Attributes.Namespace, model.EventDelete)
}
configsUpdated[makeConfigKey(svc)] = struct{}{}
}
// If a service is updated and is not part of updatedSvcs, that means its endpoints might have changed.
// If this service entry had endpoints with IPs (i.e. resolution STATIC), then we do EDS update.
// If the service entry had endpoints with FQDNs (i.e. resolution DNS), then we need to do
// full push (as fqdn endpoints go via strict_dns clusters in cds).
if len(unchangedSvcs) > 0 {
if currentServiceEntry.Resolution == networking.ServiceEntry_DNS || currentServiceEntry.Resolution == networking.ServiceEntry_DNS_ROUND_ROBIN {
for _, svc := range unchangedSvcs {
configsUpdated[makeConfigKey(svc)] = struct{}{}
}
}
}
s.mutex.Unlock()
fullPush := len(configsUpdated) > 0
// if not full push needed, at least one service unchanged
if !fullPush {
s.edsUpdate(serviceInstances)
return
}
// When doing a full push, the non DNS added, updated, unchanged services trigger an eds update
// so that endpoint shards are updated.
allServices := make([]*model.Service, 0, len(addedSvcs)+len(updatedSvcs)+len(unchangedSvcs))
nonDNSServices := make([]*model.Service, 0, len(addedSvcs)+len(updatedSvcs)+len(unchangedSvcs))
allServices = append(allServices, addedSvcs...)
allServices = append(allServices, updatedSvcs...)
allServices = append(allServices, unchangedSvcs...)
for _, svc := range allServices {
if !(svc.Resolution == model.DNSLB || svc.Resolution == model.DNSRoundRobinLB) {
nonDNSServices = append(nonDNSServices, svc)
}
}
// non dns service instances
keys := map[instancesKey]struct{}{}
for _, svc := range nonDNSServices {
keys[instancesKey{hostname: svc.Hostname, namespace: curr.Namespace}] = struct{}{}
}
// 更新当前缓存
s.queueEdsEvent(keys, s.doEdsCacheUpdate)
// 推送当前配置
pushReq := &model.PushRequest{
Full: true,
ConfigsUpdated: configsUpdated,
Reason: []model.TriggerReason{model.ServiceUpdate},
}
s.XdsUpdater.ConfigUpdate(pushReq)
}
当推送完后会根据Reason的类型生成envoy配置,然后推送给envoy
workloadEntryHandler
上一篇,我们讲过上游服务在注册到注册中心是发送grpc请求进行注册,注册handler接受到请求进行权限认证后就为其创建workloadEntry资源,但是在创建完workloadEntry资源后整个handler流程就结束了,那么后续的事情是什么那,接下来就让我们进行探索.
当创建完成workloadEntry资源前会判断是否有workloadEntryGroup如果有,才会去创建,会触发workloadEntryHandler方法(在上一篇有讲解为什么会触发这个方法),
- 将workloadEntry转化为workloadInstance
- 获取当前命名空间下的ServiceEntry,然后根据ServiceEntry的workloadSelect
initMulticluster
该结构体启动后的主要目的是监听当前集群上的原生资源(pod,service等)信息,根据这些信息动态添加到istio中,例如,当我们添加POD后,它会根据pod获取与之关联的endpoint(k8s原生的)然后转换为Istioendpoint推送给envoy(这里还会将istioendpoint转换为envoy识别的配置)
接下来让我们进入代码去探索一下原生资源是如何被转化成istio资源的
func (s *Server) initMulticluster(args *PilotArgs) {
...
s.addStartFunc(func(stop <-chan struct{}) error {
return s.multiclusterController.Run(stop)
})
}
// 上面含义在上一篇创建 中有所讲解,这里不再叙述我们直接进入Run
func (c *Controller) Run(stopCh <-chan struct{}) error {
// 对于本地集群进行处理
localCluster := &Cluster{Client: c.localClusterClient, ID: c.localClusterID}
if err := c.handleAdd(localCluster, stopCh);
go func() {
t0 := time.Now()
// 这里开始监控secert资源,监控远程集群注册动态
go c.informer.Run(stopCh)
' // 这里会进行超时缓存等待,等待远程缓存同步
if !kube.WaitForCacheSyncInterval(stopCh, c.syncInterval, c.informer.HasSynced) {
log.Error("Failed to sync multicluster remote secrets controller cache")
return
}
// 判断超时等待
if features.RemoteClusterTimeout != 0 {
time.AfterFunc(features.RemoteClusterTimeout, func() {
c.remoteSyncTimeout.Store(true)
})
}
// 控制器运行,开始根据secert的值对集群进行注册(我称之为注册,其实是将集群中的资源存储到istio中)
c.queue.Run(stopCh)
}()
return nil
}
我们重点关注handleAdd,至于下面的注册远程集群,其实也是调用handleAdd方法
handleAdd方法做了以下几步
- 根据当前secert.data生成envoy认证信息,这样可以供envoy进行访问
- 创建k8s资源管理器(主要是创建informer,然后设置informer事件)
- 根据配置参数,判断是否开启夸集群资源关联操作
本地集群资源管理
接下来让我们重点关注2,3,第一步主要是secertGen配置生成器,生成envoy认证配置然后推送给上游proxy
func (c *Controller) handleAdd(cluster *Cluster, stop <-chan struct{}) error {
var errs *multierror.Error
// 循环调用,现在有两个handler,第一个handler运行了第一步的功能
// 第二个handler运行了2,3步的功能
for _, handler := range c.handlers {
errs = multierror.Append(errs, handler.ClusterAdded(cluster, stop))
}
return errs.ErrorOrNil()
}
// 2,3步
func (m *Multicluster) ClusterAdded(cluster *multicluster.Cluster, clusterStopCh <-chan struct{}) error {
client := cluster.Client
options := m.opts
options.ClusterID = cluster.ID
options.SyncTimeout = cluster.SyncTimeout
// 判断endpoint是否为分片
options.EndpointMode = DetectEndpointMode(client)
//创建管理器,主要管理原生资源
kubeRegistry := NewController(client, options)
// 根据集群的id 存放注册控制器
m.remoteKubeControllers[cluster.ID] = &kubeController{
Controller: kubeRegistry,
}
// 这里可以设置本地集群的名称,默认为kubernetes
localCluster := m.opts.ClusterID == cluster.ID
m.m.Unlock()
// 为workloadEntryHandler添加前置处理器,就是上面我们说到的,会判断serviceEntry中WorkloadSelector匹配的项目,然后进行转换推送给上游proxy
if m.serviceEntryController != nil && features.EnableServiceEntrySelectPods {
// Add an instance handler in the kubernetes registry to notify service entry store about pod events
kubeRegistry.AppendWorkloadHandler(m.serviceEntryController.WorkloadInstanceHandler)
}
if m.serviceEntryController != nil && localCluster {
// 为workloadHandlers添加handler,该handler主要作用是根据workload获取service然后推送到envoy中
m.serviceEntryController.AppendWorkloadHandler(kubeRegistry.WorkloadInstanceHandler)
// 下面如果为真,那么一个集群的serviceEntry可以使用另一个集群的workloadentry
} else if features.WorkloadEntryCrossCluster {
// TODO only do this for non-remotes, can't guarantee CRDs in remotes (depends on https://github.com/istio/istio/pull/29824)
if configStore, err := createWleConfigStore(client, m.revision, options); err == nil {
m.remoteKubeControllers[cluster.ID].workloadEntryController = serviceentry.NewWorkloadEntryController(
configStore, model.MakeIstioStore(configStore), options.XDSUpdater,
serviceentry.WithClusterID(cluster.ID),
serviceentry.WithNetworkIDCb(kubeRegistry.Network))
// Services can select WorkloadEntry from the same cluster. We only duplicate the Service to configure kube-dns.
m.remoteKubeControllers[cluster.ID].workloadEntryController.AppendWorkloadHandler(kubeRegistry.WorkloadInstanceHandler)
// ServiceEntry selects WorkloadEntry from remote cluster
m.remoteKubeControllers[cluster.ID].workloadEntryController.AppendWorkloadHandler(m.serviceEntryController.WorkloadInstanceHandler)
m.opts.MeshServiceController.AddRegistryAndRun(m.remoteKubeControllers[cluster.ID].workloadEntryController, clusterStopCh)
go configStore.Run(clusterStopCh)
} else {
return fmt.Errorf("failed creating config configStore for cluster %s: %v", cluster.ID, err)
}
}
// run after WorkloadHandler is added
m.opts.MeshServiceController.AddRegistryAndRun(kubeRegistry, clusterStopCh)
if m.startNsController && (features.ExternalIstiod || localCluster) {
// Block server exit on graceful termination of the leader controller.
m.s.RunComponentAsyncAndWait(func(_ <-chan struct{}) error {
log.Infof("joining leader-election for %s in %s on cluster %s",
leaderelection.NamespaceController, options.SystemNamespace, options.ClusterID)
leaderelection.
NewLeaderElectionMulticluster(options.SystemNamespace, m.serverID, leaderelection.NamespaceController, m.revision, !localCluster, client).
AddRunFunction(func(leaderStop <-chan struct{}) {
log.Infof("starting namespace controller for cluster %s", cluster.ID)
nc := NewNamespaceController(client, m.caBundleWatcher)
// Start informers again. This fixes the case where informers for namespace do not start,
// as we create them only after acquiring the leader lock
// Note: stop here should be the overall pilot stop, NOT the leader election stop. We are
// basically lazy loading the informer, if we stop it when we lose the lock we will never
// recreate it again.
client.RunAndWait(clusterStopCh)
nc.Run(leaderStop)
}).Run(clusterStopCh)
return nil
})
}
// The local cluster has this patching set-up elsewhere. We may eventually want to move it here.
if features.ExternalIstiod && !localCluster && m.caBundleWatcher != nil {
// Patch injection webhook cert
// This requires RBAC permissions - a low-priv Istiod should not attempt to patch but rely on
// operator or CI/CD
if features.InjectionWebhookConfigName != "" {
// TODO prevent istiods in primary clusters from trying to patch eachother. should we also leader-elect?
log.Infof("initializing webhook cert patch for cluster %s", cluster.ID)
patcher, err := webhooks.NewWebhookCertPatcher(client, m.revision, webhookName, m.caBundleWatcher)
if err != nil {
log.Errorf("could not initialize webhook cert patcher: %v", err)
} else {
go patcher.Run(clusterStopCh)
}
}
// Patch validation webhook cert
go controller.NewValidatingWebhookController(client, m.revision, m.secretNamespace, m.caBundleWatcher).Run(clusterStopCh)
}
// setting up the serviceexport controller if and only if it is turned on in the meshconfig.
// TODO(nmittler): Need a better solution. Leader election doesn't take into account locality.
if features.EnableMCSAutoExport {
log.Infof("joining leader-election for %s in %s on cluster %s",
leaderelection.ServiceExportController, options.SystemNamespace, options.ClusterID)
// Block server exit on graceful termination of the leader controller.
m.s.RunComponentAsyncAndWait(func(_ <-chan struct{}) error {
leaderelection.
NewLeaderElection(options.SystemNamespace, m.serverID, leaderelection.ServiceExportController, m.revision, client).
AddRunFunction(func(leaderStop <-chan struct{}) {
serviceExportController := newAutoServiceExportController(autoServiceExportOptions{
Client: client,
ClusterID: options.ClusterID,
DomainSuffix: options.DomainSuffix,
ClusterLocal: m.clusterLocal,
})
// Start informers again. This fixes the case where informers do not start,
// as we create them only after acquiring the leader lock
// Note: stop here should be the overall pilot stop, NOT the leader election stop. We are
// basically lazy loading the informer, if we stop it when we lose the lock we will never
// recreate it again.
client.RunAndWait(clusterStopCh)
serviceExportController.Run(leaderStop)
}).Run(clusterStopCh)
return nil
})
}
return nil
}
对于k8s资源的管理,我们主要看kubeRegistry := NewController(client, options)这个方法,该方法主要对service,node,POD,Endpoints资源进行管理
func NewController(kubeClient kubelib.Client, options Options) *Controller {
c := &Controller{
opts: options,
client: kubeClient,
queue: queue.NewQueueWithID(1*time.Second, string(options.ClusterID)),
servicesMap: make(map[host.Name]*model.Service),
nodeSelectorsForServices: make(map[host.Name]labels.Instance),
nodeInfoMap: make(map[string]kubernetesNode),
externalNameSvcInstanceMap: make(map[host.Name][]*model.ServiceInstance),
workloadInstancesIndex: workloadinstances.NewIndex(),
informerInit: atomic.NewBool(false),
beginSync: atomic.NewBool(false),
initialSync: atomic.NewBool(false),
multinetwork: initMultinetwork(),
}
if features.EnableMCSHost {
c.hostNamesForNamespacedName = func(name types.NamespacedName) []host.Name {
return []host.Name{
kube.ServiceHostname(name.Name, name.Namespace, c.opts.DomainSuffix),
serviceClusterSetLocalHostname(name),
}
}
c.servicesForNamespacedName = func(name types.NamespacedName) []*model.Service {
out := make([]*model.Service, 0, 2)
c.RLock()
if svc := c.servicesMap[kube.ServiceHostname(name.Name, name.Namespace, c.opts.DomainSuffix)]; svc != nil {
out = append(out, svc)
}
if svc := c.servicesMap[serviceClusterSetLocalHostname(name)]; svc != nil {
out = append(out, svc)
}
c.RUnlock()
return out
}
} else {
c.hostNamesForNamespacedName = func(name types.NamespacedName) []host.Name {
return []host.Name{
kube.ServiceHostname(name.Name, name.Namespace, c.opts.DomainSuffix),
}
}
c.servicesForNamespacedName = func(name types.NamespacedName) []*model.Service {
if svc := c.GetService(kube.ServiceHostname(name.Name, name.Namespace, c.opts.DomainSuffix)); svc != nil {
return []*model.Service{svc}
}
return nil
}
}
c.nsInformer = kubeClient.KubeInformer().Core().V1().Namespaces().Informer()
c.nsLister = kubeClient.KubeInformer().Core().V1().Namespaces().Lister()
if c.opts.SystemNamespace != "" {
nsInformer := filter.NewFilteredSharedIndexInformer(func(obj interface{}) bool {
ns, ok := obj.(*v1.Namespace)
if !ok {
log.Warnf("Namespace watch getting wrong type in event: %T", obj)
return false
}
return ns.Name == c.opts.SystemNamespace
}, c.nsInformer)
// 为informer注册事件
c.registerHandlers(nsInformer, "Namespaces", c.onSystemNamespaceEvent, nil)
}
// 根据mesh.discoverySelectors配置创建的过滤器
// 功能是选择指定的命名空间集,如果不指定k8s资源管理器会管理所有命名空间的资源
// 如果指定了则会管理指定的命名空间中的k8s资源
if c.opts.DiscoveryNamespacesFilter == nil {
c.opts.DiscoveryNamespacesFilter = filter.NewDiscoveryNamespacesFilter(c.nsLister, options.MeshWatcher.Mesh().DiscoverySelectors)
}
// 当mesh配置更改时,更新k8s资源
c.initDiscoveryHandlers(kubeClient, options.EndpointMode, options.MeshWatcher, c.opts.DiscoveryNamespacesFilter)
// 创建service监听器,然后添加监听事件
// 该事件主要做了将service转换为istioService然后推送给上游proxy
// 本文会具体讲解它
c.serviceInformer = filter.NewFilteredSharedIndexInformer(c.opts.DiscoveryNamespacesFilter.Filter, kubeClient.KubeInformer().Core().V1().Services().Informer())
c.serviceLister = listerv1.NewServiceLister(c.serviceInformer.GetIndexer())
c.registerHandlers(c.serviceInformer, "Services", c.onServiceEvent, nil)
// 这里是对endpoint模式的判断如果采用分片则创建分片控制器
// 这里会根据endpoint去istioService缓存中查找ServiceInstance然后封装推送给上游proxy
switch options.EndpointMode {
case EndpointsOnly:
c.endpoints = newEndpointsController(c)
case EndpointSliceOnly:
c.endpoints = newEndpointSliceController(c)
}
// 主要更新有nodeport相关的serviceInstance,原理与上面类似
c.nodeInformer = kubeClient.KubeInformer().Core().V1().Nodes().Informer()
c.nodeLister = kubeClient.KubeInformer().Core().V1().Nodes().Lister()
c.registerHandlers(c.nodeInformer, "Nodes", c.onNodeEvent, nil)
// 监听POD信息,主要功能为,获取与POD相关的Endpoint然后与上面流程一样查找对应的ServiceInstance然后推送给上游proxy
podInformer := filter.NewFilteredSharedIndexInformer(c.opts.DiscoveryNamespacesFilter.Filter, kubeClient.KubeInformer().Core().V1().Pods().Informer())
c.pods = newPodCache(c, podInformer, func(key string) {
item, exists, err := c.endpoints.getInformer().GetIndexer().GetByKey(key)
if err != nil {
log.Debugf("Endpoint %v lookup failed with error %v, skipping stale endpoint", key, err)
return
}
if !exists {
log.Debugf("Endpoint %v not found, skipping stale endpoint", key)
return
}
if shouldEnqueue("Pods", c.beginSync) {
c.queue.Push(func() error {
return c.endpoints.onEvent(item, model.EventUpdate)
})
}
})
c.registerHandlers(c.pods.informer, "Pods", c.pods.onEvent, nil)
c.exports = newServiceExportCache(c)
c.imports = newServiceImportCache(c)
return c
}
K8sService资源的管理
接下来让我们拿出一个资源Service进行深入的了解一下对k8s资源到底是如何进行使用的.
func (c *Controller) onServiceEvent(curr interface{}, event model.Event) error {
// 将当前事件转化为v1.Service,了解client-go的同学应该明白,当事件触发后实际上需要转换一下
svc, err := convertToService(curr)
if err != nil {
log.Errorf(err)
return nil
}
log.Debugf("Handle event %s for service %s in namespace %s", event, svc.Name, svc.Namespace)
// 这里会将k8sService转化为IstioService
svcConv := kube.ConvertService(*svc, c.opts.DomainSuffix, c.Cluster())
// 判断事件类型,这里我们假设是添加操作
switch event {
case model.EventDelete:
c.deleteService(svcConv)
default:
c.addOrUpdateService(svc, svcConv, event, false)
}
return nil
}
K8sService->IstioService
对
// 接下来让我们看一下k8sService如何转化成IstioService的
func ConvertService(svc coreV1.Service, domainSuffix string, clusterID cluster.ID) *model.Service {
addr := constants.UnspecifiedIP // 0.0.0.0 赋默认值
resolution := model.ClientSideLB //
meshExternal := false
// 判断Service的类型,如果是ExternalName,并且ExternalName有值说明当前地址类型为DNS
if svc.Spec.Type == coreV1.ServiceTypeExternalName && svc.Spec.ExternalName != "" {
resolution = model.DNSLB
meshExternal = true
}
// 判断目的地址,有四种类型
// ClientSideLB 意味着代理将从其本地 lb 池中决定端点
// DNSLB 表示代理会解析一个 DNS 地址并转发到解析后的地址
// Passthrough 意味着代理应该将流量转发到调用者请求的目标 IP
// DNSRoundRobinLB 暗示代理会解析一个DNS地址并转发到解析后的地址
if svc.Spec.ClusterIP == coreV1.ClusterIPNone { // headless services should not be load balanced
resolution = model.Passthrough
} else if svc.Spec.ClusterIP != "" {
addr = svc.Spec.ClusterIP
}
// 这里是对ServicePorts的转换
// 这里需要注意,新手在使用的过程中容易犯的错误,port的名称如果不正确,又没有指定AppProtocol会造成传输协议不正确,无法访问
// 1.判断proto是否为udp,是则直接返回UDP
// 2. 根据appProto判断,就是赋值然后返回
// 3. 判断是否为GRPCWeb
// 4. 然后根据name进行判断
// 5. 如果都不是则默认为TCP
ports := make([]*model.Port, 0, len(svc.Spec.Ports))
for _, port := range svc.Spec.Ports {
ports = append(ports, convertPort(port))
}
// 下面是根据注释设置的一些配置
// 根据networking.istio.io/exportTo来行使exportTo的作用,指定命名空间
var exportTo map[visibility.Instance]bool
serviceaccounts := make([]string, 0)
if svc.Annotations[annotation.AlphaCanonicalServiceAccounts.Name] != "" {
serviceaccounts = append(serviceaccounts, strings.Split(svc.Annotations[annotation.AlphaCanonicalServiceAccounts.Name], ",")...)
}
if svc.Annotations[annotation.AlphaKubernetesServiceAccounts.Name] != "" {
for _, ksa := range strings.Split(svc.Annotations[annotation.AlphaKubernetesServiceAccounts.Name], ",") {
serviceaccounts = append(serviceaccounts, kubeToIstioServiceAccount(ksa, svc.Namespace))
}
}
if svc.Annotations[annotation.NetworkingExportTo.Name] != "" {
namespaces := strings.Split(svc.Annotations[annotation.NetworkingExportTo.Name], ",")
exportTo = make(map[visibility.Instance]bool, len(namespaces))
for _, ns := range namespaces {
exportTo[visibility.Instance(ns)] = true
}
}
istioService := &model.Service{
Hostname: ServiceHostname(svc.Name, svc.Namespace, domainSuffix),
ClusterVIPs: model.AddressMap{
Addresses: map[cluster.ID][]string{
clusterID: {addr},
},
},
Ports: ports,
DefaultAddress: addr,
ServiceAccounts: serviceaccounts,
MeshExternal: meshExternal,
Resolution: resolution,
CreationTime: svc.CreationTimestamp.Time,
ResourceVersion: svc.ResourceVersion,
Attributes: model.ServiceAttributes{
ServiceRegistry: provider.Kubernetes,
Name: svc.Name,
Namespace: svc.Namespace,
Labels: svc.Labels,
ExportTo: exportTo,
LabelSelectors: svc.Spec.Selector,
},
}
// 下面是根据NodePort进行的一些列配置操作
switch svc.Spec.Type {
case coreV1.ServiceTypeNodePort:
if _, ok := svc.Annotations[NodeSelectorAnnotation]; !ok {
// only do this for istio ingress-gateway services
break
}
// store the service port to node port mappings
portMap := make(map[uint32]uint32)
for _, p := range svc.Spec.Ports {
portMap[uint32(p.Port)] = uint32(p.NodePort)
}
istioService.Attributes.ClusterExternalPorts = map[cluster.ID]map[uint32]uint32{clusterID: portMap}
// address mappings will be done elsewhere
case coreV1.ServiceTypeLoadBalancer:
if len(svc.Status.LoadBalancer.Ingress) > 0 {
var lbAddrs []string
for _, ingress := range svc.Status.LoadBalancer.Ingress {
if len(ingress.IP) > 0 {
lbAddrs = append(lbAddrs, ingress.IP)
} else if len(ingress.Hostname) > 0 {
// DO NOT resolve the DNS here. In environments like AWS, the ELB hostname
// does not have a repeatable DNS address and IPs resolved at an earlier point
// in time may not work. So, when we get just hostnames instead of IPs, we need
// to smartly switch from EDS to strict_dns rather than doing the naive thing of
// resolving the DNS name and hoping the resolution is one-time task.
lbAddrs = append(lbAddrs, ingress.Hostname)
}
}
if len(lbAddrs) > 0 {
istioService.Attributes.ClusterExternalAddresses.SetAddressesFor(clusterID, lbAddrs)
}
}
}
istioService.Attributes.ClusterExternalAddresses.AddAddressesFor(clusterID, svc.Spec.ExternalIPs)
return istioService
}
资源推送
既然该转换的也转换完成了那么接下来就是对其进行推送了
func (c *Controller) addOrUpdateService(svc *v1.Service, svcConv *model.Service, event model.Event, updateEDSCache bool) {
// 全量推送判断值
needsFullPush := false
// First, process nodePort gateway service, whose externalIPs specified
// and loadbalancer gateway service
//处理nodePort网关服务,指定externalIPs
//和负载均衡器网关服务
if !svcConv.Attributes.ClusterExternalAddresses.IsEmpty() {
needsFullPush = c.extractGatewaysFromService(svcConv)
} else if isNodePortGatewayService(svc) {
// We need to know which services are using node selectors because during node events,
// we have to update all the node port services accordingly.
nodeSelector := getNodeSelectorsForService(svc)
c.Lock()
// only add when it is nodePort gateway service
c.nodeSelectorsForServices[svcConv.Hostname] = nodeSelector
c.Unlock()
needsFullPush = c.updateServiceNodePortAddresses(svcConv)
}
// instance conversion is only required when service is added/updated.
// 根据port生成istioEndpoint
instances := kube.ExternalNameServiceInstances(svc, svcConv)
c.Lock()
c.servicesMap[svcConv.Hostname] = svcConv
if len(instances) > 0 {
c.externalNameSvcInstanceMap[svcConv.Hostname] = instances
}
c.Unlock()
// 这里会根据上面的操作进行判断,如果需要全量更新,那么要把所有的EDS全部生成一遍
if needsFullPush {
// networks are different, we need to update all eds endpoints
c.opts.XDSUpdater.ConfigUpdate(&model.PushRequest{Full: true, Reason: []model.TriggerReason{model.NetworksTrigger}})
}
shard := model.ShardKeyFromRegistry(c)
ns := svcConv.Attributes.Namespace
// We also need to update when the Service changes. For Kubernetes, a service change will result in Endpoint updates,
// but workload entries will also need to be updated.
// TODO(nmittler): Build different sets of endpoints for cluster.local and clusterset.local.
//这里根据K8sservice获取K8sendpoint然后转换为IstioEndpoint
endpoints := c.buildEndpointsForService(svcConv, updateEDSCache)
if len(endpoints) > 0 {
// 这里进行缓存更新
c.opts.XDSUpdater.EDSCacheUpdate(shard, string(svcConv.Hostname), ns, endpoints)
}
c.opts.XDSUpdater.SvcUpdate(shard, string(svcConv.Hostname), ns, event)
// 主要向上游推送service更新命令
c.handlers.NotifyServiceHandlers(svcConv, event)
}
服务启动
上文可以单独拿出来水一篇文章了
接下来让我们进入正文,查看一下istio是如何向envoy进行推送的,以及推送的信息时什么.
discovery的推送基本上可以分为这几个步骤
- 向push管道传入推送请求
- 接受到请求后首先做防抖处理,就是等个一段时间将当前推送信息合并一起再发送
- 创建pushContext,该上下文存储了当前系统中的所有service等信息
- 然后将pushContext与推送请求信息封装起来然后放入每个con的pushChannel中
在上文我们讲解上游服务注册的时候,在创建conn方法中的最下面有一个for-select标识了conn的推送与监听.
接下来我们就要开始讲解这两个管道的功能
// 这里是envoy->istiod istiod->envoy双方互发数据的流程
for {
select {
// 这里是envoy->istiod
case req, ok := <-con.reqChan:
if ok {
if err := s.processRequest(req, con); err != nil {
return err
}
} else {
// Remote side closed connection or error processing the request.
return <-con.errorChan
}
// 这里是istiod->envoy
case pushEv := <-con.pushChannel:
err := s.pushConnection(con, pushEv)
pushEv.done()
if err != nil {
return err
}
case <-con.stop:
return nil
}
}
这里我们只探讨istiod->envoy,也就是pushConnection方法
// Compute and send the new configuration for a connection.
func (s *DiscoveryServer) pushConnection(con *Connection, pushEv *Event) error {
pushRequest := pushEv.pushRequest
// 判断是否为全量推送,假设我们更新的是virtualService,那么就是全量更新
if pushRequest.Full {
// Update Proxy with current information.
//
s.updateProxy(con.proxy, pushRequest)
}
// 判断当前是否需要推送
if !s.ProxyNeedsPush(con.proxy, pushRequest) {
log.Debugf("Skipping push to %v, no updates required", con.conID)
if pushRequest.Full {
// Only report for full versions, incremental pushes do not have a new version.
reportAllEvents(s.StatusReporter, con.conID, pushRequest.Push.LedgerVersion, nil)
}
return nil
}
// Send pushes to all generators
// Each Generator is responsible for determining if the push event requires a push
// 这里获取当前conn所监听的资源
wrl, ignoreEvents := con.pushDetails()
// 然后根据每个资源生成配置推送到上游
for _, w := range wrl {
if err := s.pushXds(con, w, pushRequest); err != nil {
return err
}
}
if pushRequest.Full {
// Report all events for unwatched resources. Watched resources will be reported in pushXds or on ack.
reportAllEvents(s.StatusReporter, con.conID, pushRequest.Push.LedgerVersion, ignoreEvents)
}
proxiesConvergeDelay.Record(time.Since(pushRequest.Start).Seconds())
return nil
}
//这里会根据当前监听资源类型获取资源生成器,然后判断推送请求资源是否符合该生成器的类型,如果符合则生成envoy配置,比如对于route资源会根据service,virtualservice等资源生成.
// 在生成以后,会通过con进行流传输(也就是推送)
func (s *DiscoveryServer) pushXds(con *Connection, w *model.WatchedResource, req *model.PushRequest) error {
if w == nil {
return nil
}
// 查找生成器
gen := s.findGenerator(w.TypeUrl, con)
if gen == nil {
return nil
}
t0 := time.Now()
// If delta is set, client is requesting new resources or removing old ones. We should just generate the
// new resources it needs, rather than the entire set of known resources.
// Note: we do not need to account for unsubscribed resources as these are handled by parent removal;
// See https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#deleting-resources.
// This means if there are only removals, we will not respond.
var logFiltered string
if !req.Delta.IsEmpty() && features.PartialFullPushes &&
!con.proxy.IsProxylessGrpc() {
logFiltered = " filtered:" + strconv.Itoa(len(w.ResourceNames)-len(req.Delta.Subscribed))
w = &model.WatchedResource{
TypeUrl: w.TypeUrl,
ResourceNames: req.Delta.Subscribed.UnsortedList(),
}
}
// 生成配置
res, logdata, err := gen.Generate(con.proxy, w, req)
if err != nil || res == nil {
// If we have nothing to send, report that we got an ACK for this version.
if s.StatusReporter != nil {
s.StatusReporter.RegisterEvent(con.conID, w.TypeUrl, req.Push.LedgerVersion)
}
return err
}
defer func() { recordPushTime(w.TypeUrl, time.Since(t0)) }()
// 构造推送请求
resp := &discovery.DiscoveryResponse{
ControlPlane: ControlPlane(),
TypeUrl: w.TypeUrl,
// TODO: send different version for incremental eds
VersionInfo: req.Push.PushVersion,
Nonce: nonce(req.Push.LedgerVersion),
Resources: model.ResourcesToAny(res),
}
configSize := ResourceSize(res)
configSizeBytes.With(typeTag.Value(w.TypeUrl)).Record(float64(configSize))
ptype := "PUSH"
info := ""
if logdata.Incremental {
ptype = "PUSH INC"
}
if len(logdata.AdditionalInfo) > 0 {
info = " " + logdata.AdditionalInfo
}
if len(logFiltered) > 0 {
info += logFiltered
}
// 发送数据
if err := con.send(resp); err != nil {
if recordSendError(w.TypeUrl, err) {
log.Warnf("%s: Send failure for node:%s resources:%d size:%s%s: %v",
v3.GetShortType(w.TypeUrl), con.proxy.ID, len(res), util.ByteCount(configSize), info, err)
}
return err
}
switch {
case logdata.Incremental:
if log.DebugEnabled() {
log.Debugf("%s: %s%s for node:%s resources:%d size:%s%s",
v3.GetShortType(w.TypeUrl), ptype, req.PushReason(), con.proxy.ID, len(res), util.ByteCount(configSize), info)
}
default:
debug := ""
if log.DebugEnabled() {
// Add additional information to logs when debug mode enabled.
debug = " nonce:" + resp.Nonce + " version:" + resp.VersionInfo
}
log.Infof("%s: %s%s for node:%s resources:%d size:%v%s%s", v3.GetShortType(w.TypeUrl), ptype, req.PushReason(), con.proxy.ID, len(res),
util.ByteCount(ResourceSize(res)), info, debug)
}
return nil
}
总结
istio的代码有些绕,主要表现在handler,它会时不时add一下,所以最后都不知道handler具体调用了哪些.
本文对增量更新与全量更新没有分析,能力有限!