Kafka GroupCoordinator机制(十一):GroupCoordinator之GroupState之JoinGroupRequest分析

rebalance的第二个步骤是消费者向GroupCoordinator发送JoinGroupRequest,这里分析GroupCoordinator如何处理JoinGroupRequest

  /**
   * Entry point for a JoinGroupRequest.
   *
   * Parses the request, checks that the session is authorized to Read the
   * target group, and either answers immediately with
   * GROUP_AUTHORIZATION_FAILED or delegates to
   * `coordinator.handleJoinGroup`, passing a callback that builds and
   * enqueues the JoinGroupResponse.
   *
   * @param request the raw request taken from the request channel
   */
  def handleJoinGroupRequest(request: RequestChannel.Request) {
    // Implicit Scala <-> Java collection conversions are needed both for reading
    // the request's protocol list and for building the response's member map.
    import JavaConversions._

    val joinGroupRequest = request.body.asInstanceOf[JoinGroupRequest]
    val responseHeader = new ResponseHeader(request.header.correlationId)

    // Puts a finished response body on the request channel for sending.
    def enqueue(responseBody: JoinGroupResponse) {
      val send = new ResponseSend(request.connectionId, responseHeader, responseBody)
      requestChannel.sendResponse(new RequestChannel.Response(request, send))
    }

    // Callback handed to the coordinator: turns its JoinGroupResult into a
    // JoinGroupResponse, logs it at trace level and enqueues it.
    def sendResponseCallback(joinResult: JoinGroupResult) {
      val members = joinResult.members.map { case (id, metadata) => (id, ByteBuffer.wrap(metadata)) }
      val responseBody = new JoinGroupResponse(
        joinResult.errorCode,
        joinResult.generationId,
        joinResult.subProtocol,
        joinResult.memberId,
        joinResult.leaderId,
        members)

      trace("Sending join group response %s for correlation id %d to client %s."
        .format(responseBody, request.header.correlationId, request.header.clientId))
      enqueue(responseBody)
    }

    if (authorize(request.session, Read, new Resource(Group, joinGroupRequest.groupId()))) {
      // Authorized: flatten the requested protocols into (name, metadata bytes)
      // pairs and let the coordinator handle the join.
      val protocols = joinGroupRequest.groupProtocols().map(protocol =>
        (protocol.name, Utils.toArray(protocol.metadata))).toList
      coordinator.handleJoinGroup(
        joinGroupRequest.groupId,
        joinGroupRequest.memberId,
        request.header.clientId,
        request.session.clientAddress.toString,
        joinGroupRequest.sessionTimeout,
        joinGroupRequest.protocolType,
        protocols,
        sendResponseCallback)
    } else {
      // Not authorized: reply right away with placeholder generation, protocol,
      // member id and leader id, and an empty member map.
      enqueue(new JoinGroupResponse(
        Errors.GROUP_AUTHORIZATION_FAILED.code,
        JoinGroupResponse.UNKNOWN_GENERATION_ID,
        JoinGroupResponse.UNKNOWN_PROTOCOL,
        JoinGroupResponse.UNKNOWN_MEMBER_ID, // memberId
        JoinGroupResponse.UNKNOWN_MEMBER_ID, // leaderId
        Map.empty[String, ByteBuffer]))
    }
  }

在handleJoinGroup方法中,首先会进行一系列的检测,保证GroupCoordinator处于可用状态且是对应Consumer Group的管理者。还要对sessionTimeoutMs进行检查,它是由消费者设置的超时时长,即消费者发送HeartbeatRequest的最长时间间隔,它不能超出GroupConfig中配置的超时时长区间。之后根据groupId查找GroupMetadata对象:如果不存在且memberId为UNKNOWN_MEMBER_ID,则创建新的GroupMetadata对象;如果不存在但memberId已被指定,则返回UNKNOWN_MEMBER_ID错误码。最后调用doJoinGroup方法。

  /**
   * Validates a join-group request and forwards it to `doJoinGroup`.
   *
   * The request is rejected through `responseCallback` (in this order) when
   * the coordinator is not active, the group id is invalid, this coordinator
   * is not responsible for the group, the group is still being loaded, or the
   * session timeout lies outside the configured [min, max] range. If the
   * group does not exist yet it is created, but only when the caller did not
   * supply a member id; a known member id for an unknown group is rejected
   * with UNKNOWN_MEMBER_ID.
   *
   * @param groupId          id of the consumer group to join
   * @param memberId         member id sent by the client, or UNKNOWN_MEMBER_ID
   * @param clientId         client id from the request header
   * @param clientHost       client address
   * @param sessionTimeoutMs session timeout requested by the client
   * @param protocolType     protocol type the member speaks
   * @param protocols        (name, metadata) pairs for each supported protocol
   * @param responseCallback invoked with the join result or error
   */
  def handleJoinGroup(groupId: String,
                      memberId: String,
                      clientId: String,
                      clientHost: String,
                      sessionTimeoutMs: Int,
                      protocolType: String,
                      protocols: List[(String, Array[Byte])],
                      responseCallback: JoinCallback) {
    // Answers the request with the given error for this member.
    def reject(error: Errors) {
      responseCallback(joinError(memberId, error.code))
    }

    if (!isActive.get) {
      // the coordinator has not been started (or has been shut down)
      reject(Errors.GROUP_COORDINATOR_NOT_AVAILABLE)
    } else if (!validGroupId(groupId)) {
      reject(Errors.INVALID_GROUP_ID)
    } else if (!isCoordinatorForGroup(groupId)) {
      // this coordinator does not manage the requested group
      reject(Errors.NOT_COORDINATOR_FOR_GROUP)
    } else if (isCoordinatorLoadingInProgress(groupId)) {
      // the group's state is still being loaded
      reject(Errors.GROUP_LOAD_IN_PROGRESS)
    } else if (sessionTimeoutMs < groupConfig.groupMinSessionTimeoutMs ||
               sessionTimeoutMs > groupConfig.groupMaxSessionTimeoutMs) {
      // the requested session timeout is outside the allowed interval
      reject(Errors.INVALID_SESSION_TIMEOUT)
    } else {
      // Only create the group if it is unknown AND the member id is UNKNOWN;
      // if a member id is specified but the group does not exist, reject.
      Option(groupManager.getGroup(groupId)) match {
        case Some(existingGroup) =>
          doJoinGroup(existingGroup, memberId, clientId, clientHost, sessionTimeoutMs, protocolType, protocols, responseCallback)

        case None if memberId != JoinGroupRequest.UNKNOWN_MEMBER_ID =>
          reject(Errors.UNKNOWN_MEMBER_ID)

        case None =>
          val createdGroup = groupManager.addGroup(new GroupMetadata(groupId, protocolType))
          doJoinGroup(createdGroup, memberId, clientId, clientHost, sessionTimeoutMs, protocolType, protocols, responseCallback)
      }
    }
  }

GroupCoordinator.doJoinGroup()对当前Consumer Group所处的状态分类处理。

  /**
   * Processes a validated join-group request against an existing group,
   * dispatching on the group's current state while holding the group's lock.
   *
   * Rejections: INCONSISTENT_GROUP_PROTOCOL when the protocol type differs or
   * none of the member's protocols is supported by the group; UNKNOWN_MEMBER_ID
   * when a concrete member id is supplied that the group does not contain, or
   * when the group is Dead.
   *
   * Otherwise a request without a member id registers a new member via
   * `addMemberAndRebalance`; a request from a known member either re-registers
   * it via `updateMemberAndRebalance` or, when nothing changed, is answered
   * immediately with the current generation's information.
   *
   * @param group            metadata of the group being joined
   * @param memberId         member id sent by the client, or UNKNOWN_MEMBER_ID
   * @param clientId         client id from the request header
   * @param clientHost       client address
   * @param sessionTimeoutMs session timeout requested by the client
   * @param protocolType     protocol type the member speaks
   * @param protocols        (name, metadata) pairs for each supported protocol
   * @param responseCallback invoked with the join result or error
   */
  private def doJoinGroup(group: GroupMetadata,
                          memberId: String,
                          clientId: String,
                          clientHost: String,
                          sessionTimeoutMs: Int,
                          protocolType: String,
                          protocols: List[(String, Array[Byte])],
                          responseCallback: JoinCallback) {
    group synchronized {
      // check the protocol type and the protocols (partition assignors) the member supports
      if (group.protocolType != protocolType || !group.supportsProtocols(protocols.map(_._1).toSet)) {
        // if the new member does not support the group protocol, reject it
        responseCallback(joinError(memberId, Errors.INCONSISTENT_GROUP_PROTOCOL.code))
      } else if (memberId != JoinGroupRequest.UNKNOWN_MEMBER_ID && !group.has(memberId)) {
        // if the member trying to register with a un-recognized id, send the response to let
        // it reset its member id and retry
        responseCallback(joinError(memberId, Errors.UNKNOWN_MEMBER_ID.code))
      } else {
        group.currentState match {
          case Dead =>
            // if the group is marked as dead, it means some other thread has just removed the group
            // from the coordinator metadata; this is likely that the group has migrated to some other
            // coordinator OR the group is in a transient unstable phase. Let the member retry
            // joining without the specified member id,
            // i.e. in the Dead state we simply return the error code
            responseCallback(joinError(memberId, Errors.UNKNOWN_MEMBER_ID.code))

          case PreparingRebalance =>
            if (memberId == JoinGroupRequest.UNKNOWN_MEMBER_ID) {
              // whether memberId equals UNKNOWN_MEMBER_ID tells us whether the consumer is
              // already a known member; here it is not, so register it as a new member
              addMemberAndRebalance(sessionTimeoutMs, clientId, clientHost, protocols, group, responseCallback)
            } else {
              // a known member is rejoining
              val member = group.get(memberId)
              updateMemberAndRebalance(group, member, protocols, responseCallback)
            }

          case AwaitingSync =>
            if (memberId == JoinGroupRequest.UNKNOWN_MEMBER_ID) {
              // a new member joins; addMemberAndRebalance may move the group to a new rebalance
              addMemberAndRebalance(sessionTimeoutMs, clientId, clientHost, protocols, group, responseCallback)
            } else {
              // a known member is rejoining
              val member = group.get(memberId)
              if (member.matches(protocols)) {
                // member is joining with the same metadata (which could be because it failed to
                // receive the initial JoinGroup response), so just return current group information
                // for the current generation.
                // Only the leader receives the full member metadata; followers get an empty map.
                responseCallback(JoinGroupResult(
                  members = if (memberId == group.leaderId) {
                    group.currentMemberMetadata
                  } else {
                    Map.empty
                  },
                  memberId = memberId,
                  generationId = group.generationId,
                  subProtocol = group.protocol,
                  leaderId = group.leaderId,
                  errorCode = Errors.NONE.code))
              } else {
                // member has changed metadata, so force a rebalance:
                // update the member's information and attempt the state transition
                updateMemberAndRebalance(group, member, protocols, responseCallback)
              }
            }

          case Stable =>
            if (memberId == JoinGroupRequest.UNKNOWN_MEMBER_ID) {
              // if the member id is unknown, register the member to the group
              // (addMemberAndRebalance may move the group to a new rebalance)
              addMemberAndRebalance(sessionTimeoutMs, clientId, clientHost, protocols, group, responseCallback)
            } else {
              // a known member is rejoining
              val member = group.get(memberId)
              if (memberId == group.leaderId || !member.matches(protocols)) {
                // force a rebalance if a member has changed metadata or if the leader sends JoinGroup.
                // The latter allows the leader to trigger rebalances for changes affecting assignment
                // which do not affect the member metadata (such as topic metadata changes for the consumer)
                updateMemberAndRebalance(group, member, protocols, responseCallback)
              } else {
                // for followers with no actual change to their metadata, just return group information
                // for the current generation which will allow them to issue SyncGroup
                responseCallback(JoinGroupResult(
                  members = Map.empty,
                  memberId = memberId,
                  generationId = group.generationId,
                  subProtocol = group.protocol,
                  leaderId = group.leaderId,
                  errorCode = Errors.NONE.code))
              }
            }
        }

        if (group.is(PreparingRebalance))// try to complete the delayed join operation watching this group
          joinPurgatory.checkAndComplete(GroupKey(group.groupId))
      }
    }
  }

addMemberAndRebalance添加member信息

  /**
   * Registers a brand-new member in the group and attempts to start a rebalance.
   *
   * The member id is the client id, a dash, and a suffix generated by the
   * group. The supplied callback is stored on the member as its pending join
   * callback before the member is added to the group.
   *
   * @param sessionTimeoutMs session timeout requested by the client
   * @param clientId         client id from the request header
   * @param clientHost       client address
   * @param protocols        (name, metadata) pairs for each supported protocol
   * @param group            group the member is added to
   * @param callback         join callback to store on the new member
   * @return the newly created member metadata
   */
  private def addMemberAndRebalance(sessionTimeoutMs: Int,
                                    clientId: String,
                                    clientHost: String,
                                    protocols: List[(String, Array[Byte])],
                                    group: GroupMetadata,
                                    callback: JoinCallback) = {
    // use the client-id with a group-generated suffix as the member-id
    val newMemberId = s"$clientId-${group.generateMemberIdSuffix}"
    val newMember =
      new MemberMetadata(newMemberId, group.groupId, clientId, clientHost, sessionTimeoutMs, protocols)

    // remember how to answer this member's join request
    newMember.awaitingJoinCallback = callback
    group.add(newMember.memberId, newMember)

    // attempt to move the group into a rebalance
    maybePrepareRebalance(group)
    newMember
  }

maybePrepareRebalance方法会判断消费者组的状态,如果是Stable或AwaitingSync,则会调用prepareRebalance方法将其切换成PreparingRebalance状态,并创建对应的DelayedJoin:

  /**
   * Moves the group into the PreparingRebalance state and registers a
   * DelayedJoin for it in the join purgatory.
   *
   * If the group is currently AwaitingSync, assignments are reset first and
   * REBALANCE_IN_PROGRESS is propagated to members with a pending sync
   * callback.
   *
   * @param group the group to start rebalancing
   */
  private def prepareRebalance(group: GroupMetadata) {
    // if any members are awaiting sync, cancel their request and have them rejoin
    if (group.is(AwaitingSync)) {
      resetAndPropagateAssignmentError(group, Errors.REBALANCE_IN_PROGRESS.code)
    }

    group.transitionTo(PreparingRebalance)
    info("Preparing to restabilize group %s with old generation %s".format(group.groupId, group.generationId))

    // The delayed join times out after the group's rebalance timeout
    // (presumably the largest session timeout among its members -- confirm in GroupMetadata).
    val delayedJoin = new DelayedJoin(this, group, group.rebalanceTimeout)

    // complete the delayed join right away if possible, otherwise watch it under the group's key
    joinPurgatory.tryCompleteElseWatch(delayedJoin, Seq(GroupKey(group.groupId)))
  }

对于AwaitingSync状态的消费者组来说,有的Group Follower已经发送了SyncGroupRequest,GroupCoordinator在等待Group Leader通过SyncGroupRequest将分区的分配结果发送过来。如果此时进行状态切换,需要对这些已经发送SyncGroupRequest的Group Follower返回错误码:

  /**
   * Clears every member's assignment and then propagates the given error code
   * to members with a pending sync callback.
   *
   * Must only be called while the group is in the AwaitingSync state (asserted).
   *
   * @param group     the group whose assignments are reset
   * @param errorCode error code to propagate
   */
  private def resetAndPropagateAssignmentError(group: GroupMetadata, errorCode: Short) {
    assert(group.is(AwaitingSync))
    // wipe the assignment of every member before notifying anyone
    for (member <- group.allMemberMetadata) {
      member.assignment = Array.empty[Byte]
    }
    propagateAssignment(group, errorCode)
  }
  
  /**
   * Invokes the pending sync callback of every member that has one, passing
   * the member's current assignment together with the given error code, then
   * clears the callback and schedules the member's next heartbeat expiration.
   *
   * @param group     the group whose members are notified
   * @param errorCode error code passed to each pending sync callback
   */
  private def propagateAssignment(group: GroupMetadata, errorCode: Short) {
    group.allMemberMetadata.foreach { member =>
      val pendingSync = member.awaitingSyncCallback
      if (pendingSync != null) {
        // answer the member's outstanding sync request, then forget the callback
        pendingSync(member.assignment, errorCode)
        member.awaitingSyncCallback = null

        // reset the session timeout for members after propagating the member's assignment.
        // This is because if any member's session expired while we were still awaiting either
        // the leader sync group or the storage callback, its expiration will be ignored and no
        // future heartbeat expectations will be scheduled.
        completeAndScheduleNextHeartbeatExpiration(group, member)
      }
    }
  }

  updateMemberAndRebalance与addMemberAndRebalance类似:

  /**
   * Refreshes an existing member's supported protocols and pending join
   * callback, then attempts to start a rebalance of the group.
   *
   * @param group     the group the member belongs to
   * @param member    the already-registered member that is rejoining
   * @param protocols (name, metadata) pairs the member now supports
   * @param callback  join callback to store on the member
   */
  private def updateMemberAndRebalance(group: GroupMetadata,
                                       member: MemberMetadata,
                                       protocols: List[(String, Array[Byte])],
                                       callback: JoinCallback) {
    // remember how to answer this join request and what the member now supports
    member.awaitingJoinCallback = callback
    member.supportedProtocols = protocols

    // attempt to move the group into a rebalance
    maybePrepareRebalance(group)
  }

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值