Spark RPC

RpcEndpoint & RpcEndpointRef & NettyRpcEndpointRef RPC 调用接口

SparkEnv: holds all the environment information for a running Spark instance

RpcEnv <-- new NettyRpcEnvFactory().create(RpcEnvConfig()) 

RpcEndpoint and RpcEndpointRef are the high-level RPC programming interfaces we work with in application code.

  • RpcEndpoint represents an RPC communication endpoint; we register it via rpcEnv.setupEndpoint() so that others can find us and talk to us
  • When we want to communicate with another RpcEndpoint, we call rpcEnv.setupEndpointRef() to establish a connection to the remote RpcEndpoint
  • We then talk to the remote side through the RpcEndpointRef's send, ask, and similar methods

Example 1:

  test("send a message remotely") {
    @volatile var message: String = null
    // Set up a RpcEndpoint using env
    env.setupEndpoint("send-remotely", new RpcEndpoint {
      override val rpcEnv = env

      override def receive: PartialFunction[Any, Unit] = {
        case msg: String => message = msg
      }
    })

    val anotherEnv = createRpcEnv(new SparkConf(), "remote", 0, clientMode = true)
    // Use anotherEnv to find out the RpcEndpointRef
    val rpcEndpointRef = anotherEnv.setupEndpointRef(env.address, "send-remotely")
    try {
      rpcEndpointRef.send("hello")
      eventually(timeout(5.seconds), interval(10.milliseconds)) {
        assert("hello" === message)
      }
    } finally {
      anotherEnv.shutdown()
      anotherEnv.awaitTermination()
    }
  }

Sample values:

  • RpcEndpointAddress: can be created from a URI, e.g. spark://send-remotely@localhost:54075
  • NettyRpcEndpointRef: created from the address above, e.g. NettyRpcEndpointRef(spark://send-remotely@localhost:54075)
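
The URI convention is easy to see in isolation. Below is a self-contained sketch of the spark://<name>@<host>:<port> round trip; EndpointAddress here is an illustrative stand-in, not Spark's actual (private) RpcEndpointAddress class.

  // Illustrative stand-in for Spark's private RpcEndpointAddress.
  case class EndpointAddress(name: String, host: String, port: Int) {
    override def toString: String = s"spark://$name@$host:$port"
  }

  object EndpointAddress {
    private val SparkUri = """spark://([^@]+)@([^:]+):(\d+)""".r

    def parse(uri: String): EndpointAddress = uri match {
      case SparkUri(name, host, port) => EndpointAddress(name, host, port.toInt)
      case _ => throw new IllegalArgumentException(s"Invalid Spark URL: $uri")
    }
  }

  // EndpointAddress.parse("spark://send-remotely@localhost:54075")
  //   == EndpointAddress("send-remotely", "localhost", 54075)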

Example 2: sending an RPC through an RpcEndpointRef

    // 1. Instantiate the EndpointRef
    val verifier = new NettyRpcEndpointRef(
      conf, RpcEndpointAddress(addr.rpcAddress, RpcEndpointVerifier.NAME), this)
    
    // 2. RpcEndpointVerifier.CheckExistence(endpointRef.name) is a case class; in principle we can send anything, as long as it is serializable
    // 3. The ask() function sends the message, handling message wrapping, abortable requests, and timeouts
    verifier.ask[Boolean](RpcEndpointVerifier.CheckExistence(endpointRef.name)).flatMap { find =>
      if (find) {
        Future.successful(endpointRef)
      } else {
        Future.failed(new RpcEndpointNotFoundException(uri))
      }
    }(ThreadUtils.sameThread)
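
To see just the Future plumbing in that snippet, here is a self-contained sketch with the RPC mocked out; ask and resolve are illustrative names, not Spark's API:

  import scala.concurrent.{Await, ExecutionContext, Future}
  import scala.concurrent.duration._

  object VerifierSketch {
    implicit val ec: ExecutionContext = ExecutionContext.global

    // Mocked CheckExistence round trip: "does the remote side know this name?"
    def ask(name: String): Future[Boolean] =
      Future.successful(name == "send-remotely")

    // Same shape as above: flatMap the Boolean into success or failure.
    def resolve(name: String): Future[String] =
      ask(name).flatMap { found =>
        if (found) Future.successful(s"ref:$name") // stand-in for the endpoint ref
        else Future.failed(new NoSuchElementException(s"endpoint $name not found"))
      }

    def main(args: Array[String]): Unit =
      println(Await.result(resolve("send-remotely"), 1.second)) // ref:send-remotely
  }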

NettyRpcEnv: sending a message

  // NettyRpcEnv
  private[netty] def askAbortable[T: ClassTag](
      message: RequestMessage, timeout: RpcTimeout): AbortableRpcFuture[T] = {
    val promise = Promise[Any]()
    val remoteAddr = message.receiver.address

    def onFailure(e: Throwable): Unit = {
      if (!promise.tryFailure(e)) {
        e match {
          case e : RpcEnvStoppedException => logDebug (s"Ignored failure: $e")
          case _ => logWarning(s"Ignored failure: $e")
        }
      }
    }

    def onSuccess(reply: Any): Unit = reply match {
      case RpcFailure(e) => onFailure(e)
      case rpcReply =>
        if (!promise.trySuccess(rpcReply)) {
          logWarning(s"Ignored message: $reply")
        }
    }

    def onAbort(reason: String): Unit = {
      onFailure(new RpcAbortException(reason))
    }

    try {
      if (remoteAddr == address) {
        val p = Promise[Any]()
        p.future.onComplete {
          case Success(response) => onSuccess(response)
          case Failure(e) => onFailure(e)
        }(ThreadUtils.sameThread)
        dispatcher.postLocalMessage(message, p)
      } else {
        // 1. Wrap the RPC message and define the callback functions for success and failure
        val rpcMessage = RpcOutboxMessage(message.serialize(this),
          onFailure,
          (client, response) => onSuccess(deserialize[Any](client, response)))
        // 2. Post the message to the Outbox
        postToOutbox(message.receiver, rpcMessage)
        // 3. When the message has been processed, onSuccess or onFailure fires; both complete the promise whose future is returned below
        // 4. The timeout and abort hooks are invoked locally through the failed-future callback
        promise.future.failed.foreach {
          case _: TimeoutException => rpcMessage.onTimeout()
          case _: RpcAbortException => rpcMessage.onAbort()
          case _ =>
        }(ThreadUtils.sameThread)
      }

      val timeoutCancelable = timeoutScheduler.schedule(new Runnable {
        override def run(): Unit = {
          onFailure(new TimeoutException(s"Cannot receive any reply from ${remoteAddr} " +
            s"in ${timeout.duration}"))
        }
      }, timeout.duration.toNanos, TimeUnit.NANOSECONDS)
      promise.future.onComplete { v =>
        timeoutCancelable.cancel(true)
      }(ThreadUtils.sameThread)
    } catch {
      case NonFatal(e) =>
        onFailure(e)
    }

    new AbortableRpcFuture[T](
      promise.future.mapTo[T].recover(timeout.addMessageIfTimeout)(ThreadUtils.sameThread),
      onAbort)
  }
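
The heart of askAbortable is a race between the reply path and a scheduled timeout on one shared promise; trySuccess/tryFailure make the race safe because only the first completion wins. Below is a self-contained sketch of that idiom (plain Scala with illustrative names, not Spark code):

  import java.util.concurrent.{Executors, TimeUnit, TimeoutException}
  import scala.concurrent.{ExecutionContext, Future, Promise}

  object AskWithTimeout {
    private val scheduler = Executors.newSingleThreadScheduledExecutor()

    def ask[T](send: Promise[T] => Unit, timeoutMs: Long)
              (implicit ec: ExecutionContext): Future[T] = {
      val promise = Promise[T]()
      send(promise) // the transport completes the promise when a reply arrives

      // Schedule the losing side of the race: fail the promise on timeout.
      val timeoutTask = scheduler.schedule(new Runnable {
        override def run(): Unit =
          promise.tryFailure(new TimeoutException(s"no reply within $timeoutMs ms"))
      }, timeoutMs, TimeUnit.MILLISECONDS)

      // Whichever side completes first wins; cancel the timer once decided.
      promise.future.onComplete(_ => timeoutTask.cancel(false))
      promise.future
    }
  }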

RpcOutboxMessage

RpcOutboxMessage carries the message body to send, along with a success-response handler and a failure handler. RpcOutboxMessage is itself an RpcResponseCallback: when it is sent through client.sendRpc, it registers itself as the callback, so its own methods are invoked when the response comes back.

  private[netty] case class RpcOutboxMessage(
    content: ByteBuffer,
    _onFailure: (Throwable) => Unit,
    _onSuccess: (TransportClient, ByteBuffer) => Unit)
  extends OutboxMessage with RpcResponseCallback with Logging {

  private var client: TransportClient = _
  private var requestId: Long = _

  override def sendWith(client: TransportClient): Unit = {
    this.client = client
    // Send the message body and register this object as the callback
    this.requestId = client.sendRpc(content, this)
  }

  private[netty] def removeRpcRequest(): Unit = {
    if (client != null) {
      client.removeRpcRequest(requestId)
    } else {
      logError("Ask terminated before connecting successfully")
    }
  }

  def onTimeout(): Unit = {
    removeRpcRequest()
  }

  def onAbort(): Unit = {
    removeRpcRequest()
  }

  override def onFailure(e: Throwable): Unit = {
    _onFailure(e)
  }

  override def onSuccess(response: ByteBuffer): Unit = {
    _onSuccess(client, response)
  }

}

// TransportClient

  /**
   * Sends an opaque message to the RpcHandler on the server-side. The callback will be invoked
   * with the server's response or upon any failure.
   *
   * @param message The message to send.
   * @param callback Callback to handle the RPC's reply.
   * @return The RPC's id.
   */
  public long sendRpc(ByteBuffer message, RpcResponseCallback callback) {
    if (logger.isTraceEnabled()) {
      logger.trace("Sending RPC to {}", getRemoteAddress(channel));
    }

    long requestId = requestId();
    handler.addRpcRequest(requestId, callback);

    RpcChannelListener listener = new RpcChannelListener(requestId, callback);
    channel.writeAndFlush(new RpcRequest(requestId, new NioManagedBuffer(message)))
      .addListener(listener);

    return requestId;
  }
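
sendRpc works because the response handler keeps a requestId-to-callback map; replies arrive asynchronously and are routed back by id. A simplified Scala sketch of that registry (illustrative, not Spark's actual TransportResponseHandler):

  import java.nio.ByteBuffer
  import java.util.concurrent.ConcurrentHashMap
  import java.util.concurrent.atomic.AtomicLong

  trait ResponseCallback {
    def onSuccess(response: ByteBuffer): Unit
    def onFailure(e: Throwable): Unit
  }

  class ResponseRegistry {
    private val nextId = new AtomicLong(0)
    private val outstanding = new ConcurrentHashMap[Long, ResponseCallback]()

    // Send path: remember who is waiting for this requestId.
    def register(cb: ResponseCallback): Long = {
      val id = nextId.getAndIncrement()
      outstanding.put(id, cb)
      id
    }

    // Receive path: route the reply frame back to the waiting callback.
    def complete(id: Long, response: ByteBuffer): Unit = {
      val cb = outstanding.remove(id)
      if (cb != null) cb.onSuccess(response)
    }

    // Timeout/abort path: drop the callback so a late reply is ignored.
    def remove(id: Long): Unit = outstanding.remove(id)
  }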

Outbox

Outbox exists to solve a message-ordering problem in the original Spark RPC implementation: messages sent in the order A, B could be received by the remote endpoint in the order B, A. The Outbox caches outgoing messages, and a single thread sends them one at a time to preserve ordering.
Issue: [SPARK-11098][Core] Add Outbox to cache the sending messages to resolve the message disorder issue

Receiving and sending messages are two fully asynchronous operations.
When messages are sent (see the sketch after this list):

  1. When the RPC connection is established: because enqueueing messages can happen asynchronously, messages may already be queued by the time the connection is ready, so they must be drained and sent at that point.
  2. When a message arrives: if the connection is already established, kick off the asynchronous send; if it is not ready yet, simply leave the message queued.
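
A stripped-down sketch of that enqueue/drain pattern (illustrative: one connection, String messages; Spark's real Outbox adds connection setup, stop handling, and error propagation):

  class MiniOutbox(sendNow: String => Unit) {
    private val messages = new java.util.LinkedList[String]()
    private var connected = false
    private var draining = false

    // Either send path: enqueue first, then try to drain.
    def send(msg: String): Unit = {
      synchronized { messages.add(msg) }
      drainOutbox()
    }

    // Connection callback: queued messages may already be waiting.
    def onConnected(): Unit = {
      synchronized { connected = true }
      drainOutbox()
    }

    // Only one thread drains at a time, so messages leave in FIFO order.
    private def drainOutbox(): Unit = {
      var msg: String = synchronized {
        if (!connected || draining) null
        else {
          val m = messages.poll()
          if (m != null) draining = true
          m
        }
      }
      while (msg != null) {
        sendNow(msg)
        msg = synchronized {
          val m = messages.poll()
          if (m == null) draining = false
          m
        }
      }
    }
  }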

// The Outbox receiving a message: the message is wrapped, then cached in a LinkedList

NettyRpcEndpointRef::def send(message: Any)
NettyRpcEnv::def send(message: RequestMessage)
NettyRpcEnv::postToOutbox(receiver: NettyRpcEndpointRef, message: OutboxMessage)
Outbox::send(message)
    messages.add(message)


// The Outbox sending messages

Walk the message list; as long as messages remain, send them.

Outbox.drainOutbox() : 
    launchConnectTask()
    message = messages.poll()
RpcOutboxMessage.sendWith(client) 
TransportClient.sendRpc(message) wraps the message in an RpcRequest and sends it out through the channel

Inbox

Messages come from two sources: those received from a remote EndpointRef, and local messages dispatched by the current endpoint.

NettyRpcEnv::def send(message: RequestMessage) // the current endpoint forwarding a local message
NettyRpcHandler::override def receive( client: TransportClient, message: ByteBuffer) // receiving a message from a remote endpoint

Posting a message to the inbox

Dispatcher::def postRemoteMessage() def postLocalMessage() def postOneWayMessage()
Dispatcher::private def postMessage()
DedicatedMessageLoop::override def post(endpointName: String, message: InboxMessage)
Inbox::def post(message: InboxMessage)

Walk the message list and let the endpoint process each dequeued message:

def process(dispatcher: Dispatcher) 
    RpcMessage -> endpoint.receiveAndReply(context)
    OneWayMessage -> endpoint.receive
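
A stripped-down sketch of that dispatch loop (illustrative; the real Inbox also synchronizes endpoint lifecycle, supports concurrent processing for thread-safe endpoints, and handles OnStart/OnStop messages):

  import java.util.LinkedList

  sealed trait InboxMessage
  case class RpcMsg(content: Any, reply: Any => Unit) extends InboxMessage
  case class OneWayMsg(content: Any) extends InboxMessage

  class MiniInbox(
      receiveAndReply: PartialFunction[(Any, Any => Unit), Unit],
      receive: PartialFunction[Any, Unit]) {

    private val messages = new LinkedList[InboxMessage]()

    def post(message: InboxMessage): Unit = synchronized { messages.add(message) }

    // Drain the queue, routing each message to the matching endpoint handler.
    def process(): Unit = {
      var msg = synchronized { messages.poll() }
      while (msg != null) {
        msg match {
          case RpcMsg(content, reply) => receiveAndReply((content, reply))
          case OneWayMsg(content)     => receive(content)
        }
        msg = synchronized { messages.poll() }
      }
    }
  }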

Network communication module

Creating the server communication port through rpcEnv

SparkEnv::private def create()
    val rpcEnv = RpcEnv.create(systemName, bindAddress, advertiseAddress, port.getOrElse(-1), conf,
      securityManager, numUsableCores, !isDriver)    
RpcEnv::new NettyRpcEnvFactory().create(config)
NettyRpcEnvFactory::nettyEnv.startServer(config.bindAddress, actualPort)

// An rpcHandler is initialized here first; we will see where it is used below
NettyRpcEnv::private val transportContext = new TransportContext(transportConf, new NettyRpcHandler(dispatcher, this, streamManager)) 

NettyRpcEnv::def startServer(bindAddress: String, port: Int)
    server = transportContext.createServer(bindAddress, port, bootstraps)
TransportContext::public TransportServer createServer(String host, int port, List<TransportServerBootstrap> bootstraps)
TransportContext::new TransportServer(this, host, port, rpcHandler, bootstraps)
TransportServer::private void init(String hostToBind, int portToBind) // the server is started here

// Above we saw where the rpcHandler is created and passed in
// TransportServer
  private void init(String hostToBind, int portToBind) {

    IOMode ioMode = IOMode.valueOf(conf.ioMode());
    EventLoopGroup bossGroup =
      NettyUtils.createEventLoop(ioMode, conf.serverThreads(), conf.getModuleName() + "-server");
    EventLoopGroup workerGroup = bossGroup;

    // Build the server bootstrap here; the actual bind happens below
    bootstrap = new ServerBootstrap()
      .group(bossGroup, workerGroup)
      .channel(NettyUtils.getServerChannelClass(ioMode))
      .option(ChannelOption.ALLOCATOR, pooledAllocator)
      .option(ChannelOption.SO_REUSEADDR, !SystemUtils.IS_OS_WINDOWS)
      .childOption(ChannelOption.ALLOCATOR, pooledAllocator);

    this.metrics = new NettyMemoryMetrics(
      pooledAllocator, conf.getModuleName() + "-server", conf);

    if (conf.backLog() > 0) {
      bootstrap.option(ChannelOption.SO_BACKLOG, conf.backLog());
    }

    if (conf.receiveBuf() > 0) {
      bootstrap.childOption(ChannelOption.SO_RCVBUF, conf.receiveBuf());
    }

    if (conf.sendBuf() > 0) {
      bootstrap.childOption(ChannelOption.SO_SNDBUF, conf.sendBuf());
    }

    if (conf.enableTcpKeepAlive()) {
      bootstrap.childOption(ChannelOption.SO_KEEPALIVE, true);
    }

    bootstrap.childHandler(new ChannelInitializer<SocketChannel>() {
      @Override
      protected void initChannel(SocketChannel ch) {
        logger.debug("New connection accepted for remote address {}.", ch.remoteAddress());

        // The rpcHandler is used here to initialize the channel pipeline
        RpcHandler rpcHandler = appRpcHandler;
        for (TransportServerBootstrap bootstrap : bootstraps) {
          rpcHandler = bootstrap.doBootstrap(ch, rpcHandler);
        }
        context.initializePipeline(ch, rpcHandler);
      }
    });

    InetSocketAddress address = hostToBind == null ?
        new InetSocketAddress(portToBind): new InetSocketAddress(hostToBind, portToBind);
    channelFuture = bootstrap.bind(address);
    channelFuture.syncUninterruptibly();

    port = ((InetSocketAddress) channelFuture.channel().localAddress()).getPort();
    logger.debug("Shuffle server started on port: {}", port);
  }
Outbox::private def launchConnectTask()
    val _client = nettyEnv.createClient(address)
TransportClientFactory::public TransportClient createClient(String remoteHost, int remotePort)
TransportContext::public TransportChannelHandler initializePipeline()
TransportChannelHandler::public void channelRead0(ChannelHandlerContext ctx, Message request)
TransportRequestHandler::public void handle(RequestMessage request)
TransportRequestHandler::private void processOneWayMessage(OneWayMessage req)
    rpcHandler.receive(reverseClient, req.body().nioByteBuffer());
