Creating the RpcEnv in SparkEnv

1. Overview: RpcEnv can be loosely described as the remote communication environment. It is created while SparkContext builds the SparkEnv and is then passed into many of SparkEnv's components. For background not covered in detail here, see: https://blog.csdn.net/qq_38601362/article/details/100007090
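
For orientation (this is not part of the files walked through below), here is a rough sketch of how Spark's own components use an RpcEnv once it exists: register an RpcEndpoint and talk to it through the returned RpcEndpointRef. Note that org.apache.spark.rpc is private[spark], so this is purely illustrative rather than user-facing API; the endpoint name and class here are made up.

import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEnv}

// A hypothetical endpoint; Spark's own components (e.g. the MapOutputTracker and
// BlockManagerMaster endpoints) follow the same pattern inside SparkEnv.create.
class EchoEndpoint(override val rpcEnv: RpcEnv) extends RpcEndpoint {
  // handles one-way messages sent with RpcEndpointRef.send
  override def receive: PartialFunction[Any, Unit] = {
    case msg: String => println(s"received: $msg")
  }
  // handles request/reply messages sent with RpcEndpointRef.ask
  override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
    case msg: String => context.reply(s"echo: $msg")
  }
}

// register the endpoint and obtain a reference that can be used to message it
val echoRef = rpcEnv.setupEndpoint("echo", new EchoEndpoint(rpcEnv))
echoRef.send("hello")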

 

2. SparkEnv.scala, line 249 of the source:

val rpcEnv = RpcEnv.create(systemName, bindAddress, advertiseAddress, port.getOrElse(-1),
  conf, securityManager, numUsableCores, !isDriver)

Parameter descriptions:

private[spark] val driverSystemName = "sparkDriver"
private[spark] val executorSystemName = "sparkExecutor"
val systemName = if (isDriver) driverSystemName else executorSystemName

val bindAddress = conf.get(DRIVER_BIND_ADDRESS)   // address to bind to
val advertiseAddress = conf.get(DRIVER_HOST_ADDRESS)   // address advertised to other nodes
val port = conf.get(DRIVER_PORT)   // driver port
// security manager
val securityManager = new SecurityManager(conf, ioEncryptionKey, authSecretFileConf)
numUsableCores   // number of usable cores
!isDriver   // this is the driver environment, so isDriver is true and clientMode (= !isDriver) is false
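
As a side note, the three address/port values above come straight from the driver configuration. A minimal sketch (not from the source) of setting them explicitly; the string keys are the constants behind DRIVER_BIND_ADDRESS, DRIVER_HOST_ADDRESS and DRIVER_PORT:

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.driver.bindAddress", "0.0.0.0")  // bindAddress: local interface the server binds to
  .set("spark.driver.host", "driver-host")     // advertiseAddress: address other nodes use to reach the driver
  .set("spark.driver.port", "7078")            // port: fixed RPC port (0 or unset means a random free port)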

 

3. RpcEnv.scala

The create method, line 46:

def create(
      name: String,   // system name, here "sparkDriver"
      bindAddress: String,   // address to bind to
      advertiseAddress: String,   // address advertised to other nodes
      port: Int,   // port
      conf: SparkConf,   // configuration
      securityManager: SecurityManager,   // security manager
      numUsableCores: Int,   // number of usable cores
      // client mode; we are on the driver here, so this is false
      clientMode: Boolean): RpcEnv = {
    val config = RpcEnvConfig(conf, name, bindAddress, advertiseAddress, port,
      securityManager, numUsableCores, clientMode)
    new NettyRpcEnvFactory().create(config)
  }

// This is a case class: its constructor parameters become val fields by default and can be
// read directly on an instance. If case classes are unfamiliar, see:
// https://blog.csdn.net/qq_38601362/article/details/97658528
private[spark] case class RpcEnvConfig(
    conf: SparkConf,
    name: String,
    bindAddress: String,
    advertiseAddress: String,
    port: Int,
    securityManager: SecurityManager,
    numUsableCores: Int,
    clientMode: Boolean)
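
To illustrate the point in the comment above with a toy example (names here are hypothetical, not Spark code): a case class's constructor parameters become immutable val fields readable directly on the instance.

case class ServerConfig(host: String, port: Int)

val cfg = ServerConfig("localhost", 7077)
cfg.host                  // "localhost" -- fields are public vals
cfg.copy(port = 7078)     // copy() returns a new immutable instance with the changed field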

There is not much else going on in this method, so let's Ctrl+click into the NettyRpcEnvFactory().create method.

 

4. NettyRpcEnv.scala

The create method, line 463:

def create(config: RpcEnvConfig): RpcEnv = {
    val sparkConf = config.conf
    // Using a JavaSerializerInstance from multiple threads is safe.
    // However, if we plan to support KryoSerializer in the future, we will have to use a
    // ThreadLocal to store the SerializerInstance.
    val javaSerializerInstance =   // a Spark serializer backed by Java's built-in serialization
      new JavaSerializer(sparkConf).newInstance().asInstanceOf[JavaSerializerInstance]
    val nettyEnv =   // the constructor parameters are covered when they are used
      new NettyRpcEnv(sparkConf, javaSerializerInstance, config.advertiseAddress,
        config.securityManager, config.numUsableCores)
    if (!config.clientMode) {   // clientMode was passed as !isDriver, i.e. false on the driver, so this branch runs
      // a closure: given a trial port, start the server and return the env with the bound port
      val startNettyRpcEnv: Int => (NettyRpcEnv, Int) = { actualPort =>
        // start the server   ↓
        nettyEnv.startServer(config.bindAddress, actualPort)
        // return a tuple
        (nettyEnv, nettyEnv.address.port)
      }
      try {
        // try to start the service on the given port, or fail after several attempts   ↓
        Utils.startServiceOnPort(config.port, startNettyRpcEnv, sparkConf, config.name)._1
      } catch {
        case NonFatal(e) =>
          nettyEnv.shutdown()
          throw e
      }
    }
    nettyEnv
  }

def startServer(bindAddress: String, port: Int): Unit = {
    val bootstraps: java.util.List[TransportServerBootstrap] =
      if (securityManager.isAuthenticationEnabled()) {
        java.util.Arrays.asList(new AuthServerBootstrap(transportConf, securityManager))
      } else {
        java.util.Collections.emptyList()
      }
    // create a server that will attempt to bind to the given host and port   ↓
    server = transportContext.createServer(bindAddress, port, bootstraps)
  
    // register the RPC endpoint (the built-in RpcEndpointVerifier)
    dispatcher.registerRpcEndpoint(
      RpcEndpointVerifier.NAME, new RpcEndpointVerifier(this, dispatcher))
}
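
Whether the AuthServerBootstrap is added depends on securityManager.isAuthenticationEnabled(). A minimal sketch of the configuration that turns it on (assuming a standalone/local deployment where the shared secret is supplied by hand; on YARN the secret is generated automatically):

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.authenticate", "true")              // enable RPC authentication
  .set("spark.authenticate.secret", "my-secret")  // shared secret checked during the auth bootstrap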

// Dispatcher
// The message dispatcher, responsible for routing RPC messages to the appropriate endpoint(s).
private val dispatcher: Dispatcher = new Dispatcher(this, numUsableCores)

// Stream manager
// A StreamManager implementation for serving files from the NettyRpcEnv.
private val streamManager = new NettyStreamManager(this)

// Initialize the TransportContext used by both the underlying client and server.
private val transportContext = new TransportContext(transportConf,
    new NettyRpcHandler(dispatcher, this, streamManager))
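
For context, registering an endpoint through the public API also ends up at this dispatcher; paraphrasing NettyRpcEnv (not quoted from the excerpt above), setupEndpoint is essentially a one-line delegation:

override def setupEndpoint(name: String, endpoint: RpcEndpoint): RpcEndpointRef = {
  dispatcher.registerRpcEndpoint(name, endpoint)
}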

// TransportContext.java, the createServer method
public TransportServer createServer(
  String host, int port, List<TransportServerBootstrap> bootstraps) {
   // Creates a TransportServer bound to the given host and port, or to any available port if 0.
   // If you do not want to bind to any particular host, set "hostToBind" to null.
   return new TransportServer(this, host, port, rpcHandler, bootstraps);
}

Next, Ctrl+click into the Utils.startServiceOnPort method.

Utils.startServiceOnPort: Utils.scala, line 2238

def startServiceOnPort[T](
      startPort: Int,
      startService: Int => (T, Int),
      conf: SparkConf,
      serviceName: String = ""): (T, Int) = {

    // precondition: must hold, otherwise an IllegalArgumentException is thrown
    require(startPort == 0 || (1024 <= startPort && startPort < 65536),
      "startPort should be between 1024 and 65535 (inclusive), or 0 for a random free port.")
    
    // service name used in log messages; here it is "sparkDriver"
    val serviceString = if (serviceName.isEmpty) "" else s" '$serviceName'"
    // maximum number of retries; the default applies when not configured
    val maxRetries = portMaxRetries(conf)
    for (offset <- 0 to maxRetries) {
      // Do not increment the port if startPort is 0; it is treated as a special port
      val tryPort = if (startPort == 0) {
        startPort
      } else {
        // returns startPort + offset (wrapped into the user port range)
        userPort(startPort, offset)
      }
      try {
        // start the service; startService is the closure defined in NettyRpcEnv.scala   ↑
        val (service, port) = startService(tryPort)
        logInfo(s"Successfully started service$serviceString on port $port.")
        return (service, port)
      } catch {
        case e: Exception if isBindCollision(e) =>
          if (offset >= maxRetries) {
            val exceptionMessage = if (startPort == 0) {
              s"${e.getMessage}: Service$serviceString failed after " +
                s"$maxRetries retries (on a random free port)! " +
                s"Consider explicitly setting the appropriate binding address for " +
                s"the service$serviceString (for example ${DRIVER_BIND_ADDRESS.key} " +
                s"for SparkDriver) to the correct binding address."
            } else {
              s"${e.getMessage}: Service$serviceString failed after " +
                s"$maxRetries retries (starting from $startPort)! Consider explicitly setting " +
                s"the appropriate port for the service$serviceString (for example spark.ui.port " +
                s"for SparkUI) to an available port or increasing spark.port.maxRetries."
            }
            val exception = new BindException(exceptionMessage)
            // restore the original stack trace
            exception.setStackTrace(e.getStackTrace)
            throw exception
          }
          if (startPort == 0) {
            // since startPort 0 asks for a random free port, the binding address is most likely wrong
            logWarning(s"Service$serviceString could not bind on a random free port. " +
              "You may check whether configuring an appropriate binding address.")
          } else {
            logWarning(s"Service$serviceString could not bind on port $tryPort. " +
              s"Attempting port ${tryPort + 1}.")
          }
      }
    }
    // should never happen
    throw new SparkException(s"Failed to start service$serviceString on port $startPort")
  }
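
To make the startService contract concrete, here is a self-contained toy sketch (not Spark code) of a function with the required Int => (T, Int) shape: bind on the trial port and return the service together with the port it actually bound to.

import java.net.ServerSocket

val startEchoService: Int => (ServerSocket, Int) = { tryPort =>
  val socket = new ServerSocket(tryPort)   // throws java.net.BindException on a collision,
  (socket, socket.getLocalPort)            // which isBindCollision recognizes and retries
}

// hypothetical usage: try 7078 first, then 7079, 7080, ... up to maxRetries
// val (server, boundPort) = Utils.startServiceOnPort(7078, startEchoService, conf, "echo")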

 
