Spark - RpcEnv Object &abstract class RpcEnv & RpcEnvConfig & NettyRpcEnv class object 源码解析
疑问?
1. 是否只能在本机上 setup RpcEndpoint,而别的节点只能(通过 RpcEndpointVerifier)验证某个节点是否已经 setup 过这个 RpcEndpoint,再据此得到对应的 RpcEndpointRef?
case class RpcEnvConfig
RpcEnvConfig是一个case class,就是把这几个参数包装了一下,方便使用:
// Plain parameter bundle for creating an RpcEnv; NettyRpcEnvFactory.create reads these fields.
private[spark] case class RpcEnvConfig(
conf: SparkConf,
// Name of the RpcEnv; passed as the service name to Utils.startServiceOnPort.
name: String,
// Host the server binds to (passed to NettyRpcEnv.startServer).
bindAddress: String,
// Host passed to the NettyRpcEnv constructor.
// NOTE(review): presumably the address advertised to peers -- confirm.
advertiseAddress: String,
// Port to start the service on; per the original note, 0 lets the system pick a port.
port: Int,
securityManager: SecurityManager,
// Number of usable CPU cores.
numUsableCores: Int,
// When true, NettyRpcEnvFactory.create does not start a server.
// NOTE(review): the original note said "true when not in yarn-cluster mode" -- not verifiable here.
clientMode: Boolean
)
object RpcEnv
这个伴生对象只有2个方法,都是返回一个RpcEnv的实例:
/**
 * Convenience overload: uses `host` as both the bind and the advertise address and
 * passes 0 for `numUsableCores`, then delegates to the full `create` overload.
 */
def create(
    name: String,
    host: String,
    port: Int,
    conf: SparkConf,
    securityManager: SecurityManager,
    clientMode: Boolean = false): RpcEnv =
  create(
    name = name,
    bindAddress = host,
    advertiseAddress = host,
    port = port,
    conf = conf,
    securityManager = securityManager,
    numUsableCores = 0,
    clientMode = clientMode)
/**
 * Wraps the arguments in an [[RpcEnvConfig]] and hands it to a new
 * [[NettyRpcEnvFactory]], which builds the NettyRpcEnv and returns it.
 */
def create(
    name: String,
    bindAddress: String,
    advertiseAddress: String,
    port: Int,
    conf: SparkConf,
    securityManager: SecurityManager,
    numUsableCores: Int,
    clientMode: Boolean): RpcEnv = {
  val settings = RpcEnvConfig(
    conf = conf,
    name = name,
    bindAddress = bindAddress,
    advertiseAddress = advertiseAddress,
    port = port,
    securityManager = securityManager,
    numUsableCores = numUsableCores,
    clientMode = clientMode)
  val factory = new NettyRpcEnvFactory()
  factory.create(settings)
}
abstract class RpcEnv
RpcEnv 可以类比为网线(通信通道),RpcEndpoint 可以类比为网口:网口一定是连接在网线上的,而网线的另一端则是另一个网口(RpcEndpointRef)。
它有一个属性:
private[spark] val defaultLookupTimeout = RpcUtils.lookupRpcTimeout(conf) 是一个网络的超时时间
有很多的方法:
//
// Maps an RpcEndpoint to its RpcEndpointRef.
// NOTE(review): inferred from the signature only -- confirm against the implementation.
private[rpc] def endpointRef(endpoint: RpcEndpoint): RpcEndpointRef
// Address of this RpcEnv; the NettyRpcEnv override is described as the netty server's host and port.
def address: RpcAddress
// Registers `endpoint` under `name` and returns its ref (NettyRpcEnv delegates to the dispatcher).
def setupEndpoint(name: String, endpoint: RpcEndpoint): RpcEndpointRef
// Asynchronously resolves the endpoint identified by `uri` to an RpcEndpointRef.
def asyncSetupEndpointRefByURI(uri: String): Future[RpcEndpointRef]
/** Blocking variant of `asyncSetupEndpointRefByURI`: awaits the result using `defaultLookupTimeout`. */
def setupEndpointRefByURI(uri: String): RpcEndpointRef = {
  val pendingRef = asyncSetupEndpointRefByURI(uri)
  defaultLookupTimeout.awaitResult(pendingRef)
}
/** Builds the endpoint URI from `address` and `endpointName`, then resolves it synchronously. */
def setupEndpointRef(address: RpcAddress, endpointName: String): RpcEndpointRef = {
  val endpointUri = RpcEndpointAddress(address, endpointName).toString
  setupEndpointRefByURI(endpointUri)
}
// Stops the endpoint referred to by `endpoint`; NettyRpcEnv implements this via dispatcher.stop.
def stop(endpoint: RpcEndpointRef): Unit
// NOTE(review): presumably shuts this RpcEnv down; NettyRpcEnvFactory calls it when server start-up fails.
def shutdown(): Unit
// NOTE(review): presumably blocks until this RpcEnv has terminated -- confirm.
def awaitTermination(): Unit
// Takes a deserialization thunk and returns its result; NettyRpcEnv routes its Java deserialization through it.
def deserialize[T](deserializationAction: () => T): T
// NOTE(review): presumably the server this RpcEnv uses to serve files -- confirm.
def fileServer: RpcEnvFileServer
// NOTE(review): presumably opens a readable channel for the resource at `uri` -- confirm.
def openChannel(uri: String): ReadableByteChannel
NettyRpcEnv class & object
NettyRpcEnv class 是RpcEnv的唯一实现类。
这个类里面有:
- 网络分发器 dispatcher,用来管理所有的 RpcEndpoint 并分发所有的 message。dispatcher 内部会保存所有注册的 RpcEndpoint,用 RpcEndpoint 的 name 标识来区分不同的 RpcEndpoint。
- outboxes 是一个 map,保存 RpcAddress -> Outbox 的键值对。根据目标 host 的不同,选择不同的发件箱 Outbox。
NettyRpcEnv object
里面主要是2个属性:
// DynamicVariable works much like a thread-local with dynamically scoped bindings.
// Holds the NettyRpcEnv instance; NettyRpcEndpointRef.readObject reads it to restore its nettyEnv.
private[netty] val currentEnv = new DynamicVariable[NettyRpcEnv](null)
// Holds the TransportClient instance; NettyRpcEnv.deserialize binds it for the duration of deserialization.
private[netty] val currentClient = new DynamicVariable[TransportClient](null)
NettyRpcEnv class
属性:
// Transport configuration for netty, built from a clone of `conf` in which
// spark.rpc.io.numConnectionsPerPeer is set to "1".
private[netty] val transportConf = SparkTransportConf.fromSparkConf(
conf.clone.set("spark.rpc.io.numConnectionsPerPeer", "1"),
"rpc",
conf.getInt("spark.rpc.io.threads", 0))
// Dispatcher for this RpcEnv.
private val dispatcher: Dispatcher = new Dispatcher(this, numUsableCores)
private val streamManager = new NettyStreamManager(this)
// Netty transport context; NettyRpcHandler is the handler that processes requests.
private val transportContext = new TransportContext(transportConf,
new NettyRpcHandler(dispatcher, this, streamManager))
// Factory that produces the network clients used for requests.
private val clientFactory = transportContext.createClientFactory(createClientBootstraps())
// Client factory used for downloading files; not initialized here.
@volatile private var fileDownloadFactory: TransportClientFactory = _
// Daemon cached thread pool named "netty-rpc-connection", sized by spark.rpc.connect.threads (default 64).
private[netty] val clientConnectionExecutor = ThreadUtils.newDaemonCachedThreadPool(
"netty-rpc-connection",
conf.getInt("spark.rpc.connect.threads", 64))
// The netty server; not initialized here (see startServer).
@volatile private var server: TransportServer = _
private val stopped = new AtomicBoolean(false)
// Outbox map keyed by RpcAddress: one Outbox instance per remote address.
private val outboxes = new ConcurrentHashMap[RpcAddress, Outbox]()
方法:
// Removes the Outbox for `address` and stops it.
private[netty] def removeOutbox(address: RpcAddress): Unit
// Starts the netty server and uses the dispatcher to register the RpcEndpointVerifier.
def startServer(bindAddress: String, port: Int): Unit
// Returns the host and port of the netty server.
override lazy val address: RpcAddress
// Registers `endpoint` under `name` with the dispatcher and returns the ref it produces.
override def setupEndpoint(name: String, endpoint: RpcEndpoint): RpcEndpointRef =
  dispatcher.registerRpcEndpoint(name, endpoint)
// Asynchronously resolves `uri` to an endpoint ref. The endpoint must already be
// registered on the target: the target's RpcEndpointVerifier is asked whether the name
// exists, and the returned future fails with RpcEndpointNotFoundException if it does not.
def asyncSetupEndpointRefByURI(uri: String): Future[RpcEndpointRef] = {
  val target = RpcEndpointAddress(uri)
  val candidate = new NettyRpcEndpointRef(conf, target, this)
  val verifierAddress = RpcEndpointAddress(target.rpcAddress, RpcEndpointVerifier.NAME)
  val verifierRef = new NettyRpcEndpointRef(conf, verifierAddress, this)
  val existence =
    verifierRef.ask[Boolean](RpcEndpointVerifier.CheckExistence(candidate.name))
  existence.flatMap { exists =>
    if (exists) Future.successful(candidate)
    else Future.failed(new RpcEndpointNotFoundException(uri))
  }(ThreadUtils.sameThread)
}
// Stops the endpoint behind `endpointRef`; only NettyRpcEndpointRef instances are accepted.
override def stop(endpointRef: RpcEndpointRef): Unit = {
  val isNettyRef = endpointRef.isInstanceOf[NettyRpcEndpointRef]
  require(isNettyRef)
  dispatcher.stop(endpointRef)
}
// Posts a message to an Outbox; private, so only called from inside this class.
// The OutboxMessage passed in has already been serialized by the caller (see send).
// The visible caller, send, only reaches this method when the receiver's address is not this env's address.
private def postToOutbox(receiver: NettyRpcEndpointRef, message: OutboxMessage): Unit
// One-way send that routes the message itself: a receiver whose address equals this
// env's address is delivered through the dispatcher (local inbox); any other receiver
// gets the message serialized and posted to an Outbox. No reply is expected in either
// case, hence the one-way message types.
private[netty] def send(message: RequestMessage): Unit = {
  val target = message.receiver
  if (target.address != address) {
    // Remote endpoint: serialize with RequestMessage's own serialize method first.
    postToOutbox(target, OneWayOutboxMessage(message.serialize(this)))
  } else {
    // Local endpoint: a stopped env is only logged at debug level.
    try {
      dispatcher.postOneWayMessage(message)
    } catch {
      case e: RpcEnvStoppedException => logDebug(e.getMessage)
    }
  }
}
// Creates a client for sending requests to `address`.
// NOTE(review): the original note says this is mainly used from the Outbox class -- not visible here.
private[netty] def createClient(address: RpcAddress): TransportClient
// Sends a message asynchronously and returns a Future of the reply (signature only; the
// body is not reproduced in these notes). RequestMessage wraps senderAddress, receiver
// and content; its serialize method is used here and in turn calls NettyRpcEnv.serialize.
// Access is private[netty], i.e. limited to the netty package; the caller shown in these
// notes is NettyRpcEndpointRef.ask.
private[netty] def ask[T: ClassTag](message: RequestMessage, timeout: RpcTimeout): Future[T]
// Serializes `content` into a ByteBuffer with this env's Java serializer instance.
private[netty] def serialize(content: Any): ByteBuffer =
  javaSerializerInstance.serialize(content)
// Opens a serialization stream over `out` using this env's Java serializer instance.
private[netty] def serializeStream(out: OutputStream): SerializationStream =
  javaSerializerInstance.serializeStream(out)
// Deserializes `bytes` into a T. While deserialization runs, NettyRpcEnv.currentClient is
// bound to `client`, so NettyRpcEndpointRef.readObject can pick it up.
private[netty] def deserialize[T: ClassTag](client: TransportClient, bytes: ByteBuffer): T =
  NettyRpcEnv.currentClient.withValue(client) {
    deserialize(() => javaSerializerInstance.deserialize[T](bytes))
  }
// ask is the method used when the result of an asynchronous request is needed.
// Access is private[netty] (netty package only); the caller shown in these notes is NettyRpcEndpointRef.ask.
// The method body is omitted in these notes.
private[netty] def ask[T: ClassTag](message: RequestMessage, timeout: RpcTimeout): Future[T] =
trait RpcEndpoint
内部有一个属性 val rpcEnv: RpcEnv,即每个 RpcEndpoint(网口)都持有它所连接的 RpcEnv(网线),这就是网口和网线之间的联系。
RpcEndpointVerifier class
这个类存在的目的是验证是否存在某个 RpcEndpoint,所以在启动 server 的时候,会首先通过 registerRpcEndpoint 注册这个 RpcEndpoint 的子类。
/**
 * Endpoint that answers [[RpcEndpointVerifier.CheckExistence]] queries by asking the
 * dispatcher whether an endpoint with the given name is known to it.
 */
private[netty] class RpcEndpointVerifier(override val rpcEnv: RpcEnv, dispatcher: Dispatcher)
  extends RpcEndpoint {

  override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
    case RpcEndpointVerifier.CheckExistence(endpointName) =>
      val registered = dispatcher.verify(endpointName)
      context.reply(registered)
  }
}
// Companion holding the verifier's endpoint name and its query message type.
private[netty] object RpcEndpointVerifier {
// Endpoint name used to address the verifier (see NettyRpcEnv.asyncSetupEndpointRefByURI).
val NAME = "endpoint-verifier"
/** A message used to ask the remote [[RpcEndpointVerifier]] if an `RpcEndpoint` exists. */
case class CheckExistence(name: String)
}
abstract class RpcEndpointRef
这个可以看作 远端的RpcEndpoint的引用,可以看作远端的网口。
这个内部有几个参数属性:
private[this] val maxRetries = RpcUtils.numRetries(conf) // number of retries
private[this] val retryWaitMs = RpcUtils.retryWaitMs(conf) // wait between retries, in milliseconds
private[this] val defaultAskTimeout = RpcUtils.askRpcTimeout(conf) // default timeout for ask / askSync
定义的几个方法:
def address: RpcAddress // address of the remote endpoint
def name: String // name identifying the remote endpoint
def send(message: Any): Unit // fire-and-forget send to the remote endpoint
def ask[T: ClassTag](message: Any, timeout: RpcTimeout): Future[T]// asynchronous ask with a caller-supplied timeout
def ask[T: ClassTag](message: Any): Future[T] = ask(message, defaultAskTimeout)// asynchronous ask using defaultAskTimeout
def askSync[T: ClassTag](message: Any): T = askSync(message, defaultAskTimeout)// synchronous ask using defaultAskTimeout
// Synchronous ask with a caller-supplied timeout: issues the asynchronous ask and then
// waits for its result using the same timeout.
def askSync[T: ClassTag](message: Any, timeout: RpcTimeout): T =
  timeout.awaitResult(ask[T](message, timeout))
NettyRpcEndpointRef
这个类相当于远端网口的引用,因此内部持有网线 RpcEnv(RpcEnv 的实现类 NettyRpcEnv)和远端地址(RpcEndpointAddress)。
当一个 NettyRpcEndpointRef 的实例 setup 完成之后,就可以使用它来发送消息了(并且只能通过它发送消息)。
下面详细看看这个类:
构造方法:
// Netty-backed RpcEndpointRef: pairs the address of the referenced endpoint with the
// NettyRpcEnv used to reach it. `conf` and `nettyEnv` are @transient (not serialized).
private[netty] class NettyRpcEndpointRef(
// Spark configuration, also passed to the RpcEndpointRef superclass.
@transient private val conf: SparkConf,
// Address (RpcAddress + name) of the endpoint this ref points to.
private val endpointAddress: RpcEndpointAddress,
// The NettyRpcEnv this ref sends through; re-assigned in readObject after deserialization.
@transient @volatile private var nettyEnv: NettyRpcEnv
) extends RpcEndpointRef(conf) {
// Fields:
// TransportClient used for communication; assigned in readObject from NettyRpcEnv.currentClient.
@transient @volatile var client: TransportClient = _
}
方法:
// Address of the referenced endpoint; null when the endpoint address carries no RpcAddress.
override def address: RpcAddress = endpointAddress.rpcAddress
// Java serialization hook: after the default field restore, re-attach the transient
// fields from the values currently bound in NettyRpcEnv's DynamicVariables.
private def readObject(in: ObjectInputStream): Unit = {
in.defaultReadObject()
nettyEnv = NettyRpcEnv.currentEnv.value
client = NettyRpcEnv.currentClient.value // the client used for communication
}
// Java serialization hook: writes only the default (non-transient) fields.
private def writeObject(out: ObjectOutputStream): Unit = {
out.defaultWriteObject()
}
override def name: String = endpointAddress.name // name identifying the referenced endpoint
// Asynchronous ask with a caller-supplied timeout; delegates to NettyRpcEnv.ask.
// The request carries nettyEnv.address as the sender and `this` as the receiver, so the
// env can read the destination address from this ref.
override def ask[T: ClassTag](message: Any, timeout: RpcTimeout): Future[T] = {
  val request = new RequestMessage(nettyEnv.address, this, message)
  nettyEnv.ask(request, timeout)
}
// Fire-and-forget send to the referenced endpoint; rejects null messages.
// The request carries nettyEnv.address as the sender and `this` as the receiver.
override def send(message: Any): Unit = {
  require(message != null, "Message is null")
  val request = new RequestMessage(nettyEnv.address, this, message)
  nettyEnv.send(request)
}
// Human-readable form that embeds the endpoint address.
override def toString: String = s"NettyRpcEndpointRef($endpointAddress)"
// Two refs are equal exactly when their endpoint addresses are equal.
final override def equals(that: Any): Boolean = that match {
  case ref: NettyRpcEndpointRef => endpointAddress == ref.endpointAddress
  case _ => false
}

// Consistent with equals: derived from the endpoint address, 0 when it is null.
final override def hashCode(): Int = endpointAddress match {
  case null => 0
  case addr => addr.hashCode()
}
case class RpcAddress & object RpcAddress
这个是 RpcEnv 的地址(address),对 host 和 port 做了一层包装,并支持从普通 URI 字符串和 "spark://host:port" 形式的 Spark URL 这两种格式解析出地址。
/** Host/port pair identifying an RpcEnv. */
private[spark] case class RpcAddress(host: String, port: Int) {

  /** The address rendered as "host:port". */
  def hostPort: String = s"$host:$port"

  /** Returns a string in the form of "spark://host:port". */
  def toSparkURL: String = s"spark://$hostPort"

  override def toString: String = hostPort
}
/** Factory methods that parse an [[RpcAddress]] out of a string. */
private[spark] object RpcAddress {

  /** Return the [[RpcAddress]] represented by `uri`. */
  def fromURIString(uri: String): RpcAddress = {
    val parsed = new java.net.URI(uri)
    RpcAddress(parsed.getHost, parsed.getPort)
  }

  /** Returns the [[RpcAddress]] encoded in the form of "spark://host:port" */
  def fromSparkURL(sparkUrl: String): RpcAddress =
    Utils.extractHostPortFromSparkUrl(sparkUrl) match {
      case (host, port) => RpcAddress(host, port)
    }
}
case class RpcEndpointAddress & object RpcEndpointAddress
看这个类的构造方法,可以看出它与 RpcAddress 是组合的关系。
它把通信用的 host 和 port 又包装了一次,并加入了 endpointName 作为标识,方便识别。
/**
 * Address of a named endpoint: an [[RpcAddress]] (may be null) plus the endpoint name,
 * which must not be null.
 */
private[spark] case class RpcEndpointAddress(rpcAddress: RpcAddress, name: String) {

  require(name != null, "RpcEndpoint name must be provided.")

  def this(host: String, port: Int, name: String) = this(RpcAddress(host, port), name)

  // "spark://name@host:port" when an RpcAddress is present, "spark-client://name" otherwise.
  override val toString: String =
    if (rpcAddress == null) s"spark-client://$name"
    else s"spark://$name@${rpcAddress.host}:${rpcAddress.port}"
}
/** Factory methods for [[RpcEndpointAddress]]. */
private[spark] object RpcEndpointAddress {

  /** Builds an address from its separate host, port and endpoint-name parts. */
  def apply(host: String, port: Int, name: String): RpcEndpointAddress =
    new RpcEndpointAddress(host, port, name)

  /**
   * Parses a URL of the form "spark://name@host:port".
   *
   * Throws SparkException when the string is not a valid URI, or when it has a different
   * scheme, lacks host, port or name, or carries a path, fragment or query.
   */
  def apply(sparkUrl: String): RpcEndpointAddress = {
    try {
      val parsed = new java.net.URI(sparkUrl)
      val host = parsed.getHost
      val port = parsed.getPort
      val name = parsed.getUserInfo
      // uri.getPath returns "" instead of null
      val hasPath = parsed.getPath != null && !parsed.getPath.isEmpty
      val wellFormed =
        parsed.getScheme == "spark" &&
          host != null &&
          port >= 0 &&
          name != null &&
          !hasPath &&
          parsed.getFragment == null &&
          parsed.getQuery == null
      if (!wellFormed) {
        throw new SparkException("Invalid Spark URL: " + sparkUrl)
      }
      new RpcEndpointAddress(host, port, name)
    } catch {
      case e: java.net.URISyntaxException =>
        throw new SparkException("Invalid Spark URL: " + sparkUrl, e)
    }
  }
}
trait RpcEnvFactory
如果把 RpcEnv 看作网线,那么 RpcEnvFactory 就是制作网线的工厂(工厂模式的统一接口),专门用来生产各种网线。这个工厂提供创建 RpcEnv 的功能,但目前只有一个实现类 NettyRpcEnvFactory。
// Factory interface for building an RpcEnv from an RpcEnvConfig.
private[spark] trait RpcEnvFactory {
// Creates an RpcEnv configured by `config`.
def create(config: RpcEnvConfig): RpcEnv
}
class NettyRpcEnvFactory
/** [[RpcEnvFactory]] that builds a NettyRpcEnv and, unless in client mode, starts its server. */
private[rpc] class NettyRpcEnvFactory extends RpcEnvFactory with Logging {

  /**
   * Creates a NettyRpcEnv from `config` (a simple wrapper around host, port, etc.).
   *
   * When `config.clientMode` is false the server is started through
   * `Utils.startServiceOnPort`. If that fails with a non-fatal error, the env is shut
   * down and the error is rethrown.
   */
  def create(config: RpcEnvConfig): RpcEnv = {
    val sparkConf = config.conf
    // Use JavaSerializerInstance in multiple threads is safe. However, if we plan to support
    // KryoSerializer in future, we have to use ThreadLocal to store SerializerInstance
    val javaSerializerInstance =
      new JavaSerializer(sparkConf).newInstance().asInstanceOf[JavaSerializerInstance]
    val nettyEnv = new NettyRpcEnv(
      sparkConf,
      javaSerializerInstance,
      config.advertiseAddress,
      config.securityManager,
      config.numUsableCores)

    if (!config.clientMode) {
      // Start function for startServiceOnPort: starts the server on the candidate port and
      // reports the port the env actually ended up with.
      val launchServer = (candidatePort: Int) => {
        nettyEnv.startServer(config.bindAddress, candidatePort)
        (nettyEnv, nettyEnv.address.port)
      }
      try {
        Utils.startServiceOnPort(config.port, launchServer, sparkConf, config.name)._1
      } catch {
        case NonFatal(e) =>
          nettyEnv.shutdown()
          throw e
      }
    }

    nettyEnv
  }
}