使用 Akka 实现一个简易版的 Spark 通信框架
1.架构图
2.具体代码
(1)Master
import akka.actor.{Actor, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.concurrent.duration._
/**
 * Master side of a minimal Spark-like communication framework built on Akka actors.
 *
 * The actor accepts worker registrations, records worker heartbeats, and periodically
 * evicts every worker whose last heartbeat is older than `checkOutTimeInterval`.
 */
class Master extends Actor {
  // Statements in the class body are constructor code and run before preStart.
  println("Master constructor invoked")

  // Registration info of every known worker, keyed by workerId.
  private val workerId2WorkerInfo = new mutable.HashMap[String, WorkerInfo]
  // The same WorkerInfo objects kept as a list; the original author's note says this is
  // meant to make sorting workers by memory size easy later on.
  private val workerInfoList = new ListBuffer[WorkerInfo]
  // Period of the timeout check and maximum tolerated heartbeat age, in milliseconds.
  val checkOutTimeInterval = 15000 // 15 seconds

  // Handle of the periodic CheckOutTime timer, kept so that postStop can cancel it.
  private var checkOutTimeTask: Option[akka.actor.Cancellable] = None

  /** Starts the periodic timeout check; runs after the constructor code. */
  override def preStart(): Unit = {
    println("preStart method invoked")
    // Brings the implicit ExecutionContext required by the scheduler into scope.
    import context.dispatcher
    checkOutTimeTask = Some(
      context.system.scheduler.schedule(Duration.Zero, checkOutTimeInterval.millis, self, CheckOutTime)
    )
  }

  /** Cancels the timer so the system scheduler stops sending CheckOutTime to this actor. */
  override def postStop(): Unit = {
    checkOutTimeTask.foreach(_.cancel())
    checkOutTimeTask = None
  }

  /** Message loop: handles registrations, heartbeats and the self-sent timeout check. */
  override def receive: Receive = {
    // Registration request from a worker; only ids that are not yet registered are accepted.
    case RegisterMessage(workerId, memory, cores) =>
      if (!workerId2WorkerInfo.contains(workerId)) {
        val workerInfo = new WorkerInfo(workerId, memory, cores)
        // Stamp the registration time so the worker is never judged on an uninitialised
        // heartbeat timestamp if CheckOutTime fires before its first HeartBeat arrives.
        workerInfo.lastHheartBeartTime = System.currentTimeMillis()
        workerId2WorkerInfo += (workerId -> workerInfo)
        workerInfoList += workerInfo
        // Tell the worker that registration succeeded.
        sender() ! RegisteredMessage(s"workerId:$workerId 注册成功")
      }

    // Heartbeat from a worker; heartbeats from unregistered ids are ignored.
    case HeartBeat(workerId) =>
      workerId2WorkerInfo.get(workerId).foreach { workerInfo =>
        workerInfo.lastHheartBeartTime = System.currentTimeMillis()
      }

    // Periodic self-message: drop every worker whose heartbeat is too old.
    case CheckOutTime =>
      // Read the clock once so every worker is compared against the same instant.
      val now = System.currentTimeMillis()
      // filter returns a new buffer, so removing from workerInfoList while iterating is safe.
      val outTimeWorkerInfoList: ListBuffer[WorkerInfo] =
        workerInfoList.filter(workerInfo => now - workerInfo.lastHheartBeartTime > checkOutTimeInterval)
      for (outWorkerInfo <- outTimeWorkerInfoList) {
        val workerId: String = outWorkerInfo.workerId
        workerId2WorkerInfo -= workerId
        workerInfoList -= outWorkerInfo
        println(s"超时的workerId:$workerId")
      }
      println("活着的worker总数:" + workerInfoList.size)
  }
}
/** Entry point that boots the master's actor system and creates the Master actor. */
object Master {

  /**
   * Starts the master.
   *
   * @param args `args(0)` is the host name to bind and `args(1)` the port,
   *             e.g. `127.0.0.1 8888`.
   */
  def main(args: Array[String]): Unit = {
    // Report missing arguments with a usage hint instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
      System.err.println("Usage: Master <host> <port>   (e.g. 127.0.0.1 8888)")
      sys.exit(1)
    }
    // Host and port the master listens on.
    val host = args(0)
    val port = args(1)

    // Remoting configuration, written as a triple-quoted string.
    val configStr: String =
      s"""
         |akka.actor.provider = "akka.remote.RemoteActorRefProvider"
         |akka.remote.netty.tcp.hostname = "$host"
         |akka.remote.netty.tcp.port = "$port"
         |""".stripMargin
    val config = ConfigFactory.parseString(configStr)

    // 1. Create the ActorSystem, which creates and supervises the actors below it.
    //    Worker builds its lookup path from the names "masterActorSystem" and
    //    "masterActor", so both must stay in sync with Worker.preStart.
    val masterActorSystem = ActorSystem("masterActorSystem", config)
    // 2. Create the master actor inside that system.
    val masterActor = masterActorSystem.actorOf(Props(new Master), "masterActor")
    // 3. Smoke test: send the master a message. Master.receive has no case for this
    //    String, so it only exercises message delivery.
    masterActor ! "start"
  }
}
(2)Worker
import java.util.UUID
import akka.actor.{Actor, ActorRef, ActorSelection, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import scala.concurrent.duration._
/**
 * Worker side of a minimal Spark-like communication framework built on Akka actors.
 *
 * On start the worker registers itself with the master; once the master confirms the
 * registration it sends a heartbeat every `sendHeartBeatInterval` milliseconds.
 *
 * @param memory     memory figure reported to the master at registration
 * @param cores      CPU core count reported to the master at registration
 * @param masterHost host part of the master's remote address
 * @param masterPort port part of the master's remote address
 */
class Worker(val memory: Int, val cores: Int, val masterHost: String, val masterPort: String) extends Actor {
  // Statements in the class body are constructor code and run before preStart.
  println("Worker constructor invoked")

  // Unique id of this worker instance.
  private val workerId: String = UUID.randomUUID().toString
  // Interval between two heartbeats, in milliseconds.
  val sendHeartBeatInterval = 10000
  // Selection pointing at the master actor; null until preStart assigns it.
  var master: ActorSelection = _

  // Handle of the periodic SendHeartBeat timer, kept so that it can be cancelled.
  private var heartBeatTask: Option[akka.actor.Cancellable] = None

  /** Resolves the master and sends the registration request; runs after the constructor code. */
  override def preStart(): Unit = {
    println("preStart method invoked")
    // The path is made of: protocol, the master's ActorSystem name, the master's host and
    // port, and the actor's place in the hierarchy (/user/<actor name>).
    master = context.actorSelection(s"akka.tcp://masterActorSystem@$masterHost:$masterPort/user/masterActor")
    // Registration info (workerId, memory, cores) travels in a case class.
    master ! RegisterMessage(workerId, memory, cores)
  }

  /** Cancels the timer so the system scheduler stops sending SendHeartBeat to this actor. */
  override def postStop(): Unit = {
    heartBeatTask.foreach(_.cancel())
    heartBeatTask = None
  }

  /** Message loop: handles the registration reply and the self-sent heartbeat tick. */
  override def receive: Receive = {
    // The master confirmed the registration: start the heartbeat timer.
    case RegisteredMessage(message) =>
      println(message)
      // Brings the implicit ExecutionContext required by the scheduler into scope.
      import context.dispatcher
      // Drop a timer left over from an earlier confirmation so that heartbeats never
      // run on two timers at once.
      heartBeatTask.foreach(_.cancel())
      // The original author notes the timer cannot target `master` directly because of a
      // type mismatch (it is an ActorSelection), so the tick goes to self and is
      // forwarded to the master below.
      heartBeatTask = Some(
        context.system.scheduler.schedule(Duration.Zero, sendHeartBeatInterval.millis, self, SendHeartBeat)
      )

    // Self-sent tick: this is where the heartbeat is actually sent to the master.
    case SendHeartBeat =>
      master ! HeartBeat(workerId)
  }
}
/** Entry point that boots a worker's actor system and creates the Worker actor. */
object Worker {

  /**
   * Starts a worker.
   *
   * @param args six positional values: worker host, worker port, memory, cores,
   *             master host, master port, e.g. `127.0.0.1 6666 50 24 127.0.0.1 8888`.
   *             Memory and cores must be integers.
   */
  def main(args: Array[String]): Unit = {
    // Report missing arguments with a usage hint instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 6) {
      System.err.println(
        "Usage: Worker <host> <port> <memory> <cores> <masterHost> <masterPort>" +
          "   (e.g. 127.0.0.1 6666 50 24 127.0.0.1 8888)")
      sys.exit(1)
    }
    // Host and port this worker listens on.
    val host = args(0)
    val port = args(1)
    // Resources reported to the master at registration.
    val memory = args(2).toInt
    val cores = args(3).toInt
    // Address of the master to register with.
    val masterHost = args(4)
    val masterPort = args(5)

    // Remoting configuration, written as a triple-quoted string.
    val configStr: String =
      s"""
         |akka.actor.provider = "akka.remote.RemoteActorRefProvider"
         |akka.remote.netty.tcp.hostname = "$host"
         |akka.remote.netty.tcp.port = "$port"
         |""".stripMargin
    val config = ConfigFactory.parseString(configStr)

    // 1. Create the ActorSystem.
    val workerActorSystem: ActorSystem = ActorSystem("workerActorSystem", config)
    // 2. Create the worker actor.
    val workerActor: ActorRef =
      workerActorSystem.actorOf(Props(new Worker(memory, cores, masterHost, masterPort)), "workerActor")
    // 3. Smoke test: send the worker a message. Worker.receive has no case for this
    //    String, so it only exercises message delivery.
    workerActor ! "start"
  }
}
(3)样例类 RemoteMessage
/** Marker for messages exchanged between the worker and master processes; serializable. */
trait RemoteMessage extends Serializable

/** Worker -> master registration request. Sent across processes, so it must be serializable. */
case class RegisterMessage(workerId: String, memory: Int, cores: Int) extends RemoteMessage

/** Master -> worker confirmation of a successful registration. Sent across processes, so it must be serializable. */
case class RegisteredMessage(message: String) extends RemoteMessage

/** Tick a worker sends to itself; stays inside one process, so no serialization is needed. */
case object SendHeartBeat

/** Worker -> master heartbeat. Sent across processes, so it must be serializable. */
case class HeartBeat(workerId: String) extends RemoteMessage

/** Tick the master sends to itself; stays inside one process, so no serialization is needed. */
case object CheckOutTime
(4)WorkerInfo
/**
 * Registration record the master keeps for one worker.
 *
 * @param workerId id the worker registered with
 * @param memory   memory figure reported by the worker
 * @param cores    CPU core count reported by the worker
 */
class WorkerInfo(val workerId: String, val memory: Int, val cores: Int) {
  // Time of the worker's most recent heartbeat, in epoch milliseconds. It starts at the
  // construction time rather than 0, so a record that has not received a heartbeat yet
  // is not mistaken by an age-based timeout check for one that expired long ago.
  var lastHheartBeartTime: Long = System.currentTimeMillis()
}
先启动 master,再依次启动 2 个 worker,然后停止其中一个 worker,运行结果如下:
master:
[INFO] [09/03/2019 09:28:41.744] [main] [Remoting] Starting remoting
[INFO] [09/03/2019 09:28:42.831] [main] [Remoting] Remoting started; listening on addresses :[akka.tcp://masterActorSystem@127.0.0.1:8888]
[INFO] [09/03/2019 09:28:42.833] [main] [Remoting] Remoting now listens on addresses: [akka.tcp://masterActorSystem@127.0.0.1:8888]
Master constructor invoked
preStart method invoked
活着的worker总数:0
活着的worker总数:1
活着的worker总数:1
活着的worker总数:1
活着的worker总数:2
[WARN] [09/03/2019 09:29:47.978] [masterActorSystem-akka.remote.default-remote-dispatcher-6] [akka.tcp://masterActorSystem@127.0.0.1:8888/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2FworkerActorSystem%40127.0.0.1%3A6667-1] Association with remote system [akka.tcp://workerActorSystem@127.0.0.1:6667] has failed, address is now gated for [5000] ms. Reason: [Disassociated]
超时的workerId:a078ebfd-4bea-472f-9390-34c5ff03a78d
活着的worker总数:1
worker:
Worker constructor invoked
preStart method invoked
workerId:211fe9c0-1b2a-4178-8b62-7d1a781a4952 注册成功