项目的意义
- 深入理解 Spark 的 Master 和 Worker 的通讯机制
- 加深对主从服务 心跳检测机制(HeartBeat)的理解,方便以后 spark 源码二次开发。
项目需求分析
- worker 注册到 Master, Master 完成注册,并回复 worker 注册成功
- worker 定时发送心跳,Master 能够接收到
- Master 接收到 worker 心跳后,要更新该 worker 的最近一次发送心跳的时间
- 给 Master 启动定时任务,定时检测注册的 worker 有哪些没有更新心跳,并将其从 hashmap 中删除
- master worker 进行分布式部署(Linux 系统)
实现功能 1-Worker 完成注册
worker 注册到 Master, Master 完成注册,并回复 worker 注册成功
SparkMaster
import akka.actor.{Actor, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import com.xyq.sparkmasterworker.common.{RegisterWorkerInfo, RegisteredWorkerInfo, WorkerInfo}
import scala.collection.mutable
/** Master-side actor that keeps a registry of the workers that have signed up.
  *
  * Handles two messages: the string "start", which only logs that the master is
  * up, and RegisterWorkerInfo, which stores a previously unseen worker and
  * acknowledges it with RegisteredWorkerInfo.
  */
class MasterActor extends Actor {

  // Registered workers, keyed by worker id.
  val workers = mutable.Map[String, WorkerInfo]()

  override def receive: Receive = {
    case "start" =>
      println("master服务器启动了")

    case RegisterWorkerInfo(id, cpu, ram) =>
      println("接收到注册消息")
      workers.get(id) match {
        case Some(_) =>
          // Id already present: nothing is stored and no reply is sent.
          ()
        case None =>
          val newcomer = new WorkerInfo(id, cpu, ram)
          workers(id) = newcomer
          println("服务器的 workers=" + workers)
          // Tell the worker that its registration succeeded.
          sender() ! RegisteredWorkerInfo
      }
  }
}
/** Entry point of the master process.
  *
  * Builds the "SparkMaster" actor system with remoting bound to 127.0.0.1:10005,
  * creates the master actor under the name "MasterActor-01" and sends it "start".
  */
object SparkMaster extends App {

  // Remoting settings for the master's actor system.
  val config = {
    val remotingSettings =
      """
        |akka.actor.provider="akka.remote.RemoteActorRefProvider"
        |akka.remote.netty.tcp.hostname=127.0.0.1
        |akka.remote.netty.tcp.port=10005
        |""".stripMargin
    ConfigFactory.parseString(remotingSettings)
  }

  val sparkMasterSystem = ActorSystem("SparkMaster", config)

  // Create the master actor; the returned reference is used to message it.
  private val masterActorRef =
    sparkMasterSystem.actorOf(Props[MasterActor], "MasterActor-01")

  // Kick the master actor off.
  masterActorRef ! "start"
}
SparkWorker
import akka.actor.{Actor, ActorSelection, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import com.xyq.sparkmasterworker.common.{RegisterWorkerInfo, RegisteredWorkerInfo}
/** Worker-side actor that registers itself with a remote master.
  *
  * @param masterHost host part of the master's remote actor path
  * @param masterPort port part of the master's remote actor path
  */
class WorkerActor(masterHost: String, masterPort: Int) extends Actor {

  // Random UUID string that identifies this worker.
  val id = java.util.UUID.randomUUID().toString()

  // Selection pointing at the master actor; assigned in preStart().
  var masterActorRef: ActorSelection = _

  // Builds the selection for the master actor's remote path.
  override def preStart(): Unit = {
    println("preStart()调用")
    val masterPath =
      s"akka.tcp://SparkMaster@${masterHost}:${masterPort}/user/MasterActor-01"
    masterActorRef = context.actorSelection(masterPath)
  }

  override def receive: Receive = {
    case "start" =>
      println("WokerActor启动了")
      // Send the registration request (cpu = 16, ram = 16 * 1024).
      val registration = RegisterWorkerInfo(id, 16, 16 * 1024)
      masterActorRef ! registration

    // The master confirmed the registration.
    case RegisteredWorkerInfo =>
      println("workerid=" + id + "注册成功!")
  }
}
/** Entry point of a worker process.
  *
  * Builds the "SparkWorker" actor system with remoting bound to
  * workerHost:workerPort, creates a WorkerActor named "workerActor01" that
  * targets masterHost:masterPort, and sends it "start".
  */
object WorkerActor extends App {
  // Endpoint this worker's actor system binds to. These values feed the remoting
  // configuration below, so editing them here is enough to move the worker.
  val workerHost = "127.0.0.1"
  val workerPort = 10002

  // Endpoint of the master's actor system.
  val masterHost = "127.0.0.1"
  val masterPort = 10005

  val config = ConfigFactory.parseString(
    s"""
       |akka.actor.provider="akka.remote.RemoteActorRefProvider"
       |akka.remote.netty.tcp.hostname=$workerHost
       |akka.remote.netty.tcp.port=$workerPort
       |""".stripMargin)

  // Actor system hosting the worker actor.
  val sparkWorkerSystem = ActorSystem("SparkWorker", config)

  // Create the worker actor and trigger its registration with the master.
  val workerActor0Ref =
    sparkWorkerSystem.actorOf(Props(new WorkerActor(masterHost, masterPort)), "workerActor01")
  workerActor0Ref ! "start"
}
MessageProtocol
// Registration request a worker sends to the master: the worker's id plus its cpu and ram figures.
case class RegisterWorkerInfo(id: String, cpu: Int, ram: Int)
// Record the master keeps for each worker in its hash map of registered workers.
// Expected to be extended later (for example with the time of the worker's last heartbeat).
class WorkerInfo(val id: String, val cpu: Int, val ram: Int)
// Reply the master sends to a worker once its registration has succeeded.
case object RegisteredWorkerInfo
实现功能 2-Worker 定时发送心跳
worker 定时发送心跳给 Master,Master 能够接收到,并更新 worker 上一次心跳时间
思路分析:
MessageProtocol
package com.xyq.sparkmasterworker.common
// Registration request a worker sends to the master: the worker's id plus its cpu and ram figures.
case class RegisterWorkerInfo(id: String, cpu: Int, ram: Int)
// Record the master keeps for each worker in its hash map of registered workers.
// Besides the registration data it carries the time of the worker's most recent heartbeat.
class WorkerInfo(val id: String, val cpu: Int, val ram: Int){
// Millisecond timestamp from System.currentTimeMillis(); starts at construction time.
var lastHeartBeat : Long = System.currentTimeMillis()
}
// Reply the master sends to a worker once its registration has succeeded.
case object RegisteredWorkerInfo
// Message the worker's timer delivers to the worker itself at a fixed interval.
case object SendHeartBeat
// Heartbeat the worker sends to the master each time its timer fires; carries the worker's id.
case class HeartBeat(id: String)
SparkWorker
package com.xyq.sparkmasterworker.worker
import akka.actor.{Actor, ActorSelection, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import com.xyq.sparkmasterworker.common.{RegisterWorkerInfo, RegisteredWorkerInfo, SendHeartBeat,HeartBeat}
//时间单位
import scala.concurrent.duration._
/** Worker-side actor: registers with the master, then sends it periodic heartbeats.
  *
  * Messages handled:
  *  - "start": sends RegisterWorkerInfo to the master.
  *  - RegisteredWorkerInfo: starts the heartbeat timer (once).
  *  - SendHeartBeat: sends HeartBeat(id) to the master.
  *
  * @param masterHost host part of the master's remote actor path
  * @param masterPort port part of the master's remote actor path
  */
class WorkerActor(masterHost: String, masterPort: Int) extends Actor {

  // Selection pointing at the master actor; assigned in preStart().
  var masterActorRef: ActorSelection = _

  // Random UUID string that identifies this worker.
  val id = java.util.UUID.randomUUID().toString()

  // Handle of the running heartbeat timer, if one has been started. Kept so the
  // timer is started at most once and can be cancelled when the actor stops.
  private var heartBeatTask: Option[akka.actor.Cancellable] = None

  // Builds the selection for the master actor's remote path.
  override def preStart(): Unit = {
    println("preStart()调用")
    masterActorRef = context.actorSelection(
      s"akka.tcp://SparkMaster@${masterHost}:${masterPort}/user/MasterActor-01")
  }

  // Stops the heartbeat timer so it does not keep firing after this actor stopped.
  override def postStop(): Unit = {
    heartBeatTask.foreach(_.cancel())
    heartBeatTask = None
  }

  override def receive: Receive = {
    case "start" =>
      println("WokerActor启动了")
      // Send the registration request (cpu = 16, ram = 16 * 1024).
      masterActorRef ! RegisterWorkerInfo(id, 16, 16 * 1024)

    // The master confirmed the registration.
    case RegisteredWorkerInfo =>
      println(s"workerid=${id}注册成功!")
      // Start the timer only if none is running; a second acknowledgement must
      // not stack another timer on top of the first.
      if (heartBeatTask.isEmpty) {
        import context.dispatcher
        // No initial delay, then SendHeartBeat to self every 3000 ms.
        // Method-call syntax (0.millis) avoids the postfix-operator language feature.
        // NOTE(review): Scheduler.schedule is deprecated in newer Akka releases
        // (scheduleWithFixedDelay / scheduleAtFixedRate); kept because the Akka
        // version used by this project is not visible here.
        heartBeatTask = Some(
          context.system.scheduler.schedule(0.millis, 3000.millis, self, SendHeartBeat))
      }

    case SendHeartBeat =>
      println(s"worker = ${id}给 master 发送心跳")
      masterActorRef ! HeartBeat(id)
  }
}
/** Entry point of a worker process.
  *
  * Builds the "SparkWorker" actor system with remoting bound to
  * workerHost:workerPort, creates a WorkerActor named "workerActor01" that
  * targets masterHost:masterPort, and sends it "start".
  */
object WorkerActor extends App {
  // Endpoint this worker's actor system binds to. These values feed the remoting
  // configuration below, so editing them here is enough to move the worker.
  val workerHost = "127.0.0.1"
  val workerPort = 10002

  // Endpoint of the master's actor system.
  val masterHost = "127.0.0.1"
  val masterPort = 10005

  val config = ConfigFactory.parseString(
    s"""
       |akka.actor.provider="akka.remote.RemoteActorRefProvider"
       |akka.remote.netty.tcp.hostname=$workerHost
       |akka.remote.netty.tcp.port=$workerPort
       |""".stripMargin)

  // Actor system hosting the worker actor.
  val sparkWorkerSystem = ActorSystem("SparkWorker", config)

  // Create the worker actor and trigger its registration with the master.
  val workerActor0Ref =
    sparkWorkerSystem.actorOf(Props(new WorkerActor(masterHost, masterPort)), "workerActor01")
  workerActor0Ref ! "start"
}
SparkMaster
package com.xyq.sparkmasterworker.master
import akka.actor.{Actor, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import com.xyq.sparkmasterworker.common.{HeartBeat, RegisterWorkerInfo, RegisteredWorkerInfo, WorkerInfo}
import scala.collection.mutable
/** Master-side actor that tracks registered workers and their latest heartbeat.
  *
  * Messages handled:
  *  - "start": logs that the master is up.
  *  - RegisterWorkerInfo: stores a new WorkerInfo for an unseen id and replies
  *    with RegisteredWorkerInfo; an id that is already present is left untouched
  *    and gets no reply.
  *  - HeartBeat: refreshes lastHeartBeat of the matching worker; a heartbeat for
  *    an id that is not registered is logged and ignored.
  */
class MasterActor extends Actor {

  // Registered workers, keyed by worker id.
  val workers = mutable.Map[String, WorkerInfo]()

  override def receive: Receive = {
    case "start" =>
      println("master服务器启动了")

    case RegisterWorkerInfo(id, cpu, ram) =>
      println("接收到注册消息")
      if (!workers.contains(id)) {
        workers += (id -> new WorkerInfo(id, cpu, ram))
        println(s"服务器的 workers=$workers")
        // Tell the worker that its registration succeeded.
        sender() ! RegisteredWorkerInfo
      }

    case HeartBeat(id) =>
      // Look the worker up with get: workers(id) throws NoSuchElementException
      // for an unknown id, and that exception would escape receive.
      workers.get(id) match {
        case Some(workerInfo) =>
          workerInfo.lastHeartBeat = System.currentTimeMillis()
          println(s"master 更新了 $id 心跳时间...")
        case None =>
          println(s"master 收到未注册的 worker $id 的心跳,已忽略")
      }
  }
}
/** Entry point of the master process.
  *
  * Builds the "SparkMaster" actor system with remoting bound to 127.0.0.1:10005,
  * creates the master actor under the name "MasterActor-01" and sends it "start".
  */
object SparkMaster extends App {

  // Remoting settings for the master's actor system.
  val config = ConfigFactory.parseString(
    """
      |akka.actor.provider="akka.remote.RemoteActorRefProvider"
      |akka.remote.netty.tcp.hostname=127.0.0.1
      |akka.remote.netty.tcp.port=10005
      |""".stripMargin
  )

  val sparkMasterSystem = ActorSystem("SparkMaster", config)

  // Create the master actor; the returned reference is used to message it.
  private val masterActorRef = {
    val masterProps = Props[MasterActor]
    sparkMasterSystem.actorOf(masterProps, "MasterActor-01")
  }

  // Kick the master actor off; there is no sending actor at this point.
  masterActorRef.tell("start", Actor.noSender)
}
实现功能 3-Master 启动定时任务,定时检测注册的 worker
功能要求:Master 启动定时任务,定时检测注册的 worker 有哪些没有更新心跳,已经超时的 worker,将其从 hashmap 中删除掉
MessageProtocol
package com.xyq.sparkmasterworker.common
// Registration request a worker sends to the master: the worker's id plus its cpu and ram figures.
case class RegisterWorkerInfo(id: String, cpu: Int, ram: Int)
// Record the master keeps for each worker in its hash map of registered workers.
// Besides the registration data it carries the time of the worker's most recent heartbeat.
class WorkerInfo(val id: String, val cpu: Int, val ram: Int){
// Millisecond timestamp from System.currentTimeMillis(); starts at construction time.
var lastHeartBeat : Long = System.currentTimeMillis()
}
// Reply the master sends to a worker once its registration has succeeded.
case object RegisteredWorkerInfo
// Message the worker's timer delivers to the worker itself at a fixed interval.
case object SendHeartBeat
// Heartbeat the worker sends to the master each time its timer fires; carries the worker's id.
case class HeartBeat(id: String)
// Message the master sends to itself to kick off the periodic check for timed-out workers.
case object StartTimeOutWorker
// Message the master sends to itself to check the workers and remove those whose heartbeat has timed out.
case object RemoveTimeOutWorker
SparkMaster
package com.xyq.sparkmasterworker.master
import akka.actor.{Actor, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import com.xyq.sparkmasterworker.common._
import scala.collection.mutable
//时间单位
import scala.concurrent.duration._
/** Master-side actor that tracks registered workers, refreshes their heartbeat
  * time and periodically drops the ones whose heartbeat has timed out.
  *
  * Messages handled:
  *  - "start": logs that the master is up and sends StartTimeOutWorker to itself.
  *  - StartTimeOutWorker: starts the periodic timeout sweep (once).
  *  - RemoveTimeOutWorker: removes every worker whose last heartbeat is older
  *    than the timeout and logs how many workers remain.
  *  - RegisterWorkerInfo: stores a new WorkerInfo for an unseen id and replies
  *    with RegisteredWorkerInfo; an id that is already present is left untouched
  *    and gets no reply.
  *  - HeartBeat: refreshes lastHeartBeat of the matching worker; a heartbeat for
  *    an id that is not (or no longer) registered is logged and ignored.
  */
class MasterActor extends Actor {

  // Registered workers, keyed by worker id.
  val workers = mutable.Map[String, WorkerInfo]()

  // A worker whose last heartbeat is older than this many milliseconds is removed.
  private val heartBeatTimeoutMillis = 6000L

  // Pause between two timeout sweeps (9 seconds).
  private val checkInterval = 9000.millis

  // Handle of the running sweep timer, if one has been started. Kept so the
  // timer is started at most once and can be cancelled when the actor stops.
  private var timeoutCheckTask: Option[akka.actor.Cancellable] = None

  // postStop() below cancels the timer, and the default preRestart calls
  // postStop(); queue a StartTimeOutWorker first so the sweep is started again
  // after a restart instead of silently ending.
  override def preRestart(reason: Throwable, message: Option[Any]): Unit = {
    if (timeoutCheckTask.nonEmpty) self ! StartTimeOutWorker
    super.preRestart(reason, message)
  }

  // Stops the sweep timer so it does not keep firing after this actor stopped.
  override def postStop(): Unit = {
    timeoutCheckTask.foreach(_.cancel())
    timeoutCheckTask = None
  }

  override def receive: Receive = {
    case "start" =>
      println("master服务器启动了")
      self ! StartTimeOutWorker

    case StartTimeOutWorker =>
      // Start the timer only if none is running; a repeated "start" must not
      // stack another sweep on top of the first.
      if (timeoutCheckTask.isEmpty) {
        println("开始了定时检测 worker 心跳的任务")
        import context.dispatcher
        // No initial delay, then RemoveTimeOutWorker to self every checkInterval.
        // Method-call syntax (0.millis) avoids the postfix-operator language feature.
        // NOTE(review): Scheduler.schedule is deprecated in newer Akka releases
        // (scheduleWithFixedDelay / scheduleAtFixedRate); kept because the Akka
        // version used by this project is not visible here.
        timeoutCheckTask = Some(
          context.system.scheduler.schedule(0.millis, checkInterval, self, RemoveTimeOutWorker))
      }

    case RemoveTimeOutWorker =>
      val nowTime = System.currentTimeMillis()
      // Snapshot the ids to drop first, then remove them, so the map is never
      // modified while it is being traversed.
      val expiredIds = workers.collect {
        case (workerId, info) if nowTime - info.lastHeartBeat > heartBeatTimeoutMillis => workerId
      }.toList
      workers --= expiredIds
      println(s"当前有 ${workers.size} 个 worker 存活的")

    case RegisterWorkerInfo(id, cpu, ram) =>
      println("接收到注册消息")
      if (!workers.contains(id)) {
        workers += (id -> new WorkerInfo(id, cpu, ram))
        println(s"服务器的 workers=$workers")
        // Tell the worker that its registration succeeded.
        sender() ! RegisteredWorkerInfo
      }

    case HeartBeat(id) =>
      // A worker removed by the sweep may still be sending heartbeats. Look it up
      // with get: workers(id) throws NoSuchElementException for an unknown id, and
      // that exception would escape receive.
      workers.get(id) match {
        case Some(workerInfo) =>
          workerInfo.lastHeartBeat = System.currentTimeMillis()
          println(s"master 更新了 $id 心跳时间...")
        case None =>
          println(s"master 收到未注册的 worker $id 的心跳,已忽略")
      }
  }
}
/** Entry point of the master process.
  *
  * Builds the "SparkMaster" actor system with remoting bound to 127.0.0.1:10005,
  * creates the master actor under the name "MasterActor-01" and sends it "start".
  */
object SparkMaster extends App {

  // Remoting settings for the master's actor system.
  val config = {
    val masterSettings =
      """
        |akka.actor.provider="akka.remote.RemoteActorRefProvider"
        |akka.remote.netty.tcp.hostname=127.0.0.1
        |akka.remote.netty.tcp.port=10005
        |""".stripMargin
    ConfigFactory.parseString(masterSettings)
  }

  val sparkMasterSystem = ActorSystem("SparkMaster", config)

  // Create the master actor; the returned reference is used to message it.
  private val masterActorRef =
    sparkMasterSystem.actorOf(Props[MasterActor], "MasterActor-01")

  // Kick the master actor off.
  masterActorRef ! "start"
}
SparkWorker
package com.xyq.sparkmasterworker.worker
import akka.actor.{Actor, ActorSelection, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import com.xyq.sparkmasterworker.common.{RegisterWorkerInfo, RegisteredWorkerInfo, SendHeartBeat,HeartBeat}
//时间单位
import scala.concurrent.duration._
/** Worker-side actor: registers with the master, then sends it periodic heartbeats.
  *
  * Messages handled:
  *  - "start": sends RegisterWorkerInfo to the master.
  *  - RegisteredWorkerInfo: starts the heartbeat timer (once).
  *  - SendHeartBeat: sends HeartBeat(id) to the master.
  *
  * @param masterHost host part of the master's remote actor path
  * @param masterPort port part of the master's remote actor path
  */
class WorkerActor(masterHost: String, masterPort: Int) extends Actor {

  // Selection pointing at the master actor; assigned in preStart().
  var masterActorRef: ActorSelection = _

  // Random UUID string that identifies this worker.
  val id = java.util.UUID.randomUUID().toString()

  // Handle of the running heartbeat timer, if one has been started. Kept so the
  // timer is started at most once and can be cancelled when the actor stops.
  private var heartBeatTask: Option[akka.actor.Cancellable] = None

  // Builds the selection for the master actor's remote path.
  override def preStart(): Unit = {
    println("preStart()调用")
    masterActorRef = context.actorSelection(
      s"akka.tcp://SparkMaster@${masterHost}:${masterPort}/user/MasterActor-01")
  }

  // Stops the heartbeat timer so it does not keep firing after this actor stopped.
  override def postStop(): Unit = {
    heartBeatTask.foreach(_.cancel())
    heartBeatTask = None
  }

  override def receive: Receive = {
    case "start" =>
      println("WokerActor启动了")
      // Send the registration request (cpu = 16, ram = 16 * 1024).
      masterActorRef ! RegisterWorkerInfo(id, 16, 16 * 1024)

    // The master confirmed the registration.
    case RegisteredWorkerInfo =>
      println(s"workerid=${id}注册成功!")
      // Start the timer only if none is running; a second acknowledgement must
      // not stack another timer on top of the first.
      if (heartBeatTask.isEmpty) {
        import context.dispatcher
        // No initial delay, then SendHeartBeat to self every 3000 ms.
        // Method-call syntax (0.millis) avoids the postfix-operator language feature.
        // NOTE(review): Scheduler.schedule is deprecated in newer Akka releases
        // (scheduleWithFixedDelay / scheduleAtFixedRate); kept because the Akka
        // version used by this project is not visible here.
        heartBeatTask = Some(
          context.system.scheduler.schedule(0.millis, 3000.millis, self, SendHeartBeat))
      }

    case SendHeartBeat =>
      println(s"worker = ${id}给 master 发送心跳")
      masterActorRef ! HeartBeat(id)
  }
}
/** Entry point of a worker process.
  *
  * Builds the "SparkWorker" actor system with remoting bound to
  * workerHost:workerPort, creates a WorkerActor named "workerActor01" that
  * targets masterHost:masterPort, and sends it "start".
  */
object WorkerActor extends App {
  // Endpoint this worker's actor system binds to. These values feed the remoting
  // configuration below, so editing them here is enough to move the worker.
  val workerHost = "127.0.0.1"
  val workerPort = 10004

  // Endpoint of the master's actor system.
  val masterHost = "127.0.0.1"
  val masterPort = 10005

  val config = ConfigFactory.parseString(
    s"""
       |akka.actor.provider="akka.remote.RemoteActorRefProvider"
       |akka.remote.netty.tcp.hostname=$workerHost
       |akka.remote.netty.tcp.port=$workerPort
       |""".stripMargin)

  // Actor system hosting the worker actor.
  val sparkWorkerSystem = ActorSystem("SparkWorker", config)

  // Create the worker actor and trigger its registration with the master.
  val workerActor0Ref =
    sparkWorkerSystem.actorOf(Props(new WorkerActor(masterHost, masterPort)), "workerActor01")
  workerActor0Ref ! "start"
}