回顾
/**
* Akka
* 开发高并发的一个工具包
*
* Actor
* Actor和Actor可以相互通信,通信的方式是通过传递消息。
*
* ActorSystem的作用其实是创建和管理Actor
*
* Actor和Actor不能直接通信,是通过ActorRef进行通信。
* (在创建ActorRef的同时也创建了dispatcher和MailBox)
*
*/
1.Master和worker通信机制,
package cn.sheep.spark
import java.util.UUID
import akka.actor.{Actor, ActorSelection, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import scala.concurrent.duration._ //导入的是时间单位
class SparkWorker(masterUrl: String) extends Actor {
  // Proxy (ActorSelection) used to talk to the remote master actor.
  var masterProxy: ActorSelection = _
  // Unique identity of this worker for the lifetime of the process.
  val workId = UUID.randomUUID().toString

  // Resolve the master's address once, before any message is processed.
  override def preStart(): Unit = {
    masterProxy = context.actorSelection(masterUrl)
  }

  override def receive: Receive = {
    case "started" =>
      // Self-sent bootstrap message: register this worker's resources
      // (id, cores, ram) with the master. The master replies RegistedWorkerInfo.
      masterProxy ! RegisterWorkerInfo(workId, 4, 32 * 1024)

    case RegistedWorkerInfo =>
      // Registration acknowledged by the master: start a periodic timer that
      // tells ourselves to emit a heartbeat every 1500 ms.
      import context.dispatcher // ExecutionContext required by the scheduler
      context.system.scheduler.schedule(0 millis, 1500 millis, self, SendHeartBeat)

    case SendHeartBeat =>
      // Periodic self-message: forward a heartbeat to the master.
      println(s"======$workId 发送心跳了=============")
      masterProxy ! HeadBeat(workId)
  }
}
object SparkWorker {
  def main(args: Array[String]): Unit = {
    // Usage check: exactly four arguments are required.
    if (args.length != 4) {
      println(
        """
          |请输入参数:<host> <port> <workName> <masterURL>
          |""".stripMargin
      )
      sys.exit()
    }
    val Array(host, port, workName, masterURL) = args
    // Remoting configuration for this worker's ActorSystem.
    val config = ConfigFactory.parseString(
      s"""
         |akka.actor.provider = "akka.remote.RemoteActorRefProvider"
         |akka.remote.netty.tcp.hostname = $host
         |akka.remote.netty.tcp.port = $port
         |""".stripMargin)
    val actorSystem = ActorSystem("sparkWorker", config)
    // Spawn the worker actor, then trigger its registration with the master.
    val workerActorRef = actorSystem.actorOf(Props(new SparkWorker(masterURL)), workName)
    workerActorRef ! "started"
  }
}
package cn.sheep.spark
/**
 * Message protocol shared by SparkWorker and SparkMaster.
 */
// worker -> master: register this worker's resources (id, cores, ram)
case class RegisterWorkerInfo(id: String, core: Int, ram: Int)
// worker -> master: periodic heartbeat carrying the worker id
case class HeadBeat(id: String)

/**
 * master => worker / self-messages
 */
// worker -> self: start sending periodic heartbeats to the master
case object SendHeartBeat
// master -> self: start the scheduler that sweeps timed-out workers
case object CheckTimeOutWorker
// master -> self: evict workers whose heartbeat has timed out
case object RemoveTimeOutWorker
// master -> worker: registration acknowledged
case object RegistedWorkerInfo

// Master-side record of a registered worker (redundant `val` on the
// case-class parameter removed).
case class WorkerInfo(id: String, core: Int, ram: Int) {
  // Timestamp of the most recent heartbeat. Initialized to construction time
  // (instead of the default 0) so a freshly registered worker is not evicted
  // by the timeout sweep before its first heartbeat arrives.
  var lastHeatBeatTime: Long = System.currentTimeMillis()
}
package cn.sheep.spark
import akka.actor.{Actor, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import scala.concurrent.duration._
class SparkMaster extends Actor {
  // Registered workers, keyed by worker id.
  val id2WorkerInfo = collection.mutable.HashMap[String, WorkerInfo]()

  override def receive: Receive = {
    // A worker registers itself with its id, cores and ram.
    case RegisterWorkerInfo(wkId, core, ram) =>
      if (!id2WorkerInfo.contains(wkId)) {
        val workerInfo = new WorkerInfo(wkId, core, ram)
        // FIX: stamp the heartbeat time at registration. If it stayed at its
        // zero default, the next RemoveTimeOutWorker sweep would evict the
        // worker before its first heartbeat ever arrived.
        workerInfo.lastHeatBeatTime = System.currentTimeMillis()
        id2WorkerInfo += ((wkId, workerInfo))
        // Acknowledge so the worker starts its heartbeat timer.
        sender() ! RegistedWorkerInfo
      }

    case HeadBeat(wkId) =>
      // FIX: a heartbeat can arrive after the worker was evicted; a direct
      // id2WorkerInfo(wkId) lookup would then throw NoSuchElementException.
      id2WorkerInfo.get(wkId).foreach { workerInfo =>
        workerInfo.lastHeatBeatTime = System.currentTimeMillis()
      }

    // Self-sent once at startup: begin sweeping timed-out workers every 6 s.
    case CheckTimeOutWorker =>
      import context.dispatcher // ExecutionContext required by the scheduler
      context.system.scheduler.schedule(0 millis, 6000 millis, self, RemoveTimeOutWorker)

    case RemoveTimeOutWorker =>
      val currentTime = System.currentTimeMillis()
      // Materialize the expired entries first so we never mutate the map
      // while iterating over a view of its values.
      val expired = id2WorkerInfo.values
        .filter(wk => currentTime - wk.lastHeatBeatTime >= 3000)
        .toList
      expired.foreach(wk => id2WorkerInfo.remove(wk.id))
      println(s"还剩${id2WorkerInfo.size}个存活的worker")
  }
}
object SparkMaster {
  def main(args: Array[String]): Unit = {
    // Usage check: exactly three arguments are required.
    if (args.length != 3) {
      println(
        """
          |请输入参数:<host> <port> <masterName>
          |""".stripMargin
      )
      sys.exit()
    }
    val Array(host, port, masterName) = args
    // Remoting configuration for the master's ActorSystem.
    val config = ConfigFactory.parseString(
      s"""
         |akka.actor.provider = "akka.remote.RemoteActorRefProvider"
         |akka.remote.netty.tcp.hostname = $host
         |akka.remote.netty.tcp.port = $port
         |""".stripMargin)
    val actorSystem = ActorSystem("sparkMaster", config)
    val masterActorRef = actorSystem.actorOf(Props[SparkMaster], masterName)
    // Kick off the periodic sweep that evicts timed-out workers.
    masterActorRef ! CheckTimeOutWorker
  }
}
2.在Spark集群上运行
1.在pom.xml中引入打包插件
2.标注需要打包的主类
3.点击打包
4.在项目的工程目录下 -> target,查看jar
5.分别将worker.jar和master.jar上传至Spark集群
6.在一台机器上启动master.jar
命令:java -jar master.jar 192.168.163.101 8000 master
7.在另外多台机器启动worker.jar
命令:java -jar worker.jar 192.168.163.103 8000 work_01 akka.tcp://sparkMaster@192.168.163.101:8000/user/master
8.查看运行结果
3.Scala隐式(implicit)转换
掌握implicit可以更好的了解spark底层的源码
package day05
import java.io.File
object MyImpicits {
  /**
   * Implicit conversion letting a plain java.io.File be used where RichFile
   * methods (e.g. `count`) are expected: `import MyImpicits._` brings it in.
   *
   * FIX: added the explicit result type — implicit defs with inferred result
   * types are fragile and rejected by stricter compiler settings.
   */
  implicit def file2RichFile(file: File): RichFile = new RichFile(file)
}
package day05
import java.io.{BufferedReader, File, FileReader}
import scala.io.Source
object ScalaImplicit {
/**
 * Demo of Scala implicits:
 *  - implicit values and implicit parameters
 *  - implicit type conversions (both `implicit def` and a function-typed
 *    `implicit val`)
 *  - implicit classes (extension methods)
 * Behavior depends on exactly which implicits are in scope, so the code
 * below is left untouched and only documented.
 */
// Implicit Int picked up by `add` / `addPlus` when no explicit value is passed.
implicit val r = 6
//implicit val a = 1  // a second implicit Int would make resolution ambiguous
// Implicit String parameter with a default; an implicit value in scope wins over the default.
def say(implicit content: String = "五一过了") = println(content)
def add(a: Int)(implicit b: Int) = a + b
// Implicit parameters must form the last parameter list,
// and a single `implicit` keyword marks every parameter of that list.
def addPlus(a: Int)(implicit b: Int, c: Int) = a + b + c
// Implicit conversion method Double => Int (prints when invoked, for tracing).
implicit def double2Int(double: Double) = {
println("-------double2Int----------")
double.toInt
}
// Function-typed implicit value, also eligible as a Double => Int conversion.
implicit val fdouble2Int = (double: Double) =>{
println("-----fdouble2Int-----")
double.toInt
}
/**
 * Method converting a File into a RichFile (kept disabled here; the same
 * conversion lives in MyImpicits).
 */
//implicit def file2RichFile(file: File) = new RichFile(file)
/**
 * Implicit class: adds a `read` extension method to java.io.File.
 * NOTE(review): implicit classes must be nested inside an object/class/trait,
 * not declared at the top level.
 */
implicit class FileRead(file: File){
def read = Source.fromFile(file).mkString
}
def main(args: Array[String]): Unit = {
say("下午好")
implicit val msg = "你好帅"
// Two matching implicit values in scope would be ambiguous and fail to compile.
//implicit val msg2 = "你好帅..."
/**
 * `say` takes an implicit parameter: when called with no argument, the
 * compiler searches the enclosing scope for an implicit String value
 * (here `msg`).
 */
say
println(add(5)) //==>11
println(addPlus(5)(7, 1)) //==>13
println(addPlus(4)) //==>16
println("--------隐式类型转换-------")
// `age` is declared Int but assigned a Double; the compiler inserts an
// implicit Double => Int conversion found in scope.
// NOTE(review): the original note claims function-typed implicits are tried
// before implicit defs — confirm; with both in scope this may be ambiguous.
val age: Int = 20.5
println(age)
//import MyImpicits._ // would import the File => RichFile conversion for count()
val file = new File("C:\\Users\\刘元帅\\Desktop\\大数据笔记.txt")
//println("Count = " +file.count())
println(s"FileContent = ${file.read}")
}
}
class RichFile(file: File) {
  /**
   * Counts the number of lines in the wrapped file.
   *
   * FIX: the original never closed its FileReader/BufferedReader (resource
   * leak) and its catch block computed `sum` as a discarded expression.
   * The reader is now closed in a `finally`; on a read error the partial
   * count accumulated so far is returned (best effort, as before).
   */
  def count(): Int = {
    var sum = 0
    // May throw FileNotFoundException, exactly as the original constructor did.
    val bufferedReader = new BufferedReader(new FileReader(file))
    try {
      var line = bufferedReader.readLine()
      while (line != null) {
        sum += 1
        line = bufferedReader.readLine()
      }
    } catch {
      case _: Exception => // best effort: fall through with the partial count
    } finally {
      bufferedReader.close()
    }
    sum
  }
}
4.泛型约束
package day05
import day05.ClothesEnum.ClothesEnum
/**
 * Generic message envelope; T is the payload type.
 */
abstract class Message[T](context: T)
// FIX: the original `extends Message` supplied neither the type argument nor
// the constructor argument and did not compile; both are passed explicitly.
class StrMessage(context: String) extends Message[String](context)
// FIX: the original declared `IntMessgae[Int]`, i.e. a type parameter *named*
// Int that shadowed scala.Int. A normally named parameter keeps the
// `new IntMessgae[Int](...)` call shape while removing the shadowing trap.
class IntMessgae[T](context: T) extends Message[T](context)
// A garment parameterized over the types used for its kind, color and size.
class Clothes[A, B, C](val clothType: A, val color: B, val size: C)

// Enumeration of clothing categories.
object ClothesEnum extends Enumeration {
  type ClothesEnum = Value
  // Declared one per line (same ids and names as the multi-assignment form).
  val 上衣 = Value
  val 内衣 = Value
  val 裤子 = Value
}
object ScalaFanXing {
  def main(args: Array[String]): Unit = {
    // Same generic class instantiated with an Int size...
    val clth1 = new Clothes[ClothesEnum, String, Int](ClothesEnum.上衣, "black", 150)
    println(clth1.clothType)
    // ...and with a String size.
    val clth2 = new Clothes[ClothesEnum, String, String](ClothesEnum.上衣, "black", "M")
    // FIX: the original printed clth1.size here, re-printing the first garment
    // instead of demonstrating clth2's String-typed size.
    println(clth2.size)
  }
}
5.Scala中的排序
* 对象的比较方法
* 1.类实现Comparable接口,实现compareTo方法
* 在Scala中对应Comparable的是Ordered特质(实现其compare方法)
*
* 2.Comparator比较器,提供比较方法。
* 在Scala中对应Comparator的是Ordering
6.对象的上界
在Java中泛型表示某个类型是Test类型的子类型,使用extends关键字:
//或用通配符的形式
<? extends Test>
这种形式也叫upper bounds(上限或上界),同样的意思在Scala的写法为:
[T <: Test]
//或用通配符:
[_ <: Test]
同理下界的表示方法: