Spark分布式计算模拟案例:
三个JVM
Driver(客户端)、Executor1、Executor2
案例流程:
先启动两个服务器(Executor1、Executor2),随后客户端(Driver)把数据和计算逻辑分别发送给它们,由各服务器完成运算并打印结果
案例所建立的类:
Driver: 客户端,进行数据的发送
Executor1: 服务器1,作为第一个计算节点来计算客户端发来的数据
Executor2: 服务器2,作为第二个计算节点来计算客户端发来的数据
Task: 数据结构,里面存放完整数据以及对数据进行计算的逻辑
SubTask: 计算任务,每个节点所处理的数据以及计算操作
案例的详细实现:
Driver.scala
import java.io.{ObjectOutput, ObjectOutputStream, OutputStream}
import java.net.Socket
/**
 * Client side of the simulation: splits the data held by a [[Task]] into two
 * halves and ships each half, together with the computation logic, to one of
 * the two executor processes as a serialized [[SubTask]].
 *
 * Both executors must already be listening (ports 1111 and 2222 on localhost)
 * before this program is started.
 */
object Driver {

  /**
   * Connects to both executors, sends the first two elements to the executor
   * on port 1111 and the last two elements to the executor on port 2222.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val task = new Task()

    // Connect to both executors up front so that nothing is sent if either
    // of them is unreachable.
    val client1 = new Socket("localhost", 1111)
    try {
      val client2 = new Socket("localhost", 2222)
      try {
        send(client1, task.logic, task.datas.take(2))
        send(client2, task.logic, task.datas.takeRight(2))
      } finally {
        client2.close()
      }
    } finally {
      // Closing an already-closed socket is a no-op, so this is safe even
      // though closing the object stream in `send` also closes the socket.
      client1.close()
    }

    println("客户端已将数据发送完!!!")
  }

  /**
   * Serializes one [[SubTask]] over the given socket and closes the stream.
   *
   * @param socket connected socket to an executor
   * @param logic  function the executor applies to every element
   * @param datas  slice of the data this executor is responsible for
   */
  private def send(socket: Socket, logic: Int => Int, datas: List[Int]): Unit = {
    val out: OutputStream = socket.getOutputStream
    val objOut = new ObjectOutputStream(out)
    try {
      val subTask = new SubTask()
      subTask.logic = logic
      subTask.datas = datas
      objOut.writeObject(subTask)
      objOut.flush()
    } finally {
      // Always release the stream, even if serialization fails.
      objOut.close()
    }
  }
}
Executor1.scala
import java.io.{InputStream, ObjectInputStream}
import java.net.{ServerSocket, Socket}
/**
 * First compute node of the simulation. Listens on port 1111, accepts a single
 * connection, reads one serialized [[SubTask]] from it, runs the computation
 * and prints the result.
 */
object Executor1 {

  /**
   * Starts the server, handles exactly one client and then shuts down.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Start the server and wait for data.
    val server = new ServerSocket(1111)
    try {
      println("服务器[1111]启动,等待接收客户端发来的数据。。。")
      // Block until the client connects.
      val client: Socket = server.accept()
      try {
        val in: InputStream = client.getInputStream
        val objIn = new ObjectInputStream(in)
        try {
          // NOTE(review): Java deserialization of data received from the
          // network is unsafe for untrusted peers; acceptable only for this
          // local demo.
          val subTask: SubTask = objIn.readObject().asInstanceOf[SubTask]
          val ints: List[Int] = subTask.compute()
          println(s"节点[1111]计算出的结果为: $ints")
        } finally {
          objIn.close()
        }
      } finally {
        client.close()
      }
    } finally {
      // Release the listening port even when accept/read/compute fails.
      server.close()
    }
  }
}
Executor2.scala
/**
 * Second compute node of the simulation. Listens on port 2222, accepts a
 * single connection, reads one serialized [[SubTask]] from it, runs the
 * computation and prints the result.
 */
object Executor2 {
  // Imported locally so this snippet compiles as its own file.
  import java.io.{InputStream, ObjectInputStream}
  import java.net.{ServerSocket, Socket}

  /**
   * Starts the server, handles exactly one client and then shuts down.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Start the server and wait for data. The port must be 2222: that is the
    // port the driver connects to and the one announced in the log messages.
    val server = new ServerSocket(2222)
    try {
      println("服务器[2222]启动,等待接收客户端发来的数据。。。")
      // Block until the client connects.
      val client: Socket = server.accept()
      try {
        val in: InputStream = client.getInputStream
        val objIn = new ObjectInputStream(in)
        try {
          // NOTE(review): Java deserialization of data received from the
          // network is unsafe for untrusted peers; acceptable only for this
          // local demo.
          val subTask: SubTask = objIn.readObject().asInstanceOf[SubTask]
          val ints: List[Int] = subTask.compute()
          println(s"节点[2222]计算出的结果为: $ints")
        } finally {
          objIn.close()
        }
      } finally {
        client.close()
      }
    } finally {
      // Release the listening port even when accept/read/compute fails.
      server.close()
    }
  }
}
Task.scala
/**
 * The complete job: the full data set together with the function that is to
 * be applied to every element.
 */
class Task extends Serializable {

  /** Full input data set. */
  val datas: List[Int] = List(2, 4, 8, 16)

  /** Per-element computation: doubles its argument. */
  val logic: Int => Int = (n: Int) => n * 2
}
SubTask.scala
/**
 * The unit of work handed to a single compute node: a slice of the data plus
 * the function to apply to it. Both fields start out unset (null) and must be
 * assigned before `compute` is called.
 */
class SubTask extends Serializable {

  /** Slice of the data this node processes. */
  var datas: List[Int] = _

  /** Function applied to every element of `datas`. */
  var logic: Int => Int = _

  /**
   * Applies `logic` to each element of `datas`, preserving order.
   *
   * @return the transformed elements
   */
  def compute(): List[Int] = {
    for (item <- datas) yield logic(item)
  }
}
案例的运行结果: