检查点
本地检查点
/**
 * Demonstrates RDD checkpointing into a local-filesystem directory.
 *
 * Builds a small in-memory RDD, marks it for checkpointing, runs an action
 * (`collect`) and then prints the value of `rdd.isCheckpointed`.
 */
object CheckPointDemo {

  /**
   * Entry point of the demo.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // local[*]: run Spark locally with as many worker threads as there are logical cores
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("checkPointDemo")
    val sc: SparkContext = SparkContext.getOrCreate(conf)
    try {
      sc.setCheckpointDir("file://****") // directory where checkpoint files are written
      val rdd: RDD[String] = sc.parallelize(Array("hello,1", "world,2", "java,3", "scala,4"))
      // Persist first: otherwise writing the checkpoint recomputes the RDD a second time.
      rdd.cache()
      // Only marks the RDD; the checkpoint is written when the first action runs.
      rdd.checkpoint()
      rdd.collect().foreach(println)
      println("rdd是否设置了检查点:" + rdd.isCheckpointed)
      //println(rdd.getCheckpointFile) // print the checkpoint path
    } finally {
      // Always release the context, even when the job above fails.
      sc.stop()
    }
  }
}
hdfs检查点
/**
 * Demonstrates RDD checkpointing into an HDFS directory.
 *
 * Builds a small in-memory RDD, marks it for checkpointing, runs an action
 * (`collect`), then prints the value of `rdd.isCheckpointed` and the
 * checkpoint file location reported by `rdd.getCheckpointFile`.
 */
object CheckPointDemo {

  /**
   * Entry point of the demo.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[4]").setAppName("checkPointDemo")
    val sc: SparkContext = SparkContext.getOrCreate(conf)
    try {
      // Checkpoint files are written below this HDFS directory.
      sc.setCheckpointDir("hdfs://cp145:9000/tmp/checkpoint")
      val rdd: RDD[String] = sc.parallelize(Array("hello,1", "world,2", "java,3", "scala,4"))
      // Persist first: otherwise writing the checkpoint recomputes the RDD a second time.
      rdd.cache()
      // Only marks the RDD; the checkpoint is written when the first action runs.
      rdd.checkpoint()
      rdd.collect().foreach(println)
      println("rdd是否设置了检查点:" + rdd.isCheckpointed)
      // getCheckpointFile is an Option, so this prints Some(path) or None.
      println(rdd.getCheckpointFile)
    } finally {
      // Always release the context, even when the job above fails.
      sc.stop()
    }
  }
}
广播
/**
 * Demonstrates a Spark broadcast variable.
 *
 * Broadcasts a small string array and, for every value of a pair RDD,
 * prefixes the value with the broadcast array's element at index 2.
 */
object BroadCastDemo {

  /**
   * Entry point of the demo.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val arr = Array("hello", "hi", "come on baby")
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("broadCast")
    val sc: SparkContext = SparkContext.getOrCreate(conf)
    try {
      val broadcastVar: Broadcast[Array[String]] = sc.broadcast(arr)
      val rdd: RDD[(Int, String)] =
        sc.parallelize(Array((1, "leader"), (2, "teamLeader"), (3, "worker")))
      val rdd2: RDD[(Int, String)] = rdd.mapValues { x =>
        println("value is :" + x)
        // Read the array through the broadcast handle rather than capturing `arr` in the closure.
        broadcastVar.value(2) + ":" + x
      }
      rdd2.collect().foreach(println)
    } finally {
      // Always release the context, even when the job above fails.
      sc.stop()
    }
  }
}