广播变量 (Broadcast variable example):
package com.fengrui.test
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Broadcast-variable demo: ships a read-only List to every executor once,
 * instead of serializing it into each task closure.
 */
object BroadCastTest {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("BroadCastTest").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      val list = List("zhangsan", "lisi")
      // Create the broadcast variable (one cached, read-only copy per executor).
      val broList: Broadcast[List[String]] = sc.broadcast(list)
      val nameRDD: RDD[String] = sc.parallelize(List("zhangsan", "lisi", "冯瑞"))
      /**
       * Keep only the names from nameRDD that are NOT in the broadcast list.
       */
      val res: RDD[String] = nameRDD.filter(name => {
        // Read the broadcast payload on the executor via .value.
        val inList: List[String] = broList.value
        !inList.contains(name)
      })
      res.foreach(println)
    } finally {
      // Always release the SparkContext, even if the job above fails.
      sc.stop()
    }
  }
}
累加器 (Accumulator example):
package com.fengrui.test
import org.apache.spark.rdd.RDD
import org.apache.spark.util.LongAccumulator
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Accumulator demo: counts the lines of a text file with a driver-side
 * LongAccumulator. Executors only add to it; the driver reads .value.
 */
object AccumulatorTest {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("AccumulatorTest").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      // Create the accumulator on the driver.
      val accumulator: LongAccumulator = sc.longAccumulator
      // NOTE(review): hard-coded Windows path — adjust for your environment.
      val lines: RDD[String] = sc.textFile("file:///C:\\Users\\lenovo\\Desktop\\a.txt")
      // Update the accumulator inside an ACTION (foreach), not a map
      // transformation: Spark only guarantees exactly-once accumulator
      // updates for actions — a map could double-count if the stage is
      // recomputed, and would need a separate collect() just to execute.
      lines.foreach(_ => accumulator.add(1))
      // Read the accumulated total back on the driver.
      println(s"${accumulator.value}")
    } finally {
      // Always release the SparkContext, even if the job above fails.
      sc.stop()
    }
  }
}