package com.lenovo.sparkSqlDemo
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Demo of Spark broadcast variables and accumulators.
 *
 * A small gender-code lookup table is broadcast, each user record has its
 * trailing gender code replaced by the matching label, and a long accumulator
 * counts how many records were processed. The count is printed on the driver.
 */
object BroadcastDemo {

  /** Label substituted when a record's gender code is absent from the lookup table. */
  private val UnknownGender = "未知"

  /**
   * Replaces the last comma-separated field of `record` (the gender code)
   * with its label from `genderByCode`.
   *
   * The record must contain at least one comma; otherwise `substring`
   * throws `StringIndexOutOfBoundsException`, as the inline code did before.
   *
   * @param record       a comma-separated user record whose last field is the gender code
   * @param genderByCode lookup table from gender code to display label
   * @return the record with its last field replaced by the resolved label
   */
  private def resolveGender(record: String, genderByCode: Map[String, String]): String = {
    // Locate the separator once; it splits the record into prefix and gender code.
    val sepIdx = record.lastIndexOf(",")
    val code = record.substring(sepIdx + 1)
    val prefix = record.substring(0, sepIdx)
    val label = genderByCode.getOrElse(code, UnknownGender)
    s"$prefix,$label"
  }

  /**
   * Entry point: runs the demo on a local 3-thread Spark context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[3]").setAppName("broadcastDemo")
    val sc = new SparkContext(conf)
    try {
      // Accumulator used to count processed records; the reference itself never changes.
      val count = sc.longAccumulator("count")

      // Broadcast the gender-code lookup table for use inside the tasks.
      val bcMap = sc.broadcast(Map("0" -> "男", "1" -> "女"))

      val userList = List(
        "001,张三,28,0",
        "002,李四,18,1",
        "003,王五,38,0",
        "004,zhaoliu,38,-1"
      )

      sc.parallelize(userList, 3)
        // Resolve the gender label through the broadcast lookup table.
        .map(info => resolveGender(info, bcMap.value))
        // Count one per record; foreach is the action that triggers the map above.
        .foreach(_ => count.add(1))

      println(count.value)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }
}
// Using accumulators and broadcast variables in Spark
// (scraped page residue) Latest recommended article published on 2023-02-05 22:10:35