一、广播变量
public class JavaExample {
public static void main(String[] args) {
SparkConf conf = new SparkConf();
conf.setMaster("local").setAppName("JavaExample");
JavaSparkContext sc = new JavaSparkContext(conf);
final List<String> list = Arrays.asList("hello world", "hello spark");
//广播变量
final Broadcast<List<String>> broadcast = sc.broadcast(list);
JavaRDD<String> rdd1 = sc.parallelize(Arrays.asList("hello world", "hello spark", "hello java"));
JavaRDD<String> rdd2 = rdd1.filter(new Function<String, Boolean>() {
public Boolean call(String line) throws Exception {
return !broadcast.value().contains(line);
}
});
rdd2.foreach(new VoidFunction<String>() {
public void call(String s) throws Exception {
System.out.println(s);
}
});
}
}
二、累加器
/**
 * Demonstrates a Spark accumulator: every line of a text file adds 1 to a
 * shared counter, and the driver prints the total once the job has finished.
 */
object ScalaExample {
  /**
   * Runs the demo against a `local` master, reading `C://words.txt`.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("ScalaExample")
    val sc = new SparkContext(conf)
    try {
      val rdd1 = sc.textFile("C://words.txt")
      // Accumulator starting at 0; tasks add to it and the driver reads the total.
      // NOTE(review): sc.accumulator is the legacy API (deprecated in Spark 2.0);
      // switch to sc.longAccumulator when the project's Spark version allows it.
      val accumulator = sc.accumulator(0)
      // Update the accumulator inside an action (foreach) rather than a
      // transformation (map): Spark only guarantees exactly-once application of
      // accumulator updates for actions, and this avoids collecting every line
      // to the driver just to throw the result away.
      rdd1.foreach { _ =>
        accumulator.add(1)
        // Prints the running value as seen by the task.
        println(accumulator)
      }
      println(s" i = ${accumulator.value}")
    } finally {
      // Always shut the context down, even when the job throws.
      sc.stop()
    }
  }
}