import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.util.Arrays;
import java.util.List;
/**
 * Demonstrates Spark's {@code intersection} transformation:
 * it returns the elements present in both RDDs, with duplicates removed.
 *
 * <p>With the sample data below the result contains a, b and c
 * (element order in the collected list is not guaranteed).
 */
public class IntersectionDemo {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("spark");
        // JavaSparkContext implements Closeable; try-with-resources guarantees
        // the context is stopped even if the job throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> list1 = Arrays.asList("a", "b", "c", "a");
            List<String> list2 = Arrays.asList("a", "b", "c", "d", "a");
            JavaRDD<String> javaRDD1 = sc.parallelize(list1);
            JavaRDD<String> javaRDD2 = sc.parallelize(list2);
            // intersection operator: deduplicated common elements of both RDDs
            JavaRDD<String> intersection = javaRDD1.intersection(javaRDD2);
            // Normal program output belongs on stdout, not stderr.
            System.out.println(intersection.collect());
        }
    }
}
// Source: blog post "Java Spark operator: intersection" (last updated 2021-03-06 14:51:27)