import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.util.Arrays;
import java.util.List;
/**
 * Demonstrates the Spark {@code union()} operator.
 *
 * <p>{@code union()} concatenates two RDDs without removing duplicates. The
 * resulting RDD keeps the partitions of both inputs, so its partition count
 * is the sum of the inputs' partition counts.
 */
public class UnionDemo {

    /**
     * Builds two small string RDDs on a local Spark master, unions them, and
     * prints the collected result to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("spark");

        // JavaSparkContext is Closeable; try-with-resources stops the context
        // (and its local executor threads) even if the job throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> list1 = Arrays.asList("i", "love", "u");
            List<String> list2 = Arrays.asList("soryy", "maybe");

            // Create an RDD with 2 partitions.
            JavaRDD<String> list1Rdd = sc.parallelize(list1, 2);

            // Create an RDD with 1 partition. The count is passed explicitly so
            // it does not depend on the configured default parallelism.
            JavaRDD<String> list2Rdd = sc.parallelize(list2, 1);

            // union(): concatenate both RDDs; the result has 2 + 1 = 3 partitions.
            JavaRDD<String> result = list1Rdd.union(list2Rdd);

            // Collect to a driver-side list and print it.
            System.out.println(result.collect());
        }
    }
}
// Java Spark operator: union
// (Blog page footer) Latest recommended article published 2022-06-11 10:00:23