import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import java.util.Arrays;
import java.util.List;
/**
 * Demonstrates the {@code union()} operator.
 *
 * <p>{@code union()} takes the union of two RDDs without removing duplicates. It increases the
 * number of partitions, and the degree of parallelism increases along with it.
 */
public class UnionDemo {
    /**
     * Builds two small string RDDs on a local Spark master, unions them and prints the
     * collected result to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("spark");
        // JavaSparkContext is Closeable: try-with-resources shuts the context down
        // even when the job throws, instead of leaving it open until JVM exit.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> list1 = Arrays.asList("i", "love", "u");
            List<String> list2 = Arrays.asList("soryy", "maybe");
            // The first RDD is explicitly split into 2 partitions; the second uses the default.
            JavaRDD<String> list1Rdd = sc.parallelize(list1, 2);
            JavaRDD<String> list2Rdd = sc.parallelize(list2);
            // union operator
            JavaRDD<String> result = list1Rdd.union(list2Rdd);
            // Convert to a list and print it
            System.out.println(result.collect());
        }
    }
}
// Java Spark operator: union
// (scraped page footer) Latest recommended article published on 2022-08-01 15:57:17