distinct:对RDD中的元素进行去重。
first:返回RDD中第一个元素。
package com.cb.spark.sparkrdd;
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
/**
 * Minimal Spark example showing two RDD operations on a small list of strings:
 * {@code distinct}, which removes duplicate elements, and {@code first}, which
 * returns the first element of the RDD.
 */
public class DistinctFirst {

    /**
     * Builds an RDD from a fixed list of animal names, prints its distinct elements
     * on one line, then prints the first element of the original RDD on the next line.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("DistinctFirst").setMaster("local");

        // JavaSparkContext is Closeable; try-with-resources ensures the context is
        // stopped even if one of the jobs below throws, instead of relying on an
        // explicit stop() call that is only reached on the success path.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> javaRDD = sc.parallelize(
                    Arrays.asList("cat", "dog", "pig", "mouse", "cat", "Rat", "dog"));

            // Each distinct element is printed once, separated by spaces.
            // Sample output (ordering presumably not guaranteed): mouse Rat dog pig cat
            javaRDD.distinct().foreach(x -> System.out.print(x + " "));
            // Terminate the line above so the next result is printed on its own line.
            System.out.println();

            // cat
            System.out.println(javaRDD.first());
        }
    }
}