import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import java.util.Arrays;
import java.util.List;
/**
 * Demonstrates the Spark {@code filter(function)} operator.
 *
 * <p>{@code filter} returns a new RDD made up of the elements of the source RDD for which
 * the supplied function returns {@code true}.
 */
public class FilterDemo {
    /**
     * Builds an RDD from a fixed list of strings, filters it twice (once with an anonymous
     * {@link Function}, once with a lambda) and prints each collected result to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("spark");
        // JavaSparkContext is Closeable: try-with-resources releases the context
        // on normal exit and also when one of the jobs below throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> list = Arrays.asList("tyd", "ttd", "tydd", "ttyy", "tddy", "tyty");
            JavaRDD<String> javaRDD = sc.parallelize(list);

            // General form of filter: an anonymous Function<String, Boolean>
            // that keeps the elements containing "ty".
            JavaRDD<String> javaRDD1 = javaRDD.filter(new Function<String, Boolean>() {
                @Override
                public Boolean call(String s) throws Exception {
                    return s.contains("ty");
                }
            });
            System.out.println(javaRDD1.collect());

            // Lambda form of filter: keeps the elements containing "dd".
            JavaRDD<String> javaRDD2 = javaRDD.filter(s -> s.contains("dd"));
            System.out.println(javaRDD2.collect());
        }
    }
}
// Java Spark operator: filter
// (Web-page residue: latest recommended article published 2023-05-30 23:30:47)