package sparkcore.day2.lesson01;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.*;
import scala.Tuple2;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
 * Implementing Spark's RDD operations in Java: map, flatMap, filter,
 * groupByKey, reduceByKey, and sortByKey, each illustrated with example code.
 */
public class TransformationOperator {
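// A local SparkConf/JavaSparkContext shared by every example in this class.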
public static SparkConf conf = new SparkConf().setMaster("local").setAppName("test");
public static JavaSparkContext sc = new JavaSparkContext(conf);
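/**
 * map: transform each element one-to-one, here prefixing every name with "Hello".
 */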
public static void map(){
final List<String> list = Arrays.asList("张无忌", "赵敏", "周芷若");
final JavaRDD<String> rdd = sc.parallelize(list);
final JavaRDD<String> nameRDD = rdd.map(new Function<String, String>() {
@Override
public String call(String name) throws Exception {
return "Hello " + name;
}
});
nameRDD.foreach(new VoidFunction<String>() {
@Override
public void call(String s) throws Exception {
System.out.println(s);
}
});
}
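/**
 * flatMap: split each line into words (one-to-many), then greet each word.
 */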
public static void flatMap(){
final List<String> list = Arrays.asList("张无忌 赵敏", "宋青书 周芷若");
final JavaRDD<String> rdd = sc.parallelize(list);
rdd.flatMap(new FlatMapFunction<String, String>() {
@Override
public Iterator<String> call(String names) throws Exception {
return Arrays.asList(names.split(" ")).iterator();
}
}).map(new Function<String, String>() {
@Override
public String call(String name) throws Exception {
return "Hello "+ name;
}
}).foreach(new VoidFunction<String>() {
@Override
public void call(String line) throws Exception {
System.out.println(line);
}
});
}
/**
 * Filter the even numbers out of the RDD.
 */
public static void filter(){
final List<Integer> list = Arrays.asList(1, 2, 3, 4, 5, 6, 7);
final JavaRDD<Integer> rdd = sc.parallelize(list);
final JavaRDD<Integer> filterRDD = rdd.filter(new Function<Integer, Boolean>() {
// Returning true means this value is kept.
@Override
public Boolean call(Integer number) throws Exception {
return number % 2 == 0;
}
});
filterRDD.foreach(new VoidFunction<Integer>() {
@Override
public void call(Integer integer) throws Exception {
System.out.println(integer);
}
});
}
/**
 * groupByKey: group the values of a pair RDD by key.
 */
public static void groupBykey(){
final List<Tuple2<String, String>> list = Arrays.asList(
new Tuple2<String, String>("峨眉", "周芷若"),
new Tuple2<String, String>("武当", "宋青书"),
new Tuple2<String, String>("峨眉", "灭绝师太"),
new Tuple2<String, String>("武当", "张三丰")
);
final JavaPairRDD<String, String> rdd = sc.parallelizePairs(list);
final JavaPairRDD<String, Iterable<String>> groupBykeyRDD = rdd.groupByKey();
groupBykeyRDD.foreach(new VoidFunction<Tuple2<String, Iterable<String>>>() {
@Override
public void call(Tuple2<String, Iterable<String>> tuple) throws Exception {
final String menpai = tuple._1;
final Iterator<String> iterator = tuple._2.iterator();
System.out.println(menpai + ":");
while (iterator.hasNext()) {
final String name = iterator.next();
System.out.print(name + " ");
}
System.out.println("=====================");
}
});
}
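/**
 * reduceByKey is listed in the article summary, but its listing is cut off in
 * this copy. What follows is a minimal sketch in the same style, not the
 * original code (the sample data is illustrative): sum the values that share
 * a key.
 */
public static void reduceBykey() {
final List<Tuple2<String, Integer>> list = Arrays.asList(
new Tuple2<String, Integer>("峨眉", 40),
new Tuple2<String, Integer>("武当", 30),
new Tuple2<String, Integer>("峨眉", 60),
new Tuple2<String, Integer>("武当", 70)
);
final JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(list);
rdd.reduceByKey(new Function2<Integer, Integer, Integer>() {
// Combine two values that share the same key.
@Override
public Integer call(Integer v1, Integer v2) throws Exception {
return v1 + v2;
}
}).foreach(new VoidFunction<Tuple2<String, Integer>>() {
@Override
public void call(Tuple2<String, Integer> tuple) throws Exception {
System.out.println(tuple._1 + " " + tuple._2);
}
});
}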
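/**
 * sortByKey is also mentioned in the summary; a minimal sketch, assuming the
 * usual pattern of sorting a pair RDD by a numeric key in descending order
 * (the sample scores and names are illustrative).
 */
public static void sortBykey() {
final List<Tuple2<Integer, String>> list = Arrays.asList(
new Tuple2<Integer, String>(98, "张无忌"),
new Tuple2<Integer, String>(80, "宋青书"),
new Tuple2<Integer, String>(85, "周芷若")
);
final JavaPairRDD<Integer, String> rdd = sc.parallelizePairs(list);
// false = sort the keys in descending order.
rdd.sortByKey(false).foreach(new VoidFunction<Tuple2<Integer, String>>() {
@Override
public void call(Tuple2<Integer, String> tuple) throws Exception {
System.out.println(tuple._2 + " => " + tuple._1);
}
});
}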
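/**
 * Entry point (added so the listing runs end to end; the original was
 * truncated): invoke each operator in turn, then stop the context.
 */
public static void main(String[] args) {
map();
flatMap();
filter();
groupBykey();
reduceBykey();
sortBykey();
sc.stop();
}
}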