第1关 Transformation - map
package net.educoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
import java.util.Arrays;
import java.util.List;
/**
 * Level 1: Transformation - map.
 *
 * <p>Demonstrates {@code JavaRDD.map} with two fixed in-memory datasets.
 */
public class Step1 {
    private static SparkConf conf;
    private static JavaSparkContext sc;

    static {
        conf = new SparkConf().setAppName("Step1").setMaster("local");
        sc = new JavaSparkContext(conf);
        // Consistent with Step7: silence Spark's verbose INFO output during the exercise.
        sc.setLogLevel("ERROR");
    }

    /**
     * Maps the fixed list [1..5]: even numbers are squared, odd numbers are cubed.
     *
     * @return RDD of the transformed integers
     */
    public static JavaRDD<Integer> MapRdd() {
        List<Integer> list = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> rdd = sc.parallelize(list);
        // Even -> square, odd -> cube; a ternary replaces the original if/else block.
        JavaRDD<Integer> map = rdd.map(num -> num % 2 == 0 ? num * num : num * num * num);
        return map;
    }

    /**
     * Maps each word in a fixed list to a (word, length) tuple.
     *
     * <p>NOTE(review): the return type stays raw {@code JavaRDD<Tuple2>} to keep the
     * caller-visible signature unchanged; the elements are actually
     * {@code Tuple2<String, Integer>}.
     *
     * @return RDD of (word, word length) pairs
     */
    public static JavaRDD<Tuple2> MapRdd2() {
        List<String> list = Arrays.asList("dog", "salmon", "salmon", "rat", "elephant");
        JavaRDD<String> rdd = sc.parallelize(list);
        // Diamond operator replaces the raw `new Tuple2(...)` construction.
        JavaRDD<Tuple2> map = rdd.map(str -> new Tuple2<>(str, str.length()));
        return map;
    }
}
第2关 Transformation - mapPartitions
package net.educoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Level 2: Transformation - mapPartitions.
 *
 * <p>Demonstrates {@code JavaRDD.mapPartitions}, which transforms one whole
 * partition (an iterator of elements) at a time.
 */
public class Step7 {
    private static SparkConf conf;
    private static JavaSparkContext sc;

    static {
        conf = new SparkConf().setAppName("Step7").setMaster("local");
        sc = new JavaSparkContext(conf);
        sc.setLogLevel("ERROR");
    }

    /**
     * Transforms the fixed list [1..10] partition by partition: even numbers
     * are squared, odd numbers are cubed.
     *
     * @return RDD of the transformed integers
     */
    public static JavaRDD<Integer> MapPartitionsRdd() {
        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> source = sc.parallelize(numbers);
        JavaRDD<Integer> transformed = source.mapPartitions(iterator -> {
            ArrayList<Integer> results = new ArrayList<>();
            while (iterator.hasNext()) {
                Integer value = iterator.next();
                if (value % 2 == 0) {
                    results.add(value * value);
                } else {
                    results.add(value * value * value);
                }
            }
            return results.iterator();
        });
        return transformed;
    }

    /**
     * Maps each word in a fixed list to a (word, length) tuple, one partition
     * at a time.
     *
     * @return RDD of (word, word length) pairs
     */
    public static JavaRDD<Tuple2<String, Integer>> MapPartitionsRdd2() {
        List<String> words = Arrays.asList("dog", "salmon", "salmon", "rat", "elephant");
        JavaRDD<String> source = sc.parallelize(words);
        JavaRDD<Tuple2<String, Integer>> transformed = source.mapPartitions(iterator -> {
            ArrayList<Tuple2<String, Integer>> results = new ArrayList<>();
            while (iterator.hasNext()) {
                String word = iterator.next();
                results.add(new Tuple2<>(word, word.length()));
            }
            return results.iterator();
        });
        return transformed;
    }
}
第3关 Transformation - flatMap
package net.educoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Level 3: Transformation - flatMap.
 *
 * <p>Demonstrates {@code JavaRDD.flatMap}, which maps each element to zero or
 * more output elements.
 */
public class Step2 {
    private static SparkConf conf;
    private static JavaSparkContext sc;

    static {
        conf = new SparkConf().setAppName("Step2").setMaster("local");
        sc = new JavaSparkContext(conf);
        // Consistent with Step7: silence Spark's verbose INFO output during the exercise.
        sc.setLogLevel("ERROR");
    }

    /**
     * Splits each space-separated line of a fixed word list into individual
     * words and flattens the result into a single RDD of words.
     *
     * @return RDD containing every word from every input line
     */
    public static JavaRDD<String> FlatMapRdd() {
        List<String> list = Arrays.asList("python java", "java scala", "scala python", "c c++", "c# .net");
        JavaRDD<String> rdd = sc.parallelize(list, 2);
        // Arrays.asList(...).iterator() replaces the original manual copy loop
        // into an ArrayList — same elements, same order.
        JavaRDD<String> flatMap = rdd.flatMap(line -> Arrays.asList(line.split(" ")).iterator());
        return flatMap;
    }
}
第4关 Transformation - filter
package net.educoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.util.Arrays;
import java.util.List;
/**
 * Level 4: Transformation - filter.
 *
 * <p>Demonstrates {@code JavaRDD.filter}, which keeps only the elements for
 * which the predicate returns {@code true}.
 */
public class Step3 {
    private static SparkConf conf;
    private static JavaSparkContext sc;

    static {
        conf = new SparkConf().setAppName("Step3").setMaster("local");
        sc = new JavaSparkContext(conf);
        // Consistent with Step7: silence Spark's verbose INFO output during the exercise.
        sc.setLogLevel("ERROR");
    }

    /**
     * Keeps only the odd numbers from the fixed list [1..5].
     *
     * @return RDD of odd integers
     */
    public static JavaRDD<Integer> FilterRdd() {
        List<Integer> list = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> rdd = sc.parallelize(list);
        // The condition is already a boolean — the `? true : false` was redundant.
        JavaRDD<Integer> filter = rdd.filter(x -> x % 2 != 0);
        return filter;
    }

    /**
     * Keeps only the words shorter than 6 characters from a fixed word list.
     *
     * @return RDD of words with length &lt; 6
     */
    public static JavaRDD<String> FilterRdd2() {
        List<String> list = Arrays.asList("dog", "salmon", "salmon", "rat", "elephant");
        JavaRDD<String> rdd = sc.parallelize(list);
        // The condition is already a boolean — the `? true : false` was redundant.
        JavaRDD<String> filter = rdd.filter(x -> x.length() < 6);
        return filter;
    }
}
第5关 Transformation - Distinct And SortBy
package net.educoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.util.Arrays;
import java.util.List;
/**
 * Level 5: Transformation - distinct and sortBy.
 *
 * <p>Demonstrates de-duplicating an RDD with {@code distinct} and ordering it
 * with {@code sortBy}.
 */
public class Step4 {
    private static SparkConf conf;
    private static JavaSparkContext sc;

    static {
        conf = new SparkConf().setAppName("Step4").setMaster("local");
        sc = new JavaSparkContext(conf);
    }

    /**
     * Removes duplicate values from a fixed list of integers.
     *
     * @return RDD of unique integers (unordered)
     */
    public static JavaRDD<Integer> DistinctRdd() {
        List<Integer> values = Arrays.asList(1, 2, 3, 4, 5, 1, 2, 3, 7, 8, 9, 10, 3, 2);
        return sc.parallelize(values).distinct();
    }

    /**
     * Removes duplicates from a fixed list of integers, then sorts the unique
     * values in descending order into a single partition.
     *
     * @return RDD of unique integers, sorted descending
     */
    public static JavaRDD<Integer> DistinctAndSortRdd() {
        List<Integer> values = Arrays.asList(1, 2, 3, 4, 5, 1, 2, 3, 7, 8, 9, 10, 3, 2);
        // sortBy key is the element itself; ascending=false, 1 output partition.
        return sc.parallelize(values).distinct().sortBy(num -> num, false, 1);
    }
}