Java 中也是有 map 算子的
mapToPair: 返回的是 KV 格式的 RDD
map:返回的非KV格式
UI 图如下
自定义分区器代码如下
/**
 * Demonstrates partition inspection and custom partitioning in Spark's Java API.
 *
 * <p>Builds a 2-partition RDD from a local collection, prints its initial
 * partition layout, then re-keys each element as (value, value), repartitions
 * with a custom {@link Partitioner} (keys &lt; 100 go to partition 0, the rest
 * to partition 1), strips the key back off, and prints the new layout.
 */
public static void main(String[] args) {
    SparkConf conf = new SparkConf()
            .setAppName("weatherJava")
            .setMaster("local[1]");
    JavaSparkContext sc = new JavaSparkContext(conf);

    List<Integer> list = Arrays.asList(99, 88, 200, 20, 101, 102);

    /**
     * Create an RDD with 2 partitions from a local collection.
     */
    JavaRDD<Integer> rdd = sc.parallelize(list, 2);

    // Dump the initial partition layout; count() forces the lazy transformation.
    rdd.mapPartitionsWithIndex(partitionDumper(), false).count();

    rdd.mapToPair(new PairFunction<Integer, Integer, Integer>() {
        @Override
        public Tuple2<Integer, Integer> call(Integer integer) throws Exception {
            return new Tuple2<>(integer, integer);
        }
    }).partitionBy(new Partitioner() {
        /**
         * The return value of this method decides the number of partitions
         * of the resulting RDD.
         */
        @Override
        public int numPartitions() {
            return 2;
        }

        /**
         * Partitioning strategy: keys below 100 go to partition 0,
         * everything else to partition 1.
         */
        @Override
        public int getPartition(Object key) {
            int k = (Integer) key;
            if (k < 100) {
                return 0;
            }
            return 1;
        }
    }).map(new Function<Tuple2<Integer, Integer>, Integer>() {
        @Override
        public Integer call(Tuple2<Integer, Integer> tuple2) throws Exception {
            return tuple2._1;
        }
    }).mapPartitionsWithIndex(partitionDumper(), false).count();

    sc.stop();
}

/**
 * Returns a debug function that prints every element of a partition, tagged
 * with the partition id, and passes the elements through unchanged.
 *
 * <p>Fix: the original anonymous classes returned the already-consumed
 * iterator, so the downstream {@code count()} always saw zero elements.
 * Elements are buffered while printing and a fresh iterator over the buffer
 * is returned, so the resulting RDD still contains the partition's data.
 */
private static Function2<Integer, Iterator<Integer>, Iterator<Integer>> partitionDumper() {
    return new Function2<Integer, Iterator<Integer>, Iterator<Integer>>() {
        @Override
        public Iterator<Integer> call(Integer index, Iterator<Integer> iterator) throws Exception {
            System.out.println("partitionId:" + index);
            // Fully-qualified to avoid requiring a new import in this file.
            List<Integer> buffered = new java.util.ArrayList<>();
            while (iterator.hasNext()) {
                Integer value = iterator.next();
                System.out.print(value + "\t");
                buffered.add(value);
            }
            return buffered.iterator();
        }
    };
}