1. [代码][Java] Spark map 系列算子示例代码 (map / mapPartitions / mapValues / mapPartitionsWithIndex / flatMap)
public static void mapTest(JavaSparkContext spark){
JavaRDD rdd1 = spark.parallelize(Arrays.asList(1,2,3,4,5,6),2);
// 测试 map函数
JavaRDD result = rdd1.map(f -> f*2);
System.out.println("[测试 map函数]MAP返回结果:"+result.count());
result.foreach(id->{System.out.println(id + ";");});
System.out.println("===========================================================");
// 测试mapPartition函数
JavaRDD rdd2 = spark.parallelize(Arrays.asList(1,2,3,4,5,6,7,8,9),3);
System.out.println("rdd2分区大小:" + rdd2.partitions().size());
JavaRDD result1 = rdd2.mapPartitions(t -> {
List list = new ArrayList();
int a = 0;
while(t.hasNext()){
a += t.next();
}
list.add(a);
return list;
},true);
System.out.println("result1分区大小:" + result1.partitions());
System.out.println("[测试mapPartition函数]mapPartition返回结果:"+result1.count());
result1.foreach(System.out::println);
System.out.println("===========================================================");
//测试mapPartition函数
JavaRDD> result3 = rdd2.mapPartitions(t -> {
List> list = new ArrayList>();
int key = t.next();
while(t.hasNext()){
int value = t.next();
list.add(new Tuple2(key,value));
key = value;
}
return list;
});
System.out.println("[测试mapPartition函数, 另一种写法]返回结果:"+result3.count());
result3.foreach(System.out::println);
System.out.println("===========================================================");
//测试mapValues函数
Tuple2 t1 = new Tuple2(1, 3);
Tuple2 t2 = new Tuple2(1, 2);
Tuple2 t3 = new Tuple2(1, 4);
Tuple2 t4 = new Tuple2(2, 3);
List> list = new ArrayList>();
list.add(t1);
list.add(t2);
list.add(t3);
list.add(t4);
JavaPairRDD rddValue = spark.parallelizePairs(list);
JavaPairRDD resultValue = rddValue.mapValues(v -> v*2);
System.out.println("[测试mapValues函数]返回结果:");
resultValue.foreach(t -> {System.out.println(t._1 +"="+t._2());});
System.out.println("===========================================================");
// 测试mapPartitionsWithIndex函数
//Function2 removeHeader= new Function2, Iterator>(){
// @Override
// public Iterator call(Integer ind, Iterator iterator) throws Exception {
// if(ind==0 && iterator.hasNext()){
// iterator.next();
// return iterator;
// }else
// return iterator;
// }
//};
JavaRDD rddWith = spark.parallelize(Arrays.asList(1,2,3,4,5),2);
JavaRDD rddResult = rddWith.mapPartitionsWithIndex((x,it)->{
List midList = new ArrayList();
int a = 0;
while(it.hasNext()){
a += it.next();
}
midList.add(x + "|" + a);
return midList.iterator();
},false);
System.out.println("[测试mapPartitionsWithIndex函数]返回结果:"+rddResult.count());
System.out.println(rddResult.collect());
//测试 flatMap函数
JavaRDD rddFlatMap = spark.parallelize(Arrays.asList(1,2,3,4),2);
JavaRDD rddFlatMapResult = rddFlatMap.flatMap(t-> {
List listFlat = new ArrayList();
for(int i =1; i <= t; i++){
listFlat.add(i);
}
return listFlat;
});
System.out.println("[测试flatMap函数]返回结果:"+rddFlatMapResult.count());
System.out.println(rddFlatMapResult.collect());
}