从WordCount中学习flatMap和flatMapToPair
一、flatMap
flatMap和flatMapToPair都是先把每条输入数据拆分成多个元素,再把所有元素“压平”合并成一个新的RDD;区别在于flatMap得到普通的JavaRDD,而flatMapToPair得到键值对形式的JavaPairRDD。
(1)学习致谢
https://blog.csdn.net/weixin_44694973/article/details/95523712
(2)写代码的时候可以先创建一个列表(ArrayList),用来存放拆分后的元素
ArrayList<String> list = new ArrayList<>();
(3)切割
String[] split = s.split(",");
(4)遍历
for (String s1 : split) {
list.add(s1);
}
(5)组合
return list.iterator();
二、flatMapToPair
(1)学习致谢
https://blog.csdn.net/timicai/article/details/109611370
三、代码
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import scala.Tuple2;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
 * Word-count demo that contrasts Spark's {@code flatMap} and {@code flatMapToPair}.
 *
 * <p>The program runs three stages on two hard-coded comma-separated lines and prints
 * the result of each stage to standard output:
 * <ol>
 *   <li>{@code flatMap}: splits every line into individual words;</li>
 *   <li>{@code flatMapToPair}: splits every line into {@code (word, 1)} pairs;</li>
 *   <li>{@code reduceByKey} + {@code map}: sums the pairs per word and formats each
 *       result as {@code "word,count"}.</li>
 * </ol>
 */
public class WordCount3 {

    /**
     * Runs the demo on a local Spark master.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("WordCount").setMaster("local");

        // JavaSparkContext is Closeable: try-with-resources stops the context even
        // when one of the jobs below throws.
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            // Input data
            List<String> dataList = new ArrayList<>();
            dataList.add("one,one,two,hello,world");
            dataList.add("hello,world,one,two,three");
            JavaRDD<String> lines = jsc.parallelize(dataList);

            // Stage 1: flatMap turns each line into many words.
            List<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
                @Override
                public Iterator<String> call(String s) throws Exception {
                    return Arrays.asList(s.split(",")).iterator();
                }
            }).collect();
            // Print the flatMap result (not the raw input lines), one word per line.
            for (String word : words) {
                System.out.println(word);
            }
            System.out.println("----------------------------------------------------------------------------");

            // Stage 2: flatMapToPair turns each line into many (word, 1) pairs.
            // Uses the same split as stage 1 so both stages see the same words.
            JavaPairRDD<String, Integer> pairs = lines.flatMapToPair(new PairFlatMapFunction<String, String, Integer>() {
                @Override
                public Iterator<Tuple2<String, Integer>> call(String s) throws Exception {
                    List<Tuple2<String, Integer>> list = new ArrayList<>();
                    for (String word : s.split(",")) {
                        list.add(new Tuple2<>(word, 1));
                    }
                    return list.iterator();
                }
            });
            for (Tuple2<String, Integer> pair : pairs.collect()) {
                System.out.println(pair);
            }
            System.out.println("----------------------------------------------------------------------------");

            // Stage 3: sum the 1s per word, then format every entry as "word,count".
            JavaRDD<String> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer integer, Integer integer2) throws Exception {
                    return integer + integer2;
                }
            }).map(new Function<Tuple2<String, Integer>, String>() {
                @Override
                public String call(Tuple2<String, Integer> s) throws Exception {
                    return s._1 + "," + s._2;
                }
            });
            for (String line : counts.collect()) {
                System.out.println(line);
            }
        }
    }
}
四、结果展示