二、代码实现
package spark;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import scala.Tuple2;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
public class SparkWordCountForJava {
public static void main(String[] args) {
// 初始化 Spark;local[*] 表示在本地运行,并以本机逻辑核心数作为工作线程数
SparkConf conf = new SparkConf().setMaster(“local[*]”).setAppName(“SparkWordCountForJava”);
JavaSparkContext jsc = new JavaSparkContext(conf);
JavaRDD textFileRdd = jsc.textFile(“C:\Users\com\Desktop\test.txt”);
// 将数据按照切分规则分成一个个单词
JavaRDD flatMapRdd = textFileRdd.flatMap(new FlatMapFunction<String, String>() {
public Iterator call(String s) throws Exception {
String[] splits = s.split(“\t”);
List list = Arrays.asList(splits);
return list.iterator();
}
});
// 每个单词作为key,value为1
JavaRDD<Tuple2<String, Integer>> mapRdd = flatMapRdd.map(new Function<String, Tuple2<String, Integer>>() {
public Tuple2<String, Integer> call(String s) throws Exception {
return new Tuple2<String, Integer>(s, 1);
}
});
// 分组:相同 key 分为一组
JavaPairRDD<String, Iterable<Tuple2<String, Integer>>> groupByRdd = mapRdd.groupBy(new Function<Tuple2<String, Integer>, String>() {
public String call(Tuple2<String, Integer> s) throws Exception {
return s._1;
}
});
// Lambda 表达式写法:mapRdd1、groupByRdd1 的结果分别与 mapRdd、groupByRdd 相同
JavaRDD<Tuple2<String, Integer>> mapRdd1 = flatMapRdd.map(s -> new Tuple2<String, Integer>(s, 1));
JavaPairRDD<String, Iterable<Tuple2<String, Integer>>> groupByRdd1 = mapRdd1.groupBy(s -> s._1);
// 相同key,value值累加
JavaPairRDD<String, Integer> mapValuesRdd = groupByRdd.mapValues(new Function<Iterable<Tuple2<String, Integer>>, Integer>() {
public Integer call(Iterable<Tuple2<String, Integer>> v1) throws Exception {
int sum = 0;
for(Tuple2<String, Integer> t:v1) {
sum += t._2;
}
Kafka进阶篇知识点
Kafka高级篇知识点
44个Kafka知识点(基础+进阶+高级)解析如下
由于篇幅有限,小编已将上面介绍的**《Kafka源码解析与实战》、Kafka面试专题解析、复习学习必备44个Kafka知识点(基础+进阶+高级)都整理成册,全部都是PDF文档**
**44个Kafka知识点(基础+进阶+高级)解析如下**
[外链图片转存中…(img-HpYBkJMY-1714447038034)]
由于篇幅有限,小编已将上面介绍的**《Kafka源码解析与实战》、Kafka面试专题解析、复习学习必备44个Kafka知识点(基础+进阶+高级)都整理成册,全部都是PDF文档**