数据流分析批处理
src/main/data/batch.txt
hello moto
hello apple
java代码
package com.example;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
/**
 * Batch word count built on the Flink DataSet API.
 *
 * <p>Reads a text file, splits every line into whitespace-separated words and
 * prints one {@code (word, count)} tuple per distinct word.
 *
 * @author: AIY
 * @email: aiykerwin@sina.com
 * @Date: 2022/5/29 10:42
 */
public class Hello01FlinkBatch {

    /**
     * Runs the word-count job against {@code src/main/data/batch.txt}.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        // Create the batch execution environment.
        ExecutionEnvironment executionEnvironment = ExecutionEnvironment.getExecutionEnvironment();

        // Read the input file; each element of the data set is one line of text.
        DataSource<String> dataSource = executionEnvironment.readTextFile("src/main/data/batch.txt");

        // Split every line into individual words.
        FlatMapOperator<String, String> words = dataSource.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> collector) throws Exception {
                // "\\s+" treats a run of whitespace as a single separator; splitting on
                // "\\s" would yield an empty token between consecutive separators.
                for (String word : line.split("\\s+")) {
                    // A blank line or leading whitespace still produces one empty
                    // token; skip it so "" is never counted as a word.
                    if (word.isEmpty()) {
                        continue;
                    }
                    System.out.println("---" + word);
                    // Emit the word downstream.
                    collector.collect(word);
                }
            }
        });

        // Pair every word with an initial count of 1.
        MapOperator<String, Tuple2<String, Integer>> pairs = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String word) throws Exception {
                return Tuple2.of(word, 1);
            }
        });

        // Group by the word (tuple field 0) and add up the counts (tuple field 1).
        AggregateOperator<Tuple2<String, Integer>> sum = pairs.groupBy(0).sum(1);

        // Print the resulting (word, count) tuples.
        sum.print();
    }
}
scala代码
package com.example
import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment}
/**
 * Batch word count built on the Flink Scala DataSet API.
 *
 * Reads a text file, splits every line into whitespace-separated words and
 * prints one `(word, count)` tuple per distinct word.
 *
 * @author: AIY
 * @email: aiykerwin@sina.com
 * @Date: 2022/5/29 11:02
 */
object Hello01FlinkBatchScale {

  /**
   * Runs the word-count job against `src/main/data/batch.txt`.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Brings the implicit TypeInformation instances needed by the DataSet
    // operators below into scope.
    import org.apache.flink.api.scala._

    val executionEnvironment = ExecutionEnvironment.getExecutionEnvironment
    val lines: DataSet[String] = executionEnvironment.readTextFile("src/main/data/batch.txt")

    lines
      // "\\s+" treats a run of whitespace as a single separator.
      .flatMap(_.split("\\s+"))
      // A blank line or leading whitespace still yields one empty token; drop it
      // so "" is never counted as a word.
      .filter(_.nonEmpty)
      .map((_, 1))
      // Group by the word (tuple field 0) and add up the counts (tuple field 1).
      .groupBy(0)
      .sum(1)
      .print()
  }
}
结果
(hello,2)
(apple,1)
(moto,1)