第一天接触Flink,从小白学起。
1.引入依赖
创建Maven项目,并引入Flink的依赖。
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
<version>1.10.1</version>
</dependency>
</dependencies>
2.创建数据集:在 src/main/resources 目录下创建 hello.txt 文件,作为后面代码读取的输入数据。
3.进行分词处理
package com.flink.wc;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
// A batch-processing WordCount example.
public class WordCount {

    /**
     * Reads a text file, splits each line into space-separated words, and
     * prints the per-word counts as {@code (word, count)} tuples.
     *
     * @param args optional; {@code args[0]} may override the default input
     *             file path (kept backward-compatible with the original
     *             hard-coded path)
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // Create the batch execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Read input data from a file; allow the path to be supplied on the
        // command line instead of relying only on the hard-coded default.
        String inputPath = args.length > 0
                ? args[0]
                : "/Users/lvqing/Desktop/wm/javacode/flink/src/main/resources/hello.txt";
        // Parameterized type instead of the raw DataSource used originally.
        DataSource<String> inputDataSet = env.readTextFile(inputPath);

        // Split each line into words, convert to (word, 1) tuples, group by
        // the word and sum the counts.
        DataSet<Tuple2<String, Integer>> resultSet = inputDataSet
                .flatMap(new MyflatMapper())
                .groupBy(0)   // group by the word at tuple position 0
                .sum(1);      // sum the counts at tuple position 1

        resultSet.print();
    }

    /** Splits a line on single spaces and emits a (word, 1) tuple per word. */
    public static class MyflatMapper implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            // Tokenize on single spaces.
            String[] words = value.split(" ");
            // Wrap every word into a (word, 1) tuple and emit it downstream.
            for (String word : words) {
                out.collect(new Tuple2<>(word, 1));
            }
        }
    }
}
4.执行结果