安装 IDEA 后创建一个 Flink Maven 项目,在 pom.xml 中添加如下依赖:
java版本自行修改
<properties> <flink.version>1.13.0</flink.version> <java.version>10</java.version> <scala.binary.version>2.12</scala.binary.version> <slf4j.version>1.7.30</slf4j.version> </properties> <dependencies> <!-- 引入 Flink 相关依赖--> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-java</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-streaming-java_${scala.binary.version}</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-clients_${scala.binary.version}</artifactId> <version>${flink.version}</version> </dependency> <!-- 引入日志管理相关依赖--> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> <version>${slf4j.version}</version> </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <version>${slf4j.version}</version> </dependency> <dependency> <groupId>org.apache.logging.log4j</groupId> <artifactId>log4j-to-slf4j</artifactId> <version>2.14.0</version> </dependency> </dependencies>
在目录 src/main/resources 下添加文件:log4j.properties,内容配置如下:
log4j.rootLogger=error, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
然后开始第一个统计程序WordCount:
在项目根目录下创建一个 input 目录,然后在其中创建待统计的文本文件 word.txt(内容为若干以空格分隔的单词):
在 src/main/java 下创建一个 Java 类 BatchWordCount,编写如下代码:
package com.sangguigu.wc;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import java.util.concurrent.ExecutionException;
public class BatchWordCount {

    /**
     * Batch WordCount using Flink's DataSet API.
     *
     * <p>Reads {@code input/word.txt}, splits each line into whitespace-separated
     * words, counts occurrences per word, and prints the (word, count) tuples.
     *
     * @param args unused command-line arguments
     * @throws Exception if reading the input file or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the batch execution environment
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // 2. Read the input file line by line
        DataSource<String> lineDataSource = env.readTextFile("input/word.txt");
        // 3. Split each line into words and emit a (word, 1) tuple per word.
        //    split("\\s+") collapses runs of whitespace; the isEmpty() guard
        //    skips the empty leading token produced by lines that start with
        //    whitespace, so no bogus "" word is ever counted.
        FlatMapOperator<String, Tuple2<String, Long>> wordAndOneTuple = lineDataSource.flatMap(
                (String line, Collector<Tuple2<String, Long>> out) -> {
                    String[] words = line.split("\\s+");
                    for (String word : words) {
                        if (!word.isEmpty()) {
                            out.collect(Tuple2.of(word, 1L));
                        }
                    }
                })
                // Java lambdas lose generic type information to erasure,
                // so Flink needs the tuple's element types declared explicitly.
                .returns(Types.TUPLE(Types.STRING, Types.LONG));
        // 4. Group by tuple field 0 (the word)
        UnsortedGrouping<Tuple2<String, Long>> wordAndOneGroup = wordAndOneTuple.groupBy(0);
        // 5. Sum tuple field 1 (the count) within each group
        AggregateOperator<Tuple2<String, Long>> sum = wordAndOneGroup.sum(1);
        // 6. Print the per-word totals
        sum.print();
    }
}
运行结果: