- 使用 mvn 创建一个 flink-quickstart 项目,按提示自定义 GAV(groupId、artifactId、version)和包名
mvn archetype:generate -DarchetypeGroupId=org.apache.flink -DarchetypeArtifactId=flink-quickstart-java -DarchetypeVersion=1.8.0
- 创建完成后使用windows命令查看所有文件
E:\git_projects>dir /s/b flink-quickstart
- 使用 IntelliJ IDEA 打开项目,在 pom.xml 的 <dependencies> 节点中添加以下依赖
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-wikiedits_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
- 新建WikipediaAnalysis类,完整版代码如下,点击运行即可:
package com.xicheng;
import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.wikiedits.WikipediaEditEvent;
import org.apache.flink.streaming.connectors.wikiedits.WikipediaEditsSource;
/**
 * Flink streaming job that consumes the events emitted by {@link WikipediaEditsSource},
 * groups them by the user of each edit, and prints a (user, summed byte diff) pair
 * for every five-second time window.
 */
public class WikipediaAnalysis {

    /** Selects the grouping key of an edit event: the user returned by the event. */
    private static final class EditUserSelector implements KeySelector<WikipediaEditEvent, String> {
        @Override
        public String getKey(WikipediaEditEvent event) throws Exception {
            return event.getUser();
        }
    }

    /**
     * Folds edit events into a (user, summed byte diff) pair. The accumulator passed in
     * is updated in place and returned.
     */
    private static final class ByteDiffFolder
            implements FoldFunction<WikipediaEditEvent, Tuple2<String, Long>> {
        @Override
        public Tuple2<String, Long> fold(Tuple2<String, Long> accumulator, WikipediaEditEvent event)
                throws Exception {
            accumulator.f0 = event.getUser();
            // getByteDiff() is added onto the Long running total held in f1.
            accumulator.f1 = accumulator.f1 + event.getByteDiff();
            return accumulator;
        }
    }

    /**
     * Builds the pipeline (source, key by user, 5-second window, fold, print) and runs it.
     *
     * @param args command-line arguments; not read by this job
     * @throws Exception if key selection, folding, or job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<WikipediaEditEvent> editEvents = env.addSource(new WikipediaEditsSource());
        KeyedStream<WikipediaEditEvent, String> editsByUser = editEvents.keyBy(new EditUserSelector());

        // Every fold starts from an empty user name and a zero byte count.
        Tuple2<String, Long> initialValue = new Tuple2<>("", 0L);
        SingleOutputStreamOperator<Tuple2<String, Long>> bytesPerUser = editsByUser
                .timeWindow(Time.seconds(5))
                .fold(initialValue, new ByteDiffFolder());

        bytesPerUser.print();
        env.execute();
    }
}
参考文档:https://ci.apache.org/projects/flink/flink-docs-release-1.8/tutorials/datastream_api.html#writing-a-flink-program