搭建maven工程 FlinkTutorial
pom文件
<modelVersion>4.0.0</modelVersion>
<groupId>com.atguigu.flink</groupId>
<artifactId>FlinkTutorial</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- Flink dependencies for the FlinkTutorial project; both artifacts are pinned to version 1.10.1. -->
<dependencies>
<!-- flink-java: presumably supplies the batch (DataSet) Java API used by the WordCount example; confirm against the Flink 1.10 docs. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.10.1</version>
</dependency>
<!-- flink-streaming-java_2.12: presumably supplies the DataStream API used by StreamWordCount; the _2.12 suffix is, by Flink convention, the Scala binary version the artifact was built against. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
<version>1.10.1</version>
</dependency>
</dependencies>
批处理 wordcount
src/main/java/com.atguigu.wc/WordCount.java
public class WordCount {
public static void main(String[] args) throws Exception {
// 创建执行环境
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// 从文件中读取数据
String inputPath = "hello.txt";
DataSet<String> inputDataSet = env.readTextFile(inputPath);
// 空格分词打散之后,对单词进行groupby分组,然后用sum进行聚合
DataSet<Tuple2<String, Integer>> wordCountDataSet =
inputDataSet.flatMap(new MyFlatMapper())
.groupBy(0)
.sum(1);
// 打印输出
wordCountDataSet.print();
}
public static class MyFlatMapper implements FlatMapFunction<String, Tuple2<String, Integer>> {
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] words = value.split(" ");
for (String word : words) {
out.collect(new Tuple2<String, Integer>(word, 1));
}
}
}
}
流处理 wordcount
src/main/java/com.atguigu.wc/StreamWordCount.java
/**
 * Streaming word count: reads lines of text from a socket and prints a running
 * count for each word.
 */
public class StreamWordCount {

    /**
     * Builds and runs the streaming job.
     *
     * @param args command-line arguments; {@code --host} and {@code --port} are read
     *             from them to locate the text socket
     * @throws Exception if the job cannot be set up or fails while running
     */
    public static void main(String[] args) throws Exception {
        // Create the streaming execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Pull the socket address out of the program arguments.
        ParameterTool params = ParameterTool.fromArgs(args);
        String host = params.get("host");
        int port = params.getInt("port");

        // Source: one stream element per line received on the socket.
        DataStream<String> lines = env.socketTextStream(host, port);

        // Reuse the batch example's mapper to turn each line into (word, 1) pairs.
        DataStream<Tuple2<String, Integer>> pairs = lines.flatMap(new WordCount.MyFlatMapper());

        // Key by the word (tuple field 0) and keep a sum of the count (tuple field 1).
        DataStream<Tuple2<String, Integer>> counts = pairs.keyBy(0).sum(1);

        // Print with parallelism 1 on the sink.
        counts.print().setParallelism(1);

        // Nothing runs until the job is submitted.
        env.execute();
    }
}
测试——在linux系统中先用netcat命令监听端口并发送数据,再启动程序并传入参数 --host <netcat所在主机> --port 7777。
nc -lk 7777