import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
* @Author: J
* @Version: 1.0
* @CreateTime: 2023/6/13
* @Description: 测试代码
**/
public class FlinkWordCountSocket {
    public static void main(String[] args) throws Exception {
        // Create the streaming execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //env.setRuntimeMode(RuntimeExecutionMode.BATCH); // uncomment this line to run in batch mode

        // Use a local socket as the data source (start one with: nc -lk 9999).
        DataStreamSource<String> lines = env.socketTextStream("localhost", 9999);

        // Split each line into words and emit every word as a (word, 1) pair.
        // Tuple2 is a Flink-provided type that Java cannot infer through the
        // lambda, so the element type must be declared explicitly via returns().
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordPairs = lines
                .flatMap((FlatMapFunction<String, Tuple2<String, Integer>>) (line, out) -> {
                    for (String word : line.split(" ")) { // split the line into words
                        out.collect(Tuple2.of(word, 1));  // emit '(word, 1)'
                    }
                })
                .returns(TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));

        // Group the pairs by the word itself (tuple field f0).
        KeyedStream<Tuple2<String, Integer>, String> byWord =
                wordPairs.keyBy((KeySelector<Tuple2<String, Integer>, String>) pair -> pair.f0);

        // Sum the per-word counts; "f1" names the count field (a field index works too).
        SingleOutputStreamOperator<Tuple2<String, Integer>> counts = byWord.sum("f1");

        counts.print();
        env.execute();
    }
}
代码中注释写得很清楚了,这里就不赘述代码的内容了,下面说一下注意点:
- 导包的时候千万要注意不要导成scala的包(如Tuple2等)
- Socket我这里使用的是本地的socket服务,linux和windows不同,这个自行搜索如何安装相关服务,这里说一下linux中启动一个socket服务的命令
nc -lk 端口号
- 友情提示:在 IDEA 中按 Ctrl+P 可以快速查看方法应该传入什么参数。