Dependencies

<properties>
    <flink.version>1.11.2</flink.version>
    <scala.binary.version>2.11</scala.binary.version>
    <log4j.version>2.12.1</log4j.version>
</properties>

<dependencies>
    <!-- walkthrough common dependency -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-walkthrough-common_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Flink Scala API dependency (a Java equivalent can be used instead) -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Flink client dependency, required to run jobs locally -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- log4j logging -->
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-slf4j-impl</artifactId>
        <version>${log4j.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api</artifactId>
        <version>${log4j.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>${log4j.version}</version>
    </dependency>
</dependencies>
log4j configuration file contents

rootLogger.level=info
rootLogger.appenderRef.console.ref=ConsoleAppender

logger.sink.name=org.apache.flink.walkthrough.common.sink.AlertSink
logger.sink.level=INFO

appender.console.name=ConsoleAppender
appender.console.type=CONSOLE
appender.console.layout.type=PatternLayout
appender.console.layout.pattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
First, open a port on the virtual machine: nc -lk 8888
We will type input data into this session later.
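For example, once the job below is running, a few comma-separated lines can be typed into the nc session (the words are arbitrary sample input):

flink,flink,spark
hadoop,flink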
Start the Flink program
Note the implicit-conversion import used here: import org.apache.flink.streaming.api.scala._
Operators such as flatMap carry a [R: TypeInformation] context bound, so the compiler must find an implicit TypeInformation for the result type; without this import, compilation fails.
def flatMap[R: TypeInformation](fun: T => TraversableOnce[R]): DataStream[R] = {
  if (fun == null) {
    throw new NullPointerException("FlatMap function must not be null.")
  }
  val cleanFun = clean(fun)
  val flatMapper = new FlatMapFunction[T, R] {
    def flatMap(in: T, out: Collector[R]) {
      cleanFun(in) foreach out.collect
    }
  }
  flatMap(flatMapper)
}
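As an aside, here is a minimal sketch of what that import provides (assuming the flink-streaming-scala dependency above; TypeInfoDemo is a made-up name for illustration): the org.apache.flink.streaming.api.scala package object contributes an implicit TypeInformation factory, which is what satisfies the [R: TypeInformation] context bound.

import org.apache.flink.api.common.typeinfo.TypeInformation
// The wildcard import below brings the implicit createTypeInformation
// factory into scope.
import org.apache.flink.streaming.api.scala._

object TypeInfoDemo {
  def main(args: Array[String]): Unit = {
    // Resolves only because of the wildcard import above;
    // without it, this line fails to compile.
    val ti: TypeInformation[(String, Int)] = implicitly[TypeInformation[(String, Int)]]
    println(ti)
  }
}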
import org.apache.flink.streaming.api.scala._

object WordCount {
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Set the default parallelism
    env.setParallelism(2)

    /**
     * Read data from the socket
     */
    val lineDS: DataStream[String] = env.socketTextStream("doker", 8888)

    /**
     * Split each line on commas
     */
    val wordDS: DataStream[String] = lineDS.flatMap(_.split(","))

    /**
     * Convert to key-value pairs
     */
    val kvDS: DataStream[(String, Int)] = wordDS.map((_, 1))

    /**
     * Group by word
     */
    val keyByDS: KeyedStream[(String, Int), String] = kvDS.keyBy(_._1)

    val countDS = keyByDS.reduce((x, y) => (x._1, x._2 + y._2))

    /**
     * Print the result
     */
    countDS.print()

    /**
     * Execute the Flink job
     */
    env.execute()
  }
}
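As a side note, the keyBy/reduce pair above could also be expressed with Flink's built-in sum aggregation on the count field; a minimal sketch, intended as a drop-in replacement for those two lines:

// Drop-in replacement for the keyBy + reduce pair above:
// sum(1) aggregates tuple position 1 (the count) per key and,
// like the reduce, emits an updated total for every input record.
val countDS: DataStream[(String, Int)] = kvDS.keyBy(_._1).sum(1)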
Flink's map-side and reduce-side tasks are started at the same time (unlike batch MapReduce, where the reduce phase waits for the map phase to finish), and records are processed one by one as they flow through the pipeline.
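For instance, with the sample input above and parallelism 2, the console output might look like the following; print() prefixes each record with the printing subtask's index (the exact distribution depends on key hashing), and because the keyed reduce emits an updated total for every incoming record, the count for flink appears three times:

2> (flink,1)
2> (flink,2)
1> (spark,1)
1> (hadoop,1)
2> (flink,3)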