Flink 批处理示例:WordCount(DataSet API)
import org.apache.flink.api.scala._
/**
 * Flink batch WordCount: reads a text file, splits each line into words,
 * and prints the count of every word.
 *
 * Usage: the input file path may be passed as the first program argument;
 * when omitted, the original hard-coded sample path is used (backward
 * compatible default).
 */
object WordCount {
  def main(args: Array[String]): Unit = {
    // Batch execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Input path: first argument if provided, otherwise the tutorial's
    // hard-coded sample file.
    val inputPath =
      if (args.nonEmpty) args(0)
      else "E:\\IntelliJ IDEA 2019.2.4\\FlinkTutorial\\src\\main\\resources\\hello"
    val inputDataSet = env.readTextFile(inputPath)

    // Tokenize on single spaces, pair each word with 1, then group by the
    // word (tuple field 0) and sum the counts (tuple field 1).
    val wordCountDataSet = inputDataSet
      .flatMap(_.split(" "))
      .map((_, 1))
      .groupBy(0)
      .sum(1)

    // print() both triggers execution and emits the result for batch jobs.
    wordCountDataSet.print()
  }
}
1.创建数据
2.执行代码
3.结果:
Flink 流处理示例:StreamWordCount(DataStream API,读取 socket 数据流)
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala._
/**
 * Flink streaming WordCount: reads lines from a socket and maintains a
 * running count per word.
 *
 * Command-line arguments (via ParameterTool): --host and --port. Defaults
 * of "localhost" / 6666 are supplied so the job still starts when either
 * argument is omitted — the original code passed null for a missing --host
 * and threw for a missing --port.
 */
object StreamWordCount {
  def main(args: Array[String]): Unit = {
    val params = ParameterTool.fromArgs(args)
    // Default-value overloads avoid null / exceptions on missing arguments.
    val host: String = params.get("host", "localhost")
    val port: Int = params.getInt("port", 6666)

    // Streaming execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // env.setParallelism(1)
    // env.disableOperatorChaining()

    // Unbounded source: one record per line received on the socket.
    val textDataStream = env.socketTextStream(host, port)

    // Tokenize on whitespace, drop empty tokens, key by the word (tuple
    // field 0) and keep a running sum of the counts (tuple field 1).
    // startNewChain() makes the filter begin a new operator chain — a
    // chaining demonstration from the tutorial (see the commented
    // disableOperatorChaining() above).
    val wordCountDataStream = textDataStream
      .flatMap(_.split("\\s"))
      .filter(_.nonEmpty).startNewChain()
      .map((_, 1))
      .keyBy(0)
      .sum(1)

    // Sink parallelism 1 so output lines are not interleaved across subtasks.
    wordCountDataStream.print().setParallelism(1)

    // A streaming job only runs once execute() is called.
    env.execute("stream word count job")
  }
}
1.参数设置:
在运行配置的程序参数中添加:--host <服务器IP> --port <端口>(由 ParameterTool.fromArgs 解析):
2.在服务器上执行:
nc -lk 6666
注:端口6666可以修改。
3.执行代码
4.结果: