批处理
import org.apache.flink.api.scala.{ExecutionEnvironment, createTypeInformation}
/**
 * Batch word count built on the Flink DataSet API.
 *
 * Reads a text file, splits each line on single space characters and prints
 * one `(word, count)` pair per distinct word.
 */
object WordCount {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath = "..\\data\\hello.txt"

  /**
   * Runs the batch job.
   *
   * @param args optional command-line arguments; `args(0)`, when present,
   *             overrides the default input file path
   */
  def main(args: Array[String]): Unit = {
    // Create the batch execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Read the input file; each element of the data set is one line of text.
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    val inputDataSet = env.readTextFile(inputPath)

    // Blank lines and runs of spaces make split(' ') emit empty tokens; they are
    // filtered out so that "" is never counted as a word (same as WordCount1).
    val wordCountDataSet = inputDataSet
      .flatMap(_.split(' '))
      .filter(_.nonEmpty)
      .map((_, 1))
      .groupBy(0) // group on tuple field 0, the word
      .sum(1)     // add up tuple field 1, the per-occurrence count

    // No env.execute() here: this program relies on print() to run the job.
    wordCountDataSet.print()
  }
}
----------
(scala,1)
(flink,1)
(world,1)
(hello,3)
流处理
import org.apache.flink.api.scala.createTypeInformation
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
/**
 * Streaming word count built on the Flink DataStream API.
 *
 * Reads lines of text from a socket, splits them on single space characters
 * and prints an updated `(word, count)` pair each time a word arrives.
 */
object WordCount1 {

  /** Socket host used when none is supplied on the command line. */
  private val DefaultHost = "localhost"

  /** Socket port used when none is supplied on the command line. */
  private val DefaultPort = 7777

  /**
   * Runs the streaming job.
   *
   * @param args optional command-line arguments: `args(0)` is the socket host
   *             and `args(1)` the socket port; missing values fall back to
   *             `localhost` and `7777`
   * @throws NumberFormatException if `args(1)` is present but not an integer
   */
  def main(args: Array[String]): Unit = {
    // Create the streaming execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // A malformed port is deliberately not swallowed: toInt fails fast.
    val host = args.headOption.getOrElse(DefaultHost)
    val port = args.lift(1).fold(DefaultPort)(_.toInt)

    // Open a text stream on the socket; each element is one received line.
    val dataStream = env.socketTextStream(host, port)

    // Key by the word through a typed key selector rather than the
    // deprecated index-based keyBy(0).
    val wordCountDataStream = dataStream
      .flatMap(_.split(' '))
      .filter(_.nonEmpty) // drop empty tokens from blank lines / repeated spaces
      .map((_, 1))
      .keyBy(_._1)
      .sum(1)             // running total of tuple field 1 per word

    wordCountDataStream.print()

    // Start the job.
    env.execute("stream word count job")
  }
}
----------
3> (hello,1)
5> (world,1)
1> (scala,1)
3> (hello,2)
3> (do,1)
5> (you,1)
6> (how,1)
3> (do,2)
流处理需要先打开程序接收数据的端口(例如在虚拟机中执行 `nc -lk 7777`),再通过该端口传入数据。