1. 流处理
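(1) 在 Windows 上下载 netcat(https://eternallybored.org/misc/netcat/),并把 netcat 所在目录添加到 PATH 环境变量。
(2) 在命令行执行 nc -l -p 8000,监听 8000 端口。
(3) 启动下面的 Scala 程序,然后在 nc 窗口中输入以空格分隔的单词,代码如下: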
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
/**
 * Streaming word count: reads lines of text from a socket and prints a
 * running count for every word it has seen.
 *
 * Data must be written to port 8000. Start a listener before launching this
 * program, e.g. `nc -l -p 8000`. On Windows, netcat can be downloaded from
 * https://eternallybored.org/misc/netcat/ and added to the PATH environment
 * variable.
 */
object StreamWcApp {

  /**
   * Builds and runs the streaming job.
   *
   * @param args command-line arguments; currently unused because host and
   *             port are fixed below
   */
  def main(args: Array[String]): Unit = {
    // To read the endpoint from the command line instead, use ParameterTool:
    //   val tool: ParameterTool = ParameterTool.fromArgs(args)
    //   val host: String = tool.get("host")
    //   val port: Int = tool.get("port").toInt
    val host: String = "localhost"
    val port: Int = 8000

    // Create the stream execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Receive the socket text stream.
    val textDstream: DataStream[String] = env.socketTextStream(host, port)

    // Implicit conversions required by flatMap, map and keyBy.
    import org.apache.flink.api.scala._

    // Split each line into words, drop empty tokens, pair every word with 1,
    // then group by the word and keep a running sum of the counts.
    // The key is selected with a function rather than the field-position
    // overload keyBy(0), which is deprecated in the DataStream API.
    val dStream: DataStream[(String, Int)] = textDstream
      .flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(_._1)
      .sum(1)

    // Print the running counts.
    dStream.print()

    env.execute()
  }
}
运行
2.批处理
package worldcount
import org.apache.flink.api.scala.{AggregateDataSet, DataSet, ExecutionEnvironment}
/**
 * Batch word count: reads a text file, counts how often each
 * space-separated word occurs, and prints the (word, count) pairs.
 */
object WordCount {

  /**
   * Builds and runs the batch job.
   *
   * @param args optional command-line arguments; when present, the first one
   *             is used as the input file path, otherwise the built-in
   *             default path is read
   */
  def main(args: Array[String]): Unit = {
    // Construct the batch execution environment.
    val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment

    // Input file: the first argument if given, else the original fixed path,
    // so running without arguments behaves exactly as before.
    val input: String = args.headOption.getOrElse(
      "C:\\Users\\Administrator\\IdeaProjects\\UserBehaviorAnalysis\\HotItemsAnalysis\\src\\main\\resources\\1.txt"
    )
    val ds: DataSet[String] = env.readTextFile(input)

    // Implicit conversion required by flatMap and map.
    import org.apache.flink.api.scala.createTypeInformation

    // Split lines into words, pair each word with 1, group by the word
    // (tuple field 0) and sum the counts (tuple field 1).
    // Empty tokens are dropped first: blank lines and consecutive spaces make
    // split(" ") return "" entries, which would otherwise be counted as a word.
    val aggDs: AggregateDataSet[(String, Int)] = ds
      .flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .groupBy(0)
      .sum(1)

    // Print the result.
    aggDs.print()
  }
}
运行结果