在IDEA中创建maven项目,配置scala环境,调用flink-streaming-scala的api
1, flink: DataStream applications
a,配置pom.xml
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
b,调用flink 流计算 api
socket流,实时wordcount: keyBy("字段名1").sum("字段名2")
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
object Flink1 {

  /** Record type carrying a single word together with its running count. */
  case class WordWithCount(word: String, count: Long)

  def main(args: Array[String]): Unit = {
    // Obtain the streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Source: read newline-delimited text from a local socket.
    val lines = env.socketTextStream("127.0.0.1", 8888, '\n')

    // Tokenize each line on spaces and pair every word with an initial count of 1.
    val words = lines
      .flatMap(line => line.split(" "))
      .map(word => WordWithCount(word, 1))

    // Key the stream by the "word" field and apply a sliding window:
    // window size = 5 seconds, slide = 1 second.
    val windowed = words
      .keyBy("word")
      .timeWindow(Time.seconds(5), Time.seconds(1))

    // Sum the "count" field per key within each window and print results.
    windowed.sum("count").print()

    // Submit and run the streaming job.
    env.execute("Socket Window WordCount")
  }
}
/**
#---------------启动nc: 输入数据
wang@wang-pc:/soft/flink/bin$ nc -lk 8888
a
*/
/**
#---------------观察程序输出:
> WordWithCount(a,1)
> WordWithCount(a,1)
> WordWithCount(a,1)
> WordWithCount(a,1)
> WordWithCount(a,1)
*/
2, flink: DataSet applications
a,配置pom.xml
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
b,调用flink 批处理api
读取文本文件,groupBy(元组字段1).sum(元组字段2)
import org.apache.flink.api.scala._
object Flink2 {

  def main(args: Array[String]): Unit = {
    // Obtain the batch execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Read the input file line by line.
    val text = env.readTextFile("/home/wang/a.txt")

    // Lowercase each line, split on non-word characters, drop empty tokens,
    // then count occurrences per word: group by the word (tuple field 0)
    // and sum the counts (tuple field 1).
    val counts = text
      .flatMap { line =>
        line.toLowerCase.split("\\W+").filter { token => token.nonEmpty }
      }
      .map { token => (token, 1) }
      .groupBy(0)
      .sum(1)

    // Sinks/actions: print to stdout and also write the result as text,
    // then trigger the batch job.
    counts.print()
    counts.writeAsText("flink-wc5")
    env.execute("text WordCount")
  }
}
/**
#---------------文件中的数据:
wang@wang-pc:/soft/flink$ cat ~/a.txt
a,b
b,b
c,c
#---------------程序输出结果:
(a,1)
(b,3)
(c,2)
*/