package com.shujia.transformation
import org.apache.flink.api.common.functions.MapFunction
import org.apache.flink.streaming.api.scala._
/**
 * Demonstrates the `map` transformation on a Flink `DataStream`.
 *
 * Every line read from the socket is turned into a `(line, 1)` pair, once with
 * a Scala function literal and once with an explicit `MapFunction`
 * implementation. Both resulting streams are printed.
 */
object Demo01Map {

  /**
   * Builds the two `map` pipelines on a socket text stream and runs the job.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Text lines read from host "master", port 8888.
    val words: DataStream[String] = env.socketTextStream("master", 8888)

    // Scala style: pass a function literal.
    val scalaStylePairs: DataStream[(String, Int)] = words.map(word => (word, 1))
    scalaStylePairs.print()

    // Java style: pass an explicit MapFunction implementation.
    val pairWithOne: MapFunction[String, (String, Int)] =
      new MapFunction[String, (String, Int)] {
        override def map(value: String): (String, Int) = (value, 1)
      }
    val javaStylePairs: DataStream[(String, Int)] = words.map(pairWithOne)
    javaStylePairs.print()

    env.execute()
  }
}
package com.shujia.transformation
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.api.scala.createTypeInformation
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
/**
 * Demonstrates the `flatMap` transformation on a Flink `DataStream`.
 *
 * Each incoming line may contain several words separated by commas; every word
 * is emitted as its own record. The transformation is shown once with a Scala
 * function literal and once with an explicit `FlatMapFunction`, and both
 * resulting streams are printed.
 */
object Demo02FlatMap {

  /**
   * Builds the two `flatMap` pipelines on a socket text stream and runs the job.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Each incoming line may hold several comma-separated words;
    // every word is emitted as a separate record.
    val linesDS: DataStream[String] = env.socketTextStream("master", 8888)

    // Scala style: the function returns a collection whose elements are emitted
    // one by one. A sink is attached so this variant produces visible output,
    // like the Java-style variant below.
    linesDS.flatMap(line => line.split(",")).print()

    // Java style: an explicit FlatMapFunction.
    linesDS.flatMap(new FlatMapFunction[String, String] {
      // value is one input line; out is used to emit records one at a time.
      override def flatMap(value: String, out: Collector[String]): Unit = {
        value.split(",").foreach(word => out.collect(word))
      }
    }).print()

    env.execute()
  }
}
package com.shujia.transformation
import org.apache.flink.api.common.functions.FilterFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
/**
 * Demonstrates the `filter` transformation on a Flink `DataStream`.
 *
 * Reads the students text file and keeps only the lines whose fifth
 * comma-separated field contains "文科" (liberal arts). The filter is shown
 * once with a Scala function literal and once with an explicit
 * `FilterFunction`; both variants apply the same predicate and are printed.
 */
object Demo03Filter {

  /**
   * Decides whether a line describes a liberal-arts student.
   *
   * A line qualifies only when it splits into exactly five comma-separated
   * fields and the fifth one contains "文科". Blank or otherwise malformed
   * lines yield `false` instead of raising an index error.
   *
   * @param line one raw line of the students file
   * @return `true` when the line has five fields and the fifth contains "文科"
   */
  private def isLiberalArtsStudent(line: String): Boolean = {
    val fields: Array[String] = line.split(",")
    // Length is checked first so that fields(4) is never read out of bounds.
    fields.length == 5 && fields(4).contains("文科")
  }

  /**
   * Builds the two `filter` pipelines on the students file and runs the job.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Read the students file and keep only liberal-arts students.
    val stuDS: DataStream[String] = env.readTextFile("Flink/data/stu/students.txt")

    // Scala style: pass a function literal.
    stuDS.filter(line => isLiberalArtsStudent(line)).print()

    // Java style: pass an explicit FilterFunction implementation.
    stuDS.filter(new FilterFunction[String] {
      override def filter(value: String): Boolean = isLiberalArtsStudent(value)
    }).print()

    env.execute()
  }
}
package com.shujia.transformation
import org.apache.flink.api.java.functions.KeySelector
import org.apache.flink.streaming.api.scala._
/**
 * Demonstrates `keyBy` with an explicit `KeySelector` on a Flink `DataStream`.
 *
 * Reads the students text file, builds a "field5,field4" key for every line
 * (class and gender, going by the original author's comment), and keeps a
 * running count per key.
 */
object Demo04KeyBy {

  /**
   * Builds the keyed counting pipeline on the students file and runs the job.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Read the students file, one record per line.
    val stuDS: DataStream[String] = env.readTextFile("Flink/data/stu/students.txt")

    // Count the number of students per class and gender.
    stuDS
      // Skip blank or short lines: the map below reads fields 3 and 4 and would
      // otherwise fail the job with an ArrayIndexOutOfBoundsException.
      .filter(line => line.split(",").length > 4)
      .map(line => {
        val fields: Array[String] = line.split(",")
        // Key is "<field 4>,<field 3>"; each record contributes a count of 1.
        (s"${fields(4)},${fields(3)}", 1)
      })
      // Group by the composite key held in the first tuple element.
      .keyBy(new KeySelector[(String, Int), String] {
        override def getKey(value: (String, Int)): String = value._1
      })
      // Running sum over tuple position 1 (the count).
      .sum(1)
      .print()

    env.execute()
  }
}