Click Source (ClickSource)
package com.liao.chapter05
import java.util.Calendar
import org.apache.flink.streaming.api.functions.source.SourceFunction
import scala.util.Random
class ClickSource extends SourceFunction[Event]{
var running = true
override def run(ctx: SourceFunction.SourceContext[Event]): Unit = {
val random = new Random()
// Candidate users and URLs to sample from at random
val users = Array("Mary", "Alice", "Bob", "Cary")
val urls = Array("./home", "./cart", "./fav", "./prod?id=1", "./prod?id=2", "./prod?id=3")
while (running) {
// Emit one click event per second, stamped with the current wall-clock time
val event = Event(users(random.nextInt(users.length)), urls(random.nextInt(urls.length)), Calendar.getInstance.getTimeInMillis)
ctx.collect(event)
Thread.sleep(1000)
}
}
override def cancel(): Unit = running = false
}
JdbcSinkTest
package com.liao.chapter05
import java.sql.{Connection, DriverManager, PreparedStatement}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction
import org.apache.flink.streaming.api.scala._
object JdbcSinkTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.addSource(new ClickSource)
stream.addSink( new MyJdbcSinkFunc() )
env.execute("jdbc sink test")
}
class MyJdbcSinkFunc() extends RichSinkFunction[Event]{
var conn: Connection = _
var insertStmt: PreparedStatement = _
var updateStmt: PreparedStatement = _
override def open(parameters: Configuration): Unit = {
conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test?useSSL=false","root","1234")
insertStmt = conn.prepareStatement("insert into event (user,url) values(?,?) ")
updateStmt = conn.prepareStatement("update event set url = ? where user =? ")
}
override def invoke(value: Event): Unit = {
// Try to update the user's URL first; if no row was affected, insert a new record instead
updateStmt.setString(1, value.url)
updateStmt.setString(2, value.user)
updateStmt.execute()
if (updateStmt.getUpdateCount == 0) {
insertStmt.setString(1,value.user)
insertStmt.setString(2,value.url)
insertStmt.execute()
}
}
override def close(): Unit = {
insertStmt.close()
updateStmt.close()
conn.close()
}
}
}
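The sink above assumes the test database already contains an event table with user and url columns, matching the prepared statements in MyJdbcSinkFunc. A minimal one-off sketch that creates it over the same connection (column types and sizes are assumptions, not taken from the original):

import java.sql.DriverManager

object CreateEventTable {
  def main(args: Array[String]): Unit = {
    // Same connection settings as MyJdbcSinkFunc; adjust credentials as needed
    val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test?useSSL=false", "root", "1234")
    val stmt = conn.createStatement()
    // Columns match the INSERT/UPDATE statements used by the sink
    stmt.execute("CREATE TABLE IF NOT EXISTS event (user VARCHAR(50), url VARCHAR(200))")
    stmt.close()
    conn.close()
  }
}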
Broadcast Partitioning Test (PartitionBroadcastTest)
package com.liao.chapter05
import org.apache.flink.streaming.api.scala._
object PartitionBroadcastTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.addSource(new ClickSource)
stream.broadcast.print("broadcast").setParallelism(4)
stream.global.print("global").setParallelism(4)
env.execute()
}
}
Custom Partitioning Test (PartitionCustomTest)
package com.liao.chapter05
import org.apache.flink.api.common.functions.Partitioner
import org.apache.flink.streaming.api.scala._
object PartitionCustomTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.fromElements(1,2,3,4,5,6,7,8)
stream.partitionCustom(new Partitioner[Int] {
// Route even numbers to partition 0 and odd numbers to partition 1
override def partition(key: Int, numPartitions: Int): Int = {
key % 2
}
}, data => data)
.print("custom").setParallelism(4)
env.execute()
}
}
Rebalance Partitioning Test (PartitionReblanceTest)
package com.liao.chapter05
import org.apache.flink.streaming.api.scala._
object PartitionReblanceTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.addSource(new ClickSource)
stream.rebalance.print("rebalance").setParallelism(4)
env.execute()
}
}
Rescale Partitioning Test (PartitionRescaleTest)
package com.liao.chapter05
import org.apache.flink.streaming.api.functions.source.{RichParallelSourceFunction, SourceFunction}
import org.apache.flink.streaming.api.scala._
object PartitionRescaleTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.addSource(new RichParallelSourceFunction[Int] {
override def run(ctx: SourceFunction.SourceContext[Int]): Unit = {
// Emit the numbers 1-8: even numbers from subtask 0, odd numbers from subtask 1
for (i <- 0 to 7) {
if (getRuntimeContext.getIndexOfThisSubtask == (i + 1) % 2)
ctx.collect(i + 1)
}
}
override def cancel(): Unit = {}
}).setParallelism(2)
stream.rescale.print("rescale").setParallelism(4)
env.execute()
}
}
Shuffle Partitioning Test (PartitionShuffleTest)
package com.liao.chapter05
import org.apache.flink.streaming.api.scala._
object PartitionShuffleTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.addSource(new ClickSource)
stream.shuffle.print("shuffle").setParallelism(4)
env.execute()
}
}
SinkToEsTest (Elasticsearch Sink Test)
package com.liao.chapter05
import java.util
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer}
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink
import org.apache.http.HttpHost
import org.elasticsearch.client.Requests
object SinkToEsTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream :DataStream[Event]= env.fromElements(
Event("Mary", "./home", 1000L),
Event("Bob", "./cart", 2000L),
Event("Bob", "./cart", 3000L),
Event("Alice", "./cart", 3000L),
Event("Mary", "./prod?id=1", 4000L),
Event("Mary", "./prod?id=3", 6000L),
Event("Mary", "./prod?id=2", 5000L)
)
val httpHosts = new util.ArrayList[HttpHost]()
httpHosts.add(new HttpHost("hadoop002", 9200))
val esFun = new ElasticsearchSinkFunction[Event]() {
override def process(t: Event, runtimeContext: RuntimeContext, requestIndexer: RequestIndexer): Unit = {
// Build the document source: the user name as the field, the visited URL as the value
val data = new util.HashMap[String, String]()
data.put(t.user, t.url)
// Index the document into "clicks" with type "event" (mapping types are still used in Elasticsearch 6)
val indexRequest = Requests.indexRequest()
.index("clicks")
.source(data)
.`type`("event")
requestIndexer.add(indexRequest)
}
}
stream.addSink( new ElasticsearchSink.Builder[Event](httpHosts,esFun).build() )
env.execute()
}
}
SinkToFileTest (File Sink Test)
package com.liao.chapter05
import org.apache.flink.api.common.serialization.SimpleStringEncoder
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.scala._
object SinkToFileTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(4)
val stream :DataStream[Event]= env.fromElements(
Event("Mary", "./home", 1000L),
Event("Bob", "./cart", 2000L),
Event("Bob", "./cart", 3000L),
Event("Alice", "./cart", 3000L),
Event("Mary", "./prod?id=1", 4000L),
Event("Mary", "./prod?id=3", 6000L),
Event("Mary", "./prod?id=2", 5000L)
)
val fileSink = StreamingFileSink
.forRowFormat(new Path("./output"), new SimpleStringEncoder[String]("UTF-8"))
.build()
stream.map(_.toString).addSink( fileSink )
env.execute()
}
}
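By default the row-format file sink rolls part files on checkpoints and according to a built-in policy; the rolling behavior can also be configured explicitly. Below is a sketch of the same sink with an explicit DefaultRollingPolicy; the exact builder methods differ slightly between Flink versions, so treat the calls as an assumption to check against the version in use:

import java.util.concurrent.TimeUnit
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy

val rollingFileSink = StreamingFileSink
  .forRowFormat(new Path("./output"), new SimpleStringEncoder[String]("UTF-8"))
  .withRollingPolicy(
    DefaultRollingPolicy.builder()
      .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))  // start a new part file at least every 15 minutes
      .withInactivityInterval(TimeUnit.MINUTES.toMillis(5)) // roll after 5 minutes without new data
      .withMaxPartSize(1024 * 1024 * 1024)                  // roll when a part file reaches 1 GB
      .build())
  .build()

It would be attached exactly like fileSink above: stream.map(_.toString).addSink(rollingFileSink).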
SinkTokafkaTest
package com.liao.chapter05
import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer010, FlinkKafkaProducer011}
object SinkTokafkaTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val properties = new Properties()
properties.setProperty("bootstrap.servers", "hadoop002:9092")
properties.setProperty("group.id", "consumer-group")
// Read comma-separated lines from the "clicks" topic and convert each one into an Event string
val stream: DataStream[String] = env.addSource(new FlinkKafkaConsumer010[String]("clicks", new SimpleStringSchema(), properties))
.map(data => {
val fields = data.split(",")
Event(fields(0).trim, fields(1).trim, fields(2).trim.toLong).toString
})
stream.addSink(new FlinkKafkaProducer011[String]("hadoop002:9092","events",new SimpleStringSchema()))
env.execute()
}
}
SinkToRedisTest
package com.liao.chapter05
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}
object SinkToRedisTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.addSource(new ClickSource)
val conf = new FlinkJedisPoolConfig.Builder()
.setHost("hadoop002")
.build()
stream.addSink(new RedisSink[Event](conf,new MyRedisMapper))
env.execute()
}
class MyRedisMapper extends RedisMapper[Event] {
// Store every click in a Redis hash named "clicks": field = user, value = the last URL visited
override def getCommandDescription: RedisCommandDescription = new RedisCommandDescription(RedisCommand.HSET, "clicks")
override def getKeyFromData(t: Event): String = t.user
override def getValueFromData(t: Event): String = t.url
}
}
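FlinkJedisPoolConfig.Builder also exposes port, password and database settings for Redis instances that are not on the defaults; a small sketch with placeholder values (only the host is taken from the code above):

val conf = new FlinkJedisPoolConfig.Builder()
  .setHost("hadoop002")
  .setPort(6379)             // default Redis port; adjust if needed
  .setPassword("redis-pass") // placeholder, only if auth is enabled
  .setDatabase(0)
  .build()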
Bounded Source Test (SourceBoundedTest)
package com.liao.chapter05
import org.apache.flink.streaming.api.scala._
// A click event: the user, the URL visited, and the timestamp in milliseconds
case class Event(user: String, url: String, timestamp: Long)
object SourceBoundedTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream:DataStream[Int]= env.fromElements(1, 2, 3, 4, 5)
val stream1:DataStream[Event] = env.fromElements(Event("Mary", "./home", 1000L),
Event("Bob", "./cart", 2000L)
)
val clicks = List(Event("Mary", "./home", 1000L),Event("Bob", "./cart", 2000L))
val stream2:DataStream[Event] = env.fromCollection(clicks)
val stream3:DataStream[String] = env.readTextFile("input/clicks.txt")
stream.print("number")
stream1.print("1")
stream2.print("2")
stream3.print("3")
env.execute()
}
}
Custom Source Test (SourceCustomTest)
package com.liao.chapter05
import org.apache.flink.streaming.api.scala._
object SourceCustomTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream:DataStream[Event] = env.addSource(new ClickSource)
stream.print()
env.execute()
}
}
SourceKafkaTest
package com.liao.chapter05
import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
object SourceKafkaTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val properties = new Properties()
properties.setProperty("bootstrap.servers","hadoop002:9092")
properties.setProperty("group.id","consumer-group")
val stream:DataStream[String] = env.addSource(new FlinkKafkaConsumer010[String]("clicks", new SimpleStringSchema(), properties))
stream.print()
env.execute()
}
}
TransformAggTest
package com.liao.chapter05
import org.apache.flink.api.java.functions.KeySelector
import org.apache.flink.streaming.api.scala._
object TransformAggTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream :DataStream[Event]= env.fromElements(Event("Mary", "./home", 1000L), Event("Bob", "./cart", 2000L),
Event("Alice", "./cart", 3000L),
Event("Mary", "./prod?id=1", 4000L),
Event("Mary", "./prod?id=3", 6000L),
Event("Mary", "./prod?id=2", 5000L)
)
stream.keyBy(new MyKeySelector())
.maxBy("timestamp")
.print()
env.execute()
}
class MyKeySelector() extends KeySelector[Event, String] {
// Key each event by its user name
override def getKey(in: Event): String = in.user
}
}
TransformFilterTest
package com.liao.chapter05
import org.apache.flink.api.common.functions.FilterFunction
import org.apache.flink.streaming.api.scala._
object TransformFilterTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream:DataStream[Event] = env.fromElements(Event("Mary", "./home", 1000L), Event("Bob", "./cart", 2000L))
stream.filter( _.user == "Mary" ).print("1")
stream.filter(new UserFilter).print("2")
env.execute()
}
class UserFilter extends FilterFunction[Event]{
override def filter(t: Event): Boolean = t.user == "Mary"
}
}
TransformFlatmapTest
package com.liao.chapter05
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
object TransformFlatmapTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream :DataStream[Event]= env.fromElements(Event("Mary", "./home", 1000L), Event("Bob", "./cart", 2000L),
Event("Alice", "./cart", 3000L))
stream.flatMap( new MyFlatMap ).print()
env.execute()
}
class MyFlatMap extends FlatMapFunction[Event,String]{
override def flatMap(t: Event, collector: Collector[String]): Unit = {
// Mary's events emit one record (the user), Bob's emit two (user and URL), all others emit nothing
if (t.user == "Mary") {
collector.collect(t.user)
} else if (t.user == "Bob") {
collector.collect(t.user)
collector.collect(t.url)
}
}
}
}
TransformMapTest
package com.liao.chapter05
import org.apache.flink.api.common.functions.MapFunction
import org.apache.flink.streaming.api.scala._
object TransformMapTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream:DataStream[Event] =
env.fromElements(Event("Mary","./home",1000L),
Event("Bob", "./cart", 2000L)
)
stream.map( _.user ).print("1")
stream.map(new UserExtractor).print("2")
env.execute()
}
class UserExtractor extends MapFunction[Event,String]{
override def map(t: Event): String = t.user
}
}
TransformReduceTest
package com.liao.chapter05
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.streaming.api.scala._
object TransformReduceTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream :DataStream[Event]= env.fromElements(
Event("Mary", "./home", 1000L),
Event("Bob", "./cart", 2000L),
Event("Bob", "./cart", 3000L),
Event("Alice", "./cart", 3000L),
Event("Mary", "./prod?id=1", 4000L),
Event("Mary", "./prod?id=3", 6000L),
Event("Mary", "./prod?id=2", 5000L)
)
stream.map(data => (data.user, 1L))
// First, key by user and keep a running click count per user
.keyBy(_._1)
.reduce(new MySum())
// Then, route all counts to a single logical key and keep the user with the current maximum
.keyBy(data => true)
.reduce((state, data) => if (data._2 >= state._2) data else state)
.print()
env.execute()
}
class MySum() extends ReduceFunction[(String,Long)]{
override def reduce(t: (String, Long), t1: (String, Long)): (String, Long) = (t._1,t._2 + t1._2)
}
}
TransformRichFunctionTest
package com.liao.chapter05
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
object TransformRichFunctionTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(2)
val stream :DataStream[Event]= env.fromElements(
Event("Mary", "./home", 1000L),
Event("Bob", "./cart", 2000L),
Event("Bob", "./cart", 3000L),
Event("Alice", "./cart", 3000L),
Event("Mary", "./prod?id=1", 4000L),
Event("Mary", "./prod?id=3", 6000L),
Event("Mary", "./prod?id=2", 5000L)
)
stream.map( new MyRichMap )
.print()
env.execute()
}
class MyRichMap() extends RichMapFunction[Event, Long]{
override def open(parameters: Configuration): Unit =
println("Task with subtask index " + getRuntimeContext.getIndexOfThisSubtask + " starting")
override def map(in: Event): Long = in.timestamp
override def close(): Unit =
println("Task with subtask index " + getRuntimeContext.getIndexOfThisSubtask + " finishing")
}
}
TransformUDFTest
package com.liao.chapter05
import org.apache.flink.api.common.functions.FilterFunction
import org.apache.flink.streaming.api.scala._
object TransformUDFTest {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream :DataStream[Event]= env.fromElements(
Event("Mary", "./home", 1000L),
Event("Bob", "./cart", 2000L),
Event("Bob", "./cart", 3000L),
Event("Alice", "./cart", 3000L),
Event("Mary", "./prod?id=1", 4000L),
Event("Mary", "./prod?id=3", 6000L),
Event("Mary", "./prod?id=2", 5000L)
)
stream.filter(new MyFilterFunction("prod"))
.print("1")
stream.filter( new FilterFunction[Event] {
override def filter(t: Event): Boolean = t.url.contains("prod")
} )
.print("2")
stream.filter(_.url.contains("prod"))
.print("3")
env.execute()
}
class MyFilterFunction(keyWord: String) extends FilterFunction[Event]{
override def filter(t: Event): Boolean = t.url.contains(keyWord)
}
}