1. Preparation
1.1 The Event class
package com.hpsk.flink.beans;

import java.sql.Timestamp;

public class Event {
    public String user;
    public String url;
    public Long timestamp;

    public Event() {
    }

    public Event(String user, String url, Long timestamp) {
        this.user = user;
        this.url = url;
        this.timestamp = timestamp;
    }

    @Override
    public String toString() {
        return "Event{" +
                "user='" + user + '\'' +
                ", url='" + url + '\'' +
                ", timestamp=" + new Timestamp(timestamp) +
                '}';
    }
}
1.2 The EventWithWatermarkSource class
package com.hpsk.flink.source;

import com.hpsk.flink.beans.Event;
import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction;
import org.apache.flink.streaming.api.watermark.Watermark;

import java.util.Calendar;
import java.util.Random;

public class EventWithWatermarkSource implements ParallelSourceFunction<Event> {
    // volatile so that cancel(), called from a different thread, is visible to run()
    private volatile boolean isRunning = true;
    String[] users = new String[]{"Alice", "Bob", "Mary", "Tom"};
    String[] urls = new String[]{"./home", "./cart", "./prod?id=1", "./prod?id=10"};

    @Override
    public void run(SourceContext<Event> ctx) throws Exception {
        Random random = new Random();
        while (isRunning) {
            String user = users[random.nextInt(users.length)];
            String url = urls[random.nextInt(urls.length)];
            long currTs = Calendar.getInstance().getTimeInMillis();
            Event event = new Event(user, url, currTs);
            // emit the record with its event-time timestamp and a watermark right behind it
            ctx.collectWithTimestamp(event, currTs);
            ctx.emitWatermark(new Watermark(event.timestamp - 1L));
            Thread.sleep(1000L);
        }
    }

    @Override
    public void cancel() {
        isRunning = false;
    }
}
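Because this source already calls collectWithTimestamp and emitWatermark, the examples below can consume it directly without an extra watermark step. Streams that do not embed watermarks (such as the tuple streams in sections 2.5 and 2.6) attach a WatermarkStrategy instead. The following is a minimal sketch of that pattern; it is not part of the original setup, and the class name and sample data are illustrative only.

import com.hpsk.flink.beans.Event;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.time.Duration;

public class WatermarkAssignExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // a small stream without embedded watermarks (illustrative data only)
        DataStream<Event> raw = env.fromElements(
                new Event("Alice", "./home", 1000L),
                new Event("Bob", "./cart", 2000L));
        // attach event-time timestamps and watermarks, tolerating 2 s of out-of-order data
        DataStream<Event> withWatermarks = raw.assignTimestampsAndWatermarks(
                WatermarkStrategy.<Event>forBoundedOutOfOrderness(Duration.ofSeconds(2))
                        .withTimestampAssigner((event, recordTs) -> event.timestamp));
        withWatermarks.print();
        env.execute();
    }
}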
2. Categories of process functions
2.1 ProcessFunction
2.1.1 Function details
public abstract class ProcessFunction<I, O> extends AbstractRichFunction {
    public abstract void processElement(I value, Context ctx, Collector<O> out) throws Exception;
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<O> out) throws Exception {}
    public abstract class Context { ... }
}
2.1.2 Basic usage
package com.hpsk.flink.function;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.source.EventWithWatermarkSource;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import java.sql.Timestamp;

public class ProcessFunctionDS {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStream<Event> inputDS = env.addSource(new EventWithWatermarkSource());
        inputDS.print("input ");
        // anonymous subclass so that Flink can extract the side-output type
        OutputTag<Event> outputTag = new OutputTag<Event>("event") {};
        SingleOutputStreamOperator<Event> result = inputDS.process(new ProcessFunction<Event, Event>() {
            @Override
            public void processElement(Event value, Context ctx, Collector<Event> out) throws Exception {
                long currTs = ctx.timerService().currentProcessingTime();
                if (!value.user.equals("Bob")) {
                    out.collect(value);
                } else {
                    // route Bob's events to the side output
                    ctx.output(outputTag, value);
                }
                System.out.println(new Timestamp(currTs) + " element arrival time: " + value);
            }
        });
        result.print("output ");
        result.getSideOutput(outputTag).print("outputTag ");
        env.execute();
    }
}
2.2 KeyedProcessFunction
2.2.1 Function details
public abstract class KeyedProcessFunction<K, I, O> extends AbstractRichFunction {
    public abstract void processElement(I value, Context ctx, Collector<O> out) throws Exception;
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<O> out) throws Exception {}
    public abstract class Context { ... }
}
2.2.2 Basic usage
package com.hpsk.flink.function;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.source.EventWithWatermarkSource;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.sql.Timestamp;

public class KeyedProcessFunctionDS {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStream<Event> inputDS = env.addSource(new EventWithWatermarkSource());
        inputDS.print("input ");
        SingleOutputStreamOperator<String> result = inputDS
                .keyBy(t -> t.user)
                .process(new KeyedProcessFunction<String, Event, String>() {
                    @Override
                    public void processElement(Event value, Context ctx, Collector<String> out) throws Exception {
                        out.collect("element arrived, timestamp: " + new Timestamp(ctx.timestamp()));
                        out.collect("element arrived, current watermark: " + new Timestamp(ctx.timerService().currentWatermark()) + "\n ------- separator -------");
                        // fire an event-time timer 10 seconds after this element's timestamp
                        ctx.timerService().registerEventTimeTimer(ctx.timestamp() + 10 * 1000L);
                    }

                    @Override
                    public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
                        out.collect("timer fired at: " + new Timestamp(timestamp));
                    }
                });
        result.print("output ");
        env.execute();
    }
}
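The TimerService also supports processing-time timers and timer deletion, which the example above does not show. A minimal fragment continuing from the inputDS stream above; the logic is illustrative only.

// illustrative fragment: processing-time timers on the same keyed Event stream
inputDS
        .keyBy(t -> t.user)
        .process(new KeyedProcessFunction<String, Event, String>() {
            @Override
            public void processElement(Event value, Context ctx, Collector<String> out) throws Exception {
                long now = ctx.timerService().currentProcessingTime();
                // fire 10 seconds later in processing time
                ctx.timerService().registerProcessingTimeTimer(now + 10 * 1000L);
                // deleting a timer is a no-op if none was registered for that timestamp
                ctx.timerService().deleteProcessingTimeTimer(now + 20 * 1000L);
                out.collect("key = " + ctx.getCurrentKey() + ", processing time = " + new Timestamp(now));
            }

            @Override
            public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
                out.collect("processing-time timer fired at: " + new Timestamp(timestamp));
            }
        })
        .print("timer ");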
2.3 ProcessWindowFunction
2.3.1 Function details
public abstract class ProcessWindowFunction<IN, OUT, KEY, W extends Window>
        extends AbstractRichFunction {
    public abstract void process(
            KEY key, Context context, Iterable<IN> elements, Collector<OUT> out) throws Exception;
    // ...
}
2.3.2 Basic usage
package com.hpsk.flink.function;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.source.EventWithWatermarkSource;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.sql.Timestamp;
import java.util.HashSet;

public class ProcessWindowFunctionDS {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);
        DataStream<Event> inputDS = env.addSource(new EventWithWatermarkSource());
        inputDS.print("input ");
        SingleOutputStreamOperator<String> result = inputDS
                .keyBy(t -> t.user)
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .process(new ProcessWindowFunction<Event, String, String, TimeWindow>() {
                    @Override
                    public void process(String s, Context ctx, Iterable<Event> elements, Collector<String> out) throws Exception {
                        // collect the distinct URLs this user visited within the window
                        HashSet<String> urls = new HashSet<>();
                        for (Event element : elements) {
                            urls.add(element.url);
                        }
                        Timestamp start = new Timestamp(ctx.window().getStart());
                        Timestamp end = new Timestamp(ctx.window().getEnd());
                        out.collect("window [ " + start + " ~ " + end + " ] -> user " + s + " visited " + urls.size() + " distinct pages.");
                    }
                });
        result.print("output ");
        env.execute();
    }
}
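A ProcessWindowFunction buffers every element of the window. When only an aggregate plus the window metadata is needed, it is usually combined with an AggregateFunction so that each window keeps a single accumulator; section 3.3.2 uses exactly this pattern. A minimal sketch continuing from the inputDS stream above (the per-user click count is illustrative only and needs an additional import of org.apache.flink.api.common.functions.AggregateFunction):

// illustrative fragment: incremental count per user, window metadata attached at the end
inputDS
        .keyBy(t -> t.user)
        .window(TumblingEventTimeWindows.of(Time.seconds(10)))
        .aggregate(
                new AggregateFunction<Event, Long, Long>() {
                    @Override public Long createAccumulator() { return 0L; }
                    @Override public Long add(Event value, Long acc) { return acc + 1; }
                    @Override public Long getResult(Long acc) { return acc; }
                    @Override public Long merge(Long a, Long b) { return a + b; }
                },
                new ProcessWindowFunction<Long, String, String, TimeWindow>() {
                    @Override
                    public void process(String user, Context ctx, Iterable<Long> counts, Collector<String> out) {
                        out.collect("user " + user + " clicked " + counts.iterator().next()
                                + " times, window end = " + new Timestamp(ctx.window().getEnd()));
                    }
                })
        .print("agg ");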
2.4 ProcessAllWindowFunction
2.4.1 Function details
public abstract class ProcessAllWindowFunction<IN, OUT, W extends Window>
        extends AbstractRichFunction {
    public abstract void process(Context context, Iterable<IN> elements, Collector<OUT> out)
            throws Exception;
    // ...
}
2.4.2 Basic usage
package com.hpsk.flink.function;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.source.EventWithWatermarkSource;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.sql.Timestamp;
import java.util.HashSet;

public class ProcessAllWindowFunctionDS {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStream<Event> inputDS = env.addSource(new EventWithWatermarkSource());
        inputDS.print("input ");
        SingleOutputStreamOperator<String> result = inputDS
                // windowAll collects all keys into a single, non-parallel window
                .windowAll(TumblingEventTimeWindows.of(Time.seconds(10)))
                .process(new ProcessAllWindowFunction<Event, String, TimeWindow>() {
                    @Override
                    public void process(Context ctx, Iterable<Event> elements, Collector<String> out) throws Exception {
                        HashSet<String> users = new HashSet<>();
                        HashSet<String> urls = new HashSet<>();
                        for (Event element : elements) {
                            urls.add(element.url);
                            users.add(element.user);
                        }
                        Timestamp start = new Timestamp(ctx.window().getStart());
                        Timestamp end = new Timestamp(ctx.window().getEnd());
                        out.collect("window [ " + start + " ~ " + end + " ] -> " + users.size() + " users in total, " + urls.size() + " distinct pages visited.");
                    }
                });
        result.print("output ");
        env.execute();
    }
}
2.5 CoProcessFunction
2.5.1 Function details
public abstract class CoProcessFunction<IN1, IN2, OUT> extends AbstractRichFunction {
    public abstract void processElement1(IN1 value, Context ctx, Collector<OUT> out)
            throws Exception;
    public abstract void processElement2(IN2 value, Context ctx, Collector<OUT> out)
            throws Exception;
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<OUT> out) throws Exception {}
    // ...
}
2.5.2 Basic usage
package com.hpsk.flink.function;

import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoProcessFunction;
import org.apache.flink.util.Collector;

public class CoProcessFunctionDS {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // order events reported by the app: (orderId, channel, timestamp)
        SingleOutputStreamOperator<Tuple3<String, String, Long>> appStream = env.fromElements(
                Tuple3.of("order-1", "app", 1000L),
                Tuple3.of("order-2", "app", 2000L)
        ).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple3<String, String, Long>>forMonotonousTimestamps()
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple3<String, String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple3<String, String, Long> element, long recordTimestamp) {
                        return element.f2;
                    }
                })
        );
        // order events reported by the third-party platform: (orderId, channel, status, timestamp)
        SingleOutputStreamOperator<Tuple4<String, String, String, Long>> thirdpartStream = env.fromElements(
                Tuple4.of("order-1", "third-party", "success", 3000L),
                Tuple4.of("order-3", "third-party", "success", 4000L)
        ).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple4<String, String, String, Long>>forMonotonousTimestamps()
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple4<String, String, String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple4<String, String, String, Long> element, long recordTimestamp) {
                        return element.f3;
                    }
                })
        );
        SingleOutputStreamOperator<String> result = appStream
                .connect(thirdpartStream)
                .keyBy(t -> t.f0, t -> t.f0)
                .process(new CoProcessFunction<Tuple3<String, String, Long>, Tuple4<String, String, String, Long>, String>() {
                    // holds the event that is still waiting for its counterpart from the other stream
                    private ValueState<Tuple3<String, String, Long>> appEventState;
                    private ValueState<Tuple4<String, String, String, Long>> thirdPartyEventState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        appEventState = getRuntimeContext().getState(new ValueStateDescriptor<Tuple3<String, String, Long>>("appEventState", Types.TUPLE(Types.STRING, Types.STRING, Types.LONG)));
                        thirdPartyEventState = getRuntimeContext().getState(new ValueStateDescriptor<Tuple4<String, String, String, Long>>("thirdPartyEventState", Types.TUPLE(Types.STRING, Types.STRING, Types.STRING, Types.LONG)));
                    }

                    @Override
                    public void processElement1(Tuple3<String, String, Long> value, Context ctx, Collector<String> out) throws Exception {
                        if (thirdPartyEventState.value() != null) {
                            out.collect("reconciliation succeeded: " + value + "  " + thirdPartyEventState.value());
                            thirdPartyEventState.clear();
                        } else {
                            appEventState.update(value);
                            // wait at most 5 seconds (event time) for the third-party record
                            ctx.timerService().registerEventTimeTimer(value.f2 + 5000L);
                        }
                    }

                    @Override
                    public void processElement2(Tuple4<String, String, String, Long> value, Context ctx, Collector<String> out) throws Exception {
                        if (appEventState.value() != null) {
                            out.collect("reconciliation succeeded: " + appEventState.value() + "  " + value);
                            appEventState.clear();
                        } else {
                            thirdPartyEventState.update(value);
                            // wait at most 5 seconds (event time) for the app record
                            ctx.timerService().registerEventTimeTimer(value.f3 + 5000L);
                        }
                    }

                    @Override
                    public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
                        if (appEventState.value() != null) {
                            out.collect("reconciliation failed: " + appEventState.value() + "  third-party record never arrived");
                        }
                        if (thirdPartyEventState.value() != null) {
                            out.collect("reconciliation failed: " + thirdPartyEventState.value() + "  app record never arrived");
                        }
                        appEventState.clear();
                        thirdPartyEventState.clear();
                    }
                });
        result.print("output ");
        env.execute();
    }
}
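One refinement worth noting: when a match is found, the timer registered by the earlier event is still pending; it fires later and simply finds both state slots empty. If the timer timestamp is kept in state, the timer can be deleted as soon as reconciliation succeeds. A minimal fragment of that idea for processElement1 (processElement2 would mirror it); the timerTsState field is my own addition, not part of the original code, and is assumed to be initialised in open() like the other states.

// illustrative fragment: delete the pending timer once the match arrives
private ValueState<Long> timerTsState;

@Override
public void processElement1(Tuple3<String, String, Long> value, Context ctx, Collector<String> out) throws Exception {
    if (thirdPartyEventState.value() != null) {
        out.collect("reconciliation succeeded: " + value + "  " + thirdPartyEventState.value());
        thirdPartyEventState.clear();
        if (timerTsState.value() != null) {
            // remove the timer the third-party event registered while it was waiting
            ctx.timerService().deleteEventTimeTimer(timerTsState.value());
            timerTsState.clear();
        }
    } else {
        appEventState.update(value);
        long timerTs = value.f2 + 5000L;
        ctx.timerService().registerEventTimeTimer(timerTs);
        timerTsState.update(timerTs);
    }
}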
2.6 ProcessJoinFunction
2.6.1 Function details
public abstract class ProcessJoinFunction<IN1, IN2, OUT> extends AbstractRichFunction {
    public abstract void processElement(IN1 left, IN2 right, Context ctx, Collector<OUT> out)
            throws Exception;
    // ...
}
2.6.2 Basic usage
package com.hpsk.flink.function;

import com.hpsk.flink.beans.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

import java.sql.Timestamp;

public class ProcessJoinFunctionDS {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        SingleOutputStreamOperator<Tuple3<String, String, Long>> orderStream = env.fromElements(
                Tuple3.of("Mary", "order-1", 5000L),
                Tuple3.of("Alice", "order-2", 5000L),
                Tuple3.of("Bob", "order-3", 20000L),
                Tuple3.of("Alice", "order-4", 20000L),
                Tuple3.of("Cary", "order-5", 51000L)
        ).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple3<String, String, Long>>forMonotonousTimestamps()
                .withTimestampAssigner(new SerializableTimestampAssigner<Tuple3<String, String, Long>>() {
                    @Override
                    public long extractTimestamp(Tuple3<String, String, Long> element, long recordTimestamp) {
                        return element.f2;
                    }
                })
        );
        SingleOutputStreamOperator<Event> clickStream = env.fromElements(
                new Event("Bob", "./cart", 2000L),
                new Event("Alice", "./prod?id=100", 3000L),
                new Event("Alice", "./prod?id=200", 3500L),
                new Event("Bob", "./prod?id=2", 2500L),
                new Event("Alice", "./prod?id=300", 36000L),
                new Event("Bob", "./home", 30000L),
                new Event("Bob", "./prod?id=1", 23000L),
                new Event("Bob", "./prod?id=3", 33000L)
        ).assignTimestampsAndWatermarks(WatermarkStrategy.<Event>forMonotonousTimestamps()
                .withTimestampAssigner(new SerializableTimestampAssigner<Event>() {
                    @Override
                    public long extractTimestamp(Event element, long recordTimestamp) {
                        return element.timestamp;
                    }
                })
        );
        SingleOutputStreamOperator<String> result = orderStream
                .keyBy(t -> t.f0)
                .intervalJoin(clickStream.keyBy(t -> t.user))
                // join each order with clicks of the same user from 5 s before to 10 s after the order
                .between(Time.seconds(-5), Time.seconds(10))
                .process(new ProcessJoinFunction<Tuple3<String, String, Long>, Event, String>() {
                    @Override
                    public void processElement(Tuple3<String, String, Long> left, Event right, Context ctx, Collector<String> out) throws Exception {
                        out.collect(right + " => {" + left.f0 + ", " + left.f1 + ", " + new Timestamp(left.f2) + "}");
                    }
                });
        result.print("output ");
        env.execute();
    }
}
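Both interval bounds are inclusive by default. If one side should be excluded, the join can be tightened with lowerBoundExclusive() or upperBoundExclusive(). The fragment below is a sketch of that variant, reusing orderStream and clickStream from above; it is not part of the original example.

// illustrative fragment: exclude matches that fall exactly on the upper bound
orderStream
        .keyBy(t -> t.f0)
        .intervalJoin(clickStream.keyBy(t -> t.user))
        .between(Time.seconds(-5), Time.seconds(10))
        .upperBoundExclusive()
        .process(new ProcessJoinFunction<Tuple3<String, String, Long>, Event, String>() {
            @Override
            public void processElement(Tuple3<String, String, Long> left, Event right, Context ctx, Collector<String> out) {
                out.collect(left.f1 + " <-> " + right.url);
            }
        })
        .print("exclusive ");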
2.7 BroadcastProcessFunction
2.7.1 Function details
public abstract class BroadcastProcessFunction<IN1, IN2, OUT> extends BaseBroadcastProcessFunction {
    public abstract void processElement(
            final IN1 value, final ReadOnlyContext ctx, final Collector<OUT> out) throws Exception;
    public abstract void processBroadcastElement(
            final IN2 value, final Context ctx, final Collector<OUT> out) throws Exception;
    // ...
}
2.7.2 Basic usage
package com.hpsk.flink.function;

import org.apache.flink.api.common.state.BroadcastState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.ReadOnlyBroadcastState;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction;
import org.apache.flink.util.Collector;

public class BroadcastProcessFunctionDS {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // configuration stream: "tableName,createStatement"
        SingleOutputStreamOperator<String> tableConfigStream = env.fromElements(
                "table1,createTable",
                "table2,createTable",
                "table3,createTable");
        // data stream: (tableName, data)
        SingleOutputStreamOperator<Tuple2<String, String>> MySqlTableStream = env.fromElements(
                Tuple2.of("table1", "data"),
                Tuple2.of("table2", "data"),
                Tuple2.of("table4", "data")
        );
        MapStateDescriptor<String, String> mapStateDescriptor = new MapStateDescriptor<>("map-state", String.class, String.class);
        BroadcastStream<String> broadcast = tableConfigStream.broadcast(mapStateDescriptor);
        SingleOutputStreamOperator<String> result = MySqlTableStream
                .connect(broadcast)
                .process(new MyBroadcastProcessFunction(mapStateDescriptor));
        result.print("output ");
        env.execute();
    }

    public static class MyBroadcastProcessFunction extends BroadcastProcessFunction<Tuple2<String, String>, String, String> {
        private MapStateDescriptor<String, String> mapStateDescriptor;

        public MyBroadcastProcessFunction(MapStateDescriptor<String, String> mapStateDescriptor) {
            this.mapStateDescriptor = mapStateDescriptor;
        }

        @Override
        public void processBroadcastElement(String value, Context ctx, Collector<String> out) throws Exception {
            // store each broadcast config record as tableName -> createStatement
            BroadcastState<String, String> configBroadcast = ctx.getBroadcastState(mapStateDescriptor);
            String[] split = value.split(",");
            configBroadcast.put(split[0].trim(), split[1].trim());
        }

        @Override
        public void processElement(Tuple2<String, String> value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
            ReadOnlyBroadcastState<String, String> broadcastState = ctx.getBroadcastState(mapStateDescriptor);
            String table = value.f0;
            String create = broadcastState.get(table);
            if (create != null) {
                out.collect(value.f0 + " is a config table, create it in Phoenix -> create statement: " + create + ", data: " + value.f1);
            } else {
                out.collect(value.f0 + " is a business table, skip table creation");
            }
        }
    }
}
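The read-only broadcast state can also be iterated, for example to list every table currently registered from the config stream. A minimal fragment (my own addition) that could sit inside processElement above; it needs an extra import of java.util.Map.

// illustrative fragment: enumerate all broadcast entries seen so far
ReadOnlyBroadcastState<String, String> broadcastState = ctx.getBroadcastState(mapStateDescriptor);
for (Map.Entry<String, String> entry : broadcastState.immutableEntries()) {
    out.collect("registered config table: " + entry.getKey() + " -> " + entry.getValue());
}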
2.8 KeyedBroadcastProcessFunction
2.8.1 Function details
public abstract class KeyedBroadcastProcessFunction<KS, IN1, IN2, OUT>
        extends BaseBroadcastProcessFunction {
    public abstract void processElement(
            final IN1 value, final ReadOnlyContext ctx, final Collector<OUT> out) throws Exception;
    public abstract void processBroadcastElement(
            final IN2 value, final Context ctx, final Collector<OUT> out) throws Exception;
    public void onTimer(final long timestamp, final OnTimerContext ctx, final Collector<OUT> out)
            throws Exception {}
    // ...
}
2.8.2 Basic usage
package com.hpsk.flink.function;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.source.EventWithWatermarkSource;
import org.apache.flink.api.common.state.BroadcastState;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.ReadOnlyBroadcastState;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.util.Collector;

public class KeyedBroadcastProcessFunctionDS {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // broadcast stream: the users whose visit counts should be reported
        SingleOutputStreamOperator<String> tableConfigStream = env.fromElements("Tom");
        SingleOutputStreamOperator<Event> MySqlTableStream = env.addSource(new EventWithWatermarkSource());
        MapStateDescriptor<String, String> mapStateDescriptor = new MapStateDescriptor<>("map-state", String.class, String.class);
        BroadcastStream<String> broadcast = tableConfigStream.broadcast(mapStateDescriptor);
        SingleOutputStreamOperator<String> result = MySqlTableStream
                .keyBy(t -> t.user)
                .connect(broadcast)
                .process(new MyBroadcastProcessFunction(mapStateDescriptor));
        result.print("output ");
        env.execute();
    }

    public static class MyBroadcastProcessFunction extends KeyedBroadcastProcessFunction<String, Event, String, String> {
        private MapStateDescriptor<String, String> mapStateDescriptor;
        // keyed state: visit count per user
        private MapState<String, Long> eventMapState;

        public MyBroadcastProcessFunction(MapStateDescriptor<String, String> mapStateDescriptor) {
            this.mapStateDescriptor = mapStateDescriptor;
        }

        @Override
        public void open(Configuration parameters) throws Exception {
            eventMapState = getRuntimeContext().getMapState(new MapStateDescriptor<String, Long>("event-map-state", String.class, Long.class));
        }

        @Override
        public void processElement(Event value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
            if (eventMapState.contains(value.user)) {
                Long num = eventMapState.get(value.user);
                eventMapState.put(value.user, num + 1);
            } else {
                eventMapState.put(value.user, 1L);
            }
            // only report counts for users present in the broadcast configuration
            ReadOnlyBroadcastState<String, String> broadcastState = ctx.getBroadcastState(mapStateDescriptor);
            String user = value.user;
            String userConfig = broadcastState.get(user);
            if (userConfig != null) {
                Long aLong = eventMapState.get(value.user);
                out.collect("user " + value.user + " visit count -> " + aLong);
            }
        }

        @Override
        public void processBroadcastElement(String value, Context ctx, Collector<String> out) throws Exception {
            BroadcastState<String, String> configBroadcast = ctx.getBroadcastState(mapStateDescriptor);
            configBroadcast.put(value, value);
        }
    }
}
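Unlike the plain BroadcastProcessFunction, the Context passed to processBroadcastElement here can also visit the keyed state of every key via applyToKeyedState. A minimal fragment (my own addition) that could be placed inside processBroadcastElement above to reset all per-user counters when a new broadcast element arrives; the descriptor must match the one registered in open(), and an extra import of org.apache.flink.api.common.state.KeyedStateFunction is needed.

// illustrative fragment: clear the "event-map-state" counter of every key
ctx.applyToKeyedState(
        new MapStateDescriptor<String, Long>("event-map-state", String.class, Long.class),
        new KeyedStateFunction<String, MapState<String, Long>>() {
            @Override
            public void process(String key, MapState<String, Long> state) throws Exception {
                state.clear();
            }
        });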
3. Case study: hot URLs
3.1 Requirement
Count the most popular URLs over the last 10 seconds and refresh the ranking every 5 seconds, which maps to a sliding event-time window of size 10 s with a 5 s slide.
3.2 Implementation
3.2.1 The Event class
The Event POJO is the same class defined in section 1.1 and is reused here unchanged.
3.2.2 The UrlViewCount class
package com.hpsk.flink.beans;

import java.sql.Timestamp;

public class UrlViewCount {
    public Long windowStart;
    public Long windowEnd;
    public String url;
    public Long count;

    public UrlViewCount() {
    }

    public UrlViewCount(Long windowStart, Long windowEnd, String url, Long count) {
        this.windowStart = windowStart;
        this.windowEnd = windowEnd;
        this.url = url;
        this.count = count;
    }

    @Override
    public String toString() {
        return "UrlViewCount{" +
                "windowStart=" + new Timestamp(windowStart) +
                ", windowEnd=" + new Timestamp(windowEnd) +
                ", url='" + url + '\'' +
                ", count=" + count +
                '}';
    }
}
3.2.3 The TopNExample class
package com.hpsk.flink.demand;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.beans.UrlViewCount;
import com.hpsk.flink.source.EventWithWatermarkSource;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;

public class TopNExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStream<Event> inputDS = env.addSource(new EventWithWatermarkSource());
        inputDS.print("input ");
        SingleOutputStreamOperator<UrlViewCount> result = inputDS
                .keyBy(t -> t.url)
                // windowAll ignores the keying and funnels everything into one non-parallel window
                .windowAll(SlidingEventTimeWindows.of(Time.seconds(10), Time.seconds(5)))
                .aggregate(new UrlHashViewCountAgg(), new UrlHashViewCountResult());
        result.print("output ");
        env.execute();
    }

    // accumulates a url -> count map per window and returns it sorted by count, descending
    public static class UrlHashViewCountAgg implements AggregateFunction<Event, HashMap<String, Long>, ArrayList<Tuple2<String, Long>>> {
        @Override
        public HashMap<String, Long> createAccumulator() {
            return new HashMap<>();
        }

        @Override
        public HashMap<String, Long> add(Event value, HashMap<String, Long> accumulator) {
            if (accumulator.containsKey(value.url)) {
                Long count = accumulator.get(value.url);
                accumulator.put(value.url, count + 1);
            } else {
                accumulator.put(value.url, 1L);
            }
            return accumulator;
        }

        @Override
        public ArrayList<Tuple2<String, Long>> getResult(HashMap<String, Long> accumulator) {
            ArrayList<Tuple2<String, Long>> result = new ArrayList<>();
            for (String key : accumulator.keySet()) {
                result.add(Tuple2.of(key, accumulator.get(key)));
            }
            result.sort(new Comparator<Tuple2<String, Long>>() {
                @Override
                public int compare(Tuple2<String, Long> o1, Tuple2<String, Long> o2) {
                    return o2.f1.compareTo(o1.f1);
                }
            });
            return result;
        }

        @Override
        public HashMap<String, Long> merge(HashMap<String, Long> a, HashMap<String, Long> b) {
            // only needed for merging (session) windows, which are not used here
            return null;
        }
    }

    // attaches the window start/end to the top-2 urls of each window
    private static class UrlHashViewCountResult extends ProcessAllWindowFunction<ArrayList<Tuple2<String, Long>>, UrlViewCount, TimeWindow> {
        @Override
        public void process(Context ctx, Iterable<ArrayList<Tuple2<String, Long>>> elements, Collector<UrlViewCount> out) throws Exception {
            long start = ctx.window().getStart();
            long end = ctx.window().getEnd();
            ArrayList<Tuple2<String, Long>> list = elements.iterator().next();
            for (int i = 0; i < Math.min(list.size(), 2); i++) {
                out.collect(new UrlViewCount(start, end, list.get(i).f0, list.get(i).f1));
            }
        }
    }
}
3.3 Improving the code
3.3.1 Shortcoming of the TopNExample class
windowAll puts all of the data into a single window on one task, so the benefit of parallel computation is lost. The improved version below keys the stream by url first, counts each url in parallel, and only collects the per-window results for the final top-N ranking.
3.3.2 Improved code
package com.hpsk.flink.demand;

import com.hpsk.flink.beans.Event;
import com.hpsk.flink.beans.UrlViewCount;
import com.hpsk.flink.source.EventWithWatermarkSource;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.Comparator;

public class UrlTopNViewCount {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataStream<Event> inputDS = env.addSource(new EventWithWatermarkSource());
        inputDS.print("input ");
        // step 1: count views per url in parallel, per sliding window
        SingleOutputStreamOperator<UrlViewCount> result = inputDS
                .keyBy(t -> t.url)
                .window(SlidingEventTimeWindows.of(Time.seconds(10), Time.seconds(5)))
                .aggregate(new UrlViewCountAgg(), new UrlViewCountResult());
        // step 2: group the per-url counts by window end and pick the top 2
        SingleOutputStreamOperator<UrlViewCount> topNResult = result
                .keyBy(t -> t.windowEnd)
                .process(new TopNProcessResult(2));
        topNResult.print("output ");
        env.execute();
    }

    // incremental counter: one Long accumulator per url and window
    public static class UrlViewCountAgg implements AggregateFunction<Event, Long, Long> {
        @Override
        public Long createAccumulator() {
            return 0L;
        }

        @Override
        public Long add(Event value, Long accumulator) {
            return accumulator + 1;
        }

        @Override
        public Long getResult(Long accumulator) {
            return accumulator;
        }

        @Override
        public Long merge(Long a, Long b) {
            // only needed for merging (session) windows, which are not used here
            return null;
        }
    }

    // wraps the aggregated count together with the url and the window bounds
    private static class UrlViewCountResult extends ProcessWindowFunction<Long, UrlViewCount, String, TimeWindow> {
        @Override
        public void process(String url, Context ctx, Iterable<Long> elements, Collector<UrlViewCount> out) throws Exception {
            long start = ctx.window().getStart();
            long end = ctx.window().getEnd();
            long count = elements.iterator().next();
            out.collect(new UrlViewCount(start, end, url, count));
        }
    }

    // collects all UrlViewCounts of one window and emits the n largest once the window is complete
    private static class TopNProcessResult extends KeyedProcessFunction<Long, UrlViewCount, UrlViewCount> {
        private Integer n;
        private ListState<UrlViewCount> urlViewCountListState;

        public TopNProcessResult(Integer n) {
            this.n = n;
        }

        @Override
        public void open(Configuration parameters) throws Exception {
            urlViewCountListState = getRuntimeContext().getListState(new ListStateDescriptor<UrlViewCount>("url-list-state", Types.POJO(UrlViewCount.class)));
        }

        @Override
        public void processElement(UrlViewCount value, Context ctx, Collector<UrlViewCount> out) throws Exception {
            urlViewCountListState.add(value);
            // fire once the watermark passes the window end, i.e. all counts for this window have arrived
            ctx.timerService().registerEventTimeTimer(value.windowEnd + 1);
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<UrlViewCount> out) throws Exception {
            ArrayList<UrlViewCount> urlViewCountArrayList = new ArrayList<>();
            for (UrlViewCount urlViewCount : urlViewCountListState.get()) {
                urlViewCountArrayList.add(urlViewCount);
            }
            urlViewCountArrayList.sort(new Comparator<UrlViewCount>() {
                @Override
                public int compare(UrlViewCount o1, UrlViewCount o2) {
                    return o2.count.compareTo(o1.count);
                }
            });
            // guard against windows that contain fewer than n distinct urls
            for (int i = 0; i < Math.min(n, urlViewCountArrayList.size()); i++) {
                out.collect(urlViewCountArrayList.get(i));
            }
            urlViewCountListState.clear();
        }
    }
}