The Table & SQL API is Flink's relational API: you work with data much as you would with tables in a MySQL database, without hand-writing Java Flink Functions or manually tuning that Java code.
BatchTableEnvironment (Table API for offline batch processing)
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment tEnv = BatchTableEnvironment.create(env);
StreamTableEnvironment (Table API for real-time stream processing)
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
I. Basic usage of Flink SQL
1. Add the Flink SQL dependencies
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
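The snippets above assume a ${flink.version} property (and the usual flink-streaming-java / flink-clients dependencies) are already declared in the POM. Judging from the APIs used below (metadata columns in the Kafka DDL, the blink planner), this is a Flink 1.12.x / Scala 2.12 build, so a minimal sketch of the property would be:
<properties>
    <!-- assumed version; adjust to your actual build -->
    <flink.version>1.12.2</flink.version>
</properties>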
2. Flink SQL example
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import static org.apache.flink.table.api.Expressions.$;
public class SQLWordCount {
public static void main(String[] args) throws Exception {
//env -> DataStream
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//Wrap the StreamExecutionEnvironment with a table environment so that SQL can be used
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
//spark,hadoop,flink,hive
//spark,flink,flink
//flink,flink
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
//Turn the DataStream into structured data
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
@Override
public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] words = line.split(",");
for (String word : words) {
out.collect(Tuple2.of(word, 1));
}
}
});
tEnv.createTemporaryView("v_word_count", wordAndOne, $("word"), $("counts"));
Table table = tEnv.sqlQuery("SELECT word, sum(counts) counts FROM v_word_count GROUP BY word");
//table.printSchema();
//Retract stream: previously emitted results can be updated
DataStream<Tuple2<Boolean, Row>> dataStream = tEnv.toRetractStream(table, Row.class);
dataStream.print();
env.execute("SQLWordCount");
}
}
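Because the query contains a GROUP BY aggregation, its result table is continuously updated, so it has to be converted with toRetractStream: the Boolean flag marks whether a row is an insert (true) or a retraction of a previously emitted row (false). Typing spark,flink,flink into the socket produces output roughly like the following (subtask prefixes omitted; the exact Row formatting differs between Flink versions):
(true,spark,1)
(true,flink,1)
(false,flink,1)
(true,flink,2)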
3. Table API example
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import static org.apache.flink.table.api.Expressions.$;
public class TableWordCount {
public static void main(String[] args) throws Exception {
//env -> DataStream
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//Wrap the StreamExecutionEnvironment with a table environment so that SQL can be used
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
//spark,hadoop,flink,hive
//spark,flink,flink
//flink,flink
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
//Turn the DataStream into structured data
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
@Override
public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] words = line.split(",");
for (String word : words) {
out.collect(Tuple2.of(word, 1));
}
}
});
Table table = tEnv.fromDataStream(wordAndOne, $("word"), $("one"));
//Call the Table API (DSL)
Table res = table.groupBy($("word"))
.select($("word"), $("one").sum().as("counts"));
DataStream<Tuple2<Boolean, Row>> retractStream = tEnv.toRetractStream(res, Row.class);
retractStream.print();
env.execute("TableWordCount");
}
}
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import static org.apache.flink.table.api.Expressions.$;
public class AppendStreamDemo {
public static void main(String[] args) throws Exception {
//env -> DataStream
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//Wrap the StreamExecutionEnvironment with a table environment so that SQL can be used
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
//spark,hadoop,flink,hive
//spark,flink,flink
//flink,flink
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
//Turn the DataStream into structured data
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
@Override
public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] words = line.split(",");
for (String word : words) {
out.collect(Tuple2.of(word, 1));
}
}
});
Table table = tEnv.fromDataStream(wordAndOne, $("word"), $("one"));
Table res = table.select($("word").upperCase(), $("one"));
//Append-only stream: this projection never updates earlier results,
//so toAppendStream works (an aggregating query would need toRetractStream)
DataStream<Row> rowDataStream = tEnv.toAppendStream(res, Row.class);
rowDataStream.print();
env.execute("TableWordCount");
}
}
4. Tumbling-window aggregation on event time
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import static org.apache.flink.table.api.Expressions.$;
public class SqlTumblingEventTimeWindow {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//1000,u1,p1,5
//2000,u1,p1,5
//2000,u2,p1,3
//3000,u1,p1,5
//9999,u2,p1,3
DataStreamSource<String> socketTextStream = env.socketTextStream("localhost", 8888);
SingleOutputStreamOperator<Row> rowDataStream = socketTextStream.map(
new MapFunction<String, Row>() {
@Override
public Row map(String line) throws Exception {
String[] fields = line.split(",");
Long time = Long.parseLong(fields[0]);
String uid = fields[1];
String pid = fields[2];
Double money = Double.parseDouble(fields[3]);
return Row.of(time, uid, pid, money);
}
}).returns(Types.ROW(Types.LONG, Types.STRING, Types.STRING, Types.DOUBLE));
//Extract the event time from each record and generate watermarks
DataStream<Row> waterMarksRow = rowDataStream.assignTimestampsAndWatermarks(
new BoundedOutOfOrdernessTimestampExtractor<Row>(Time.seconds(0)) {
@Override
public long extractTimestamp(Row row) {
return (long) row.getField(0);
}
});
//Register the DataStream as a table and specify its schema
tableEnv.createTemporaryView("t_orders", waterMarksRow, $("time"), $("uid"), $("pid"), $("money"), $("aaa").rowtime());
//tableEnv.registerDataStream("t_orders", waterMarksRow, "time, uid, pid, money, rowtime.rowtime");
//Use SQL to aggregate over event-time tumbling windows
String sql = "SELECT uid, SUM(money) total_money, TUMBLE_START(aaa, INTERVAL '10' SECOND) as win_start, " +
"TUMBLE_END(aaa, INTERVAL '10' SECOND) as win_end " +
"FROM t_orders GROUP BY TUMBLE(aaa, INTERVAL '10' SECOND), uid";
Table table = tableEnv.sqlQuery(sql);
//Use the table environment to convert the table into an append stream
DataStream<Row> result = tableEnv.toAppendStream(table, Row.class);
result.print();
env.execute();
}
}
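With the sample records above and a 0 ms out-of-orderness bound, the record with timestamp 9999 pushes the watermark to 9999, which should close the [00:00:00, 00:00:10) window and print rows roughly like this (timestamp rendering varies by Flink version):
u1,15.0,1970-01-01 00:00:00.0,1970-01-01 00:00:10.0
u2,6.0,1970-01-01 00:00:00.0,1970-01-01 00:00:10.0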
5. Sliding-window aggregation on event time
package cn._51doit.flink.day11.stream;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import static org.apache.flink.table.api.Expressions.$;
public class SqlSlidingEventTimeWindows {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
//env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//1000,u1,p1,5
//2000,u1,p1,5
//2000,u2,p1,3
//3000,u1,p1,5
//9999,u2,p1,3
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
DataStream<Row> rowDataStream = lines.map(new MapFunction<String, Row>() {
@Override
public Row map(String line) throws Exception {
String[] fields = line.split(",");
Long time = Long.parseLong(fields[0]);
String uid = fields[1];
String pid = fields[2];
Double money = Double.parseDouble(fields[3]);
return Row.of(time, uid, pid, money);
}
}).returns(Types.ROW(Types.LONG, Types.STRING, Types.STRING, Types.DOUBLE));
//Extract the event time from each record and generate watermarks
DataStream<Row> waterMarksRow = rowDataStream.assignTimestampsAndWatermarks(
new BoundedOutOfOrdernessTimestampExtractor<Row>(Time.seconds(0)) {
@Override
public long extractTimestamp(Row row) {
return (long) row.getField(0);
}
});
//Register the DataStream as a table and specify its schema
//tableEnv.registerDataStream("t_orders", waterMarksRow, "time, uid, pid, money, rowtime.rowtime");
tableEnv.createTemporaryView("t_orders", waterMarksRow, $("time"), $("uid"), $("pid"), $("money"), $("rowtime").rowtime());
//Use SQL to aggregate over event-time sliding windows
String sql = "SELECT uid, SUM(money) total_money, HOP_END(rowtime, INTERVAL '2' SECOND, INTERVAL '10' SECOND) as widEnd" +
" FROM t_orders GROUP BY HOP(rowtime, INTERVAL '2' SECOND, INTERVAL '10' SECOND), uid";
Table table = tableEnv.sqlQuery(sql);
//Use the table environment to convert the table into an append stream
DataStream<Row> result = tableEnv.toAppendStream(table, Row.class);
result.print();
env.execute();
}
}
6. User-defined functions
package cn._51doit.flink.day11.stream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import static org.apache.flink.table.api.Expressions.$;
public class UDFSQLDemo {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//Register a file in the distributed cache; it is shipped to every TaskManager over the network
env.registerCachedFile("/Users/xing/Desktop/ip.txt", "ip-rules");
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//106.121.4.252
//42.57.88.186
DataStreamSource<String> socketTextStream = env.socketTextStream("localhost", 8888);
tableEnv.createTemporaryView("t_lines", socketTextStream, $("ip"));
//Register a user-defined function (UDF): given an IP address it returns Row<province, city>
tableEnv.createTemporarySystemFunction("ip2Location", IpLocation.class);
//tableEnv.registerFunction("split", new Split("\\W+"));
Table table = tableEnv.sqlQuery(
"SELECT ip, ip2Location(ip) location FROM t_lines");
tableEnv.toAppendStream(table, Row.class).print();
env.execute();
}
}
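The IpLocation class itself is not listed here; a minimal sketch of what such a UDF might look like, assuming the cached ip-rules file contains lines of the form startIp,endIp,province,city and using a simple linear scan instead of the usual binary search:
package cn._51doit.flink.day11.stream;
import org.apache.commons.io.FileUtils;
import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.flink.types.Row;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
public class IpLocation extends ScalarFunction {
    private transient List<String[]> rules;
    @Override
    public void open(FunctionContext context) throws Exception {
        //Read the distributed-cache file registered under the name "ip-rules"
        File file = context.getCachedFile("ip-rules");
        rules = new ArrayList<>();
        for (String line : FileUtils.readLines(file, StandardCharsets.UTF_8)) {
            rules.add(line.split(","));
        }
    }
    @DataTypeHint("ROW<province STRING, city STRING>")
    public Row eval(String ip) {
        long ipNum = ipToLong(ip);
        for (String[] r : rules) {
            if (ipNum >= Long.parseLong(r[0]) && ipNum <= Long.parseLong(r[1])) {
                return Row.of(r[2], r[3]);
            }
        }
        return Row.of(null, null);
    }
    private static long ipToLong(String ip) {
        long num = 0;
        for (String part : ip.split("\\.")) {
            num = num * 256 + Long.parseLong(part);
        }
        return num;
    }
}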
7. Reading data from Kafka
package cn._51doit.flink.day11.stream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class SQLKafkaTable {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
tEnv.executeSql(
"CREATE TABLE KafkaTable (\n" +
" `user_id` BIGINT,\n" +
" `item_id` BIGINT,\n" +
" `behavior` STRING,\n" +
" `ts` TIMESTAMP(3) METADATA FROM 'timestamp'\n" +
") WITH (\n" +
" 'connector' = 'kafka',\n" +
" 'topic' = 'kafka-csv',\n" +
" 'properties.bootstrap.servers' = 'node-1.51doit.cn:9092,node-2.51doit.cn:9092,node-3.51doit.cn:9092',\n" +
" 'properties.group.id' = 'testGroup',\n" +
" 'scan.startup.mode' = 'earliest-offset',\n" +
" 'format' = 'csv'\n" +
")"
);
Table table = tEnv.sqlQuery("SELECT * FROM KafkaTable");
DataStream<Row> appendStream = tEnv.toAppendStream(table, Row.class);
appendStream.print();
env.execute();
}
}
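The csv format maps the message payload to the declared physical columns in order (ts is taken from Kafka record metadata, not from the payload), so test records sent to the kafka-csv topic would simply look like this (hypothetical sample data):
1001,2001,view
1001,2002,cart
1002,2001,buy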

8. Reading JSON-formatted data
Add the dependency:
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>
package cn._51doit.flink.day11.stream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class SQLKafkaTableJSON {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
tEnv.executeSql(
"CREATE TABLE KafkaTable2 (\n" +
" `ts` TIMESTAMP(3) METADATA FROM 'timestamp',\n" +
" `user_id` BIGINT,\n" +
" `item_id` BIGINT,\n" +
" `behavior` STRING\n" +
") WITH (\n" +
" 'connector' = 'kafka',\n" +
" 'topic' = 'kafka-json',\n" +
" 'properties.bootstrap.servers' = 'node-1.51doit.cn:9092,node-2.51doit.cn:9092,node-3.51doit.cn:9092',\n" +
" 'properties.group.id' = 'testGroup2',\n" +
" 'scan.startup.mode' = 'earliest-offset',\n" +
" 'format' = 'json',\n" +
" 'json.ignore-parse-errors' = 'true'\n" +
")"
);
Table table = tEnv.sqlQuery("SELECT * FROM KafkaTable2");
DataStream<Row> appendStream = tEnv.toAppendStream(table, Row.class);
appendStream.print();
env.execute();
}
}
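As above, ts comes from the Kafka record metadata, so the JSON payload only needs the three declared fields; a hypothetical sample record for the kafka-json topic:
{"user_id": 1001, "item_id": 2001, "behavior": "view"}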
II. More complex usage of Flink SQL
package cn._51doit.flink.sql;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class MysqlDimDemo {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
tEnv.executeSql(
"CREATE TABLE kafka_item (\n" +
" `user_id` BIGINT,\n" +
" `item_id` BIGINT,\n" +
" `category_id` BIGINT,\n" +
" `action` STRING,\n" +
" `ts` TIMESTAMP(3) METADATA FROM 'timestamp',\n" +
" proctime as PROCTIME(), --通过计算列产生一个处理时间列\n" +
" eventTime as ts, -- 事件时间\n" +
" WATERMARK FOR eventTime as eventTime - INTERVAL '5' SECOND -- 在eventTime上定义watermark\n" +
") WITH (\n" +
" 'connector' = 'kafka',\n" +
" 'topic' = 'kafka-csv2',\n" +
" 'properties.bootstrap.servers' = 'node-1.51doit.cn:9092,node-2.51doit.cn:9092,node-3.51doit.cn:9092',\n" +
" 'properties.group.id' = 'test456',\n" +
" 'scan.startup.mode' = 'earliest-offset',\n" +
" 'format' = 'csv'\n" +
")"
);
//Category dimension table in MySQL
tEnv.executeSql(
"CREATE TABLE mysql_category_dim (\n" +
" id BIGINT,\n" +
" name VARCHAR\n" +
") WITH (\n" +
" 'connector.type' = 'jdbc',\n" +
" 'connector.url' = 'jdbc:mysql://node-3.51doit.cn:3306/aaa?characterEncoding=utf-8',\n" +
" 'connector.table' = 'tb_category',\n" +
" 'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
" 'connector.username' = 'root',\n" +
" 'connector.password' = '123456',\n" +
" 'connector.lookup.cache.max-rows' = '5000',\n" +
" 'connector.lookup.cache.ttl' = '10min'\n" +
")"
);
//Join the Kafka data with the MySQL dimension table (temporal lookup join)
tEnv.executeSql(
"CREATE VIEW v_item_categroy AS\n" +
"SELECT\n" +
" i.user_id, \n" +
" i.item_id, \n" +
" i.category_id, \n" +
" i.action, \n" +
" c.name \n" +
"FROM kafka_item AS i LEFT JOIN mysql_category_dim FOR SYSTEM_TIME AS OF i.proctime AS c \n" +
"ON i.category_id = c.id"
);
//Create a table whose data is stored in MySQL
tEnv.executeSql(
"CREATE TABLE result_sink (\n" +
" user_id BIGINT,\n" +
" item_id BIGINT,\n" +
" category_id BIGINT,\n" +
" action VARCHAR,\n" +
" name VARCHAR,\n" +
" PRIMARY KEY (user_id) NOT ENFORCED\n" +
") WITH (\n" +
" 'connector' = 'jdbc',\n" +
" 'url' = 'jdbc:mysql://node-3.51doit.cn:3306/aaa?characterEncoding=utf-8',\n" +
" 'table-name' = 'tb_out2',\n" +
" 'username' = 'root',\n" +
" 'password' = '123456'\n" +
")"
);
TableResult tableResult = tEnv.executeSql(
"INSERT INTO result_sink \n" +
"SELECT \n" +
" user_id, item_id, category_id, action, name \n" +
"FROM v_item_categroy"
);
Table t = tEnv.sqlQuery("select * from v_item_categroy");
DataStream<Tuple2<Boolean, Row>> tuple2DataStream = tEnv.toRetractStream(t, Row.class);
//A JDBC sink could be used here to write the data to ClickHouse
//tuple2DataStream.addSink()
tEnv.execute("aaa");
}
}
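On the MySQL side, the dimension table and the result table are assumed to look roughly like the following (hypothetical DDL matching the column definitions above):
CREATE TABLE tb_category (
    id BIGINT PRIMARY KEY,
    name VARCHAR(100)
);
CREATE TABLE tb_out2 (
    user_id BIGINT PRIMARY KEY,
    item_id BIGINT,
    category_id BIGINT,
    action VARCHAR(50),
    name VARCHAR(100)
);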
III. Dynamic rule engine
For installing and using Drools, see the separate Drools installation and usage guide.
1. Trigger an event when a user views products of one category more than twice
package cn.doitedu;
import cn.doitedu.pojo.Event;
import org.apache.commons.io.FileUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
import org.kie.api.io.ResourceType;
import org.kie.api.runtime.KieSession;
import org.kie.internal.utils.KieHelper;
import java.io.File;
import java.nio.charset.StandardCharsets;
public class FlinkDroolsDemo {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//user, product category id, event type
//u1001,c201,view
//Trigger an event once a user, within one day, views products of one category more than twice
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
SingleOutputStreamOperator<Tuple3<String, String, String>> tpStream = lines.map(new MapFunction<String, Tuple3<String, String, String>>() {
@Override
public Tuple3<String, String, String> map(String value) throws Exception {
String[] fields = value.split(",");
return Tuple3.of(fields[0], fields[1], fields[2]);
}
});
//Group together the behavior data of the same user
KeyedStream<Tuple3<String, String, String>, String> keyed = tpStream.keyBy(t -> t.f0);
keyed.process(new KeyedProcessFunction<String, Tuple3<String, String, String>, Tuple3<String, String, String>>() {
private transient MapState<Tuple2<String, String>, Integer> mapState;
private transient KieSession kieSession;
@Override
public void open(Configuration parameters) throws Exception {
//Define a state descriptor
//((c201,view), 3)
MapStateDescriptor<Tuple2<String, String>, Integer> stateDescriptor = new MapStateDescriptor<>("category-event-count", TypeInformation.of(new TypeHint<Tuple2<String, String>>() {
}), TypeInformation.of(new TypeHint<Integer>() {}));
mapState = getRuntimeContext().getMapState(stateDescriptor);
KieHelper kieHelper = new KieHelper();
//Read the Drools rule file into a string
String droolsStr = FileUtils.readFileToString(new File("rules/first-demo.drl"), StandardCharsets.UTF_8);
//Add the rule content
kieHelper.addContent(droolsStr, ResourceType.DRL);
//Create a rule-matching session (matches incoming data against the rules added above)
kieSession = kieHelper.build().newKieSession();
}
@Override
public void processElement(Tuple3<String, String, String> value, Context ctx, Collector<Tuple3<String, String, String>> out) throws Exception {
String uid = value.f0; //u1001
String cid = value.f1; //c201
String type = value.f2; //view
Tuple2<String, String> key = Tuple2.of(cid, type);
Integer count = mapState.get(key);
if(count == null) {
count = 0;
}
count++;
mapState.put(key, count);
Event event = new Event(type, count, false);
kieSession.insert(event);
kieSession.fireAllRules();
if (event.isHit()) {
//System.out.println("事件满足了!");
out.collect(Tuple3.of(uid, "发优惠卷", "满30减5"));
}
}
}).print();
env.execute();
}
}
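The Event POJO and the rules/first-demo.drl file referenced above are not listed; minimal sketches consistent with how they are used (field names and the rule body are assumptions):
package cn.doitedu.pojo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Event {
    private String type;    //event type, e.g. "view"
    private Integer count;  //accumulated count for the (category, type) key
    private boolean hit;    //set to true by a rule when its condition is met
}
A matching rules/first-demo.drl, modeled on the first-demo2.drl shown further below:
dialect "java"
import cn.doitedu.pojo.Event
rule "category-view-count"
when
    $e: Event(type == "view" && count > 2)
then
    $e.setHit(true);
end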
2. Storing the Drools rule code in MySQL
Create the rule table in MySQL and insert the rule rows (a sketch of the table is shown after these steps)
Modify the canal configuration so that changes to the rule table are captured from the binlog and delivered to Kafka
Create the Kafka topic that canal writes to (the code below uses drools-rules19)
Start a console consumer on that topic to verify that the rule rows arrive
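The rule table is assumed to look roughly like this (the column names match the DroolsRulesBean fields used below; the DDL itself is an assumption):
CREATE TABLE tb_drools_rules (
    id INT PRIMARY KEY AUTO_INCREMENT,
    name VARCHAR(100),     -- rule name
    code TEXT,             -- the DRL source code
    `sql` TEXT,            -- query SQL used later by the query service
    start_time BIGINT,
    end_time BIGINT,
    counts INT,
    status TINYINT         -- 1 = added, 2 = updated, 3 = deleted
);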

3. Fetching Drools rule data in real time with broadcast state
The rule data in MySQL is captured by canal and delivered to Kafka.
package cn.doitedu;
import cn.doitedu.pojo.DroolsRulesBean;
import cn.doitedu.pojo.Event;
import cn.doitedu.pojo.RulesStateBean;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.io.FileUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.BroadcastState;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.kie.api.io.ResourceType;
import org.kie.api.runtime.KieSession;
import org.kie.internal.utils.KieHelper;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.Map;
public class FlinkDroolsDemo2 {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
String servers = "node-1.51doit.cn:9092,node-2.51doit.cn:9092,node-3.51doit.cn:9092";
FlinkKafkaConsumer<String> rulesSource = KafkaSourceUtil.getKafkaSource(servers, "earliest", "drools-rules19");
FlinkKafkaConsumer<String> eventsSource = KafkaSourceUtil.getKafkaSource(servers, "earliest", "event19");
DataStreamSource<String> rulesStream = env.addSource(rulesSource);
DataStreamSource<String> eventsStream = env.addSource(eventsSource);
SingleOutputStreamOperator<DroolsRulesBean> rulesBeanStream = rulesStream.process(new ProcessFunction<String, DroolsRulesBean>() {
@Override
public void processElement(String line, Context ctx, Collector<DroolsRulesBean> out) throws Exception {
try {
JSONObject jsonObject = JSON.parseObject(line);
String type = jsonObject.getString("type");
if ("INSERT".equals(type) || "UPDATE".equals(type)) {
JSONArray jsonArray = jsonObject.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
DroolsRulesBean bean = jsonArray.getObject(i, DroolsRulesBean.class);
out.collect(bean);
}
}
} catch (Exception e) {
//e.printStackTrace();
}
}
});
MapStateDescriptor<Integer, RulesStateBean> stateDescriptor = new MapStateDescriptor<>("rule-state", Integer.class, RulesStateBean.class);
BroadcastStream<DroolsRulesBean> broadcastStream = rulesBeanStream.broadcast(stateDescriptor);
//sample event: u1001,c201,view
SingleOutputStreamOperator<Tuple3<String, String, String>> tpStream = eventsStream.map(new MapFunction<String, Tuple3<String, String, String>>() {
@Override
public Tuple3<String, String, String> map(String value) throws Exception {
String[] fields = value.split(",");
return Tuple3.of(fields[0], fields[1], fields[2]);
}
});
//Key the stream by user id
tpStream.keyBy(t -> t.f0).connect(broadcastStream).process(new KeyedBroadcastProcessFunction<String, Tuple3<String, String, String>, DroolsRulesBean, Tuple3<String, String, String>>() {
private transient MapState<Tuple2<String, String>, Integer> mapState;
@Override
public void open(Configuration parameters) throws Exception {
//Define a state descriptor
//((c201,view), 3)
MapStateDescriptor<Tuple2<String, String>, Integer> stateDescriptor = new MapStateDescriptor<>("category-event-count", TypeInformation.of(new TypeHint<Tuple2<String, String>>() {
}), TypeInformation.of(new TypeHint<Integer>() {
}));
mapState = getRuntimeContext().getMapState(stateDescriptor);
}
@Override
public void processElement(Tuple3<String, String, String> value, ReadOnlyContext ctx, Collector<Tuple3<String, String, String>> out) throws Exception {
String uid = value.f0; //u1001
String cid = value.f1; //c201
String type = value.f2; //view
Tuple2<String, String> key = Tuple2.of(cid, type);
Integer count = mapState.get(key);
if (count == null) {
count = 0;
}
count++;
mapState.put(key, count);
Event event = new Event(type, count, false);
Iterable<Map.Entry<Integer, RulesStateBean>> entries = ctx.getBroadcastState(stateDescriptor).immutableEntries();
for (Map.Entry<Integer, RulesStateBean> entry : entries) {
Integer rulesId = entry.getKey();
//KieSession
//SQL
//Params
RulesStateBean rulesStateBean = entry.getValue();
KieSession kieSession = rulesStateBean.getKieSession();
kieSession.insert(event);
//Fire all the rules
kieSession.fireAllRules();
if (event.isHit()) {
out.collect(Tuple3.of(uid, "发优惠卷", "满30减5"));
}
}
}
@Override
public void processBroadcastElement(DroolsRulesBean bean, Context ctx, Collector<Tuple3<String, String, String>> out) throws Exception {
BroadcastState<Integer, RulesStateBean> broadcastState = ctx.getBroadcastState(stateDescriptor);
short status = bean.getStatus();
Integer rulesId = bean.getId();
if (status == 1 || status == 2) {
//Add or update the rule
KieHelper kieHelper = new KieHelper();
kieHelper.addContent(bean.getCode(), ResourceType.DRL);
KieSession kieSession = kieHelper.build().newKieSession();
RulesStateBean stateBean = new RulesStateBean(rulesId, kieSession, null, bean.getStart_time(), bean.getEnd_time(), bean.getCounts());
broadcastState.put(rulesId, stateBean);
} else if (status == 3) {
//Delete the rule
broadcastState.remove(rulesId);
}
}
}).print();
env.execute();
}
}
DroolsRulesBean
package cn.doitedu.pojo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@AllArgsConstructor
@NoArgsConstructor
public class DroolsRulesBean {
private Integer id;
private String name;
private String code;
private String sql;
private Long start_time;
private Long end_time;
private Integer counts;
private short status;
}
RulesStateBean
package cn.doitedu.pojo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.kie.api.runtime.KieSession;
@Data
@NoArgsConstructor
@AllArgsConstructor
public class RulesStateBean {
private Integer id;
private KieSession kieSession;
private String sql;
private Long start_time;
private Long end_time;
private Integer counts;
}
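KafkaSourceUtil
The helper used to build the Kafka sources above is not listed either; a minimal sketch, assuming the second argument selects the start position and the group id is derived from the topic:
package cn.doitedu;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import java.util.Properties;
public class KafkaSourceUtil {
    public static FlinkKafkaConsumer<String> getKafkaSource(String servers, String offsets, String topic) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", servers);
        props.setProperty("group.id", "group-" + topic); //assumed group id convention
        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(topic, new SimpleStringSchema(), props);
        if ("earliest".equals(offsets)) {
            consumer.setStartFromEarliest();
        } else if ("latest".equals(offsets)) {
            consumer.setStartFromLatest();
        }
        return consumer;
    }
}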
4. Drools dynamically invoking a query Service backed by ClickHouse
package cn.doitedu;
import cn.doitedu.pojo.*;
import cn.doitedu.service.ClickHouseQueryServiceImpl;
import cn.doitedu.service.QueryService;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.BroadcastState;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.kie.api.io.ResourceType;
import org.kie.api.runtime.KieSession;
import org.kie.internal.utils.KieHelper;
import java.util.Map;
public class FlinkDroolsDemo3 {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
String servers = "node-1.51doit.cn:9092,node-2.51doit.cn:9092,node-3.51doit.cn:9092";
FlinkKafkaConsumer<String> rulesSource = KafkaSourceUtil.getKafkaSource(servers, "latest", "drools-rules19");
FlinkKafkaConsumer<String> eventsSource = KafkaSourceUtil.getKafkaSource(servers, "latest", "event19");
DataStreamSource<String> rulesStream = env.addSource(rulesSource);
DataStreamSource<String> eventsStream = env.addSource(eventsSource);
SingleOutputStreamOperator<DroolsRulesBean> rulesBeanStream = rulesStream.process(new ProcessFunction<String, DroolsRulesBean>() {
@Override
public void processElement(String line, Context ctx, Collector<DroolsRulesBean> out) throws Exception {
try {
JSONObject jsonObject = JSON.parseObject(line);
String type = jsonObject.getString("type");
if ("INSERT".equals(type) || "UPDATE".equals(type)) {
JSONArray jsonArray = jsonObject.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
DroolsRulesBean bean = jsonArray.getObject(i, DroolsRulesBean.class);
out.collect(bean);
}
}
} catch (Exception e) {
//e.printStackTrace();
}
}
});
MapStateDescriptor<Integer, RulesStateBean> stateDescriptor = new MapStateDescriptor<>("rule-state", Integer.class, RulesStateBean.class);
BroadcastStream<DroolsRulesBean> broadcastStream = rulesBeanStream.broadcast(stateDescriptor);
//sample event: u1001,c201,view
SingleOutputStreamOperator<Tuple3<String, String, String>> tpStream = eventsStream.map(new MapFunction<String, Tuple3<String, String, String>>() {
@Override
public Tuple3<String, String, String> map(String value) throws Exception {
String[] fields = value.split(",");
return Tuple3.of(fields[0], fields[1], fields[2]);
}
});
//Key the stream by user id
tpStream.keyBy(t -> t.f0).connect(broadcastStream).process(new KeyedBroadcastProcessFunction<String, Tuple3<String, String, String>, DroolsRulesBean, Tuple3<String, String, String>>() {
private transient MapState<Tuple2<String, String>, Integer> mapState;
private transient QueryService queryService;
@Override
public void open(Configuration parameters) throws Exception {
//Define a state descriptor
//((c201,view), 3)
MapStateDescriptor<Tuple2<String, String>, Integer> stateDescriptor = new MapStateDescriptor<>("category-event-count", TypeInformation.of(new TypeHint<Tuple2<String, String>>() {
}), TypeInformation.of(new TypeHint<Integer>() {
}));
mapState = getRuntimeContext().getMapState(stateDescriptor);
//The implementation class name could be passed in via configuration and instantiated through reflection
//queryService = (QueryService) Class.forName("cn.doitedu.service.ClickHouseQueryServiceImpl").newInstance();
queryService = new ClickHouseQueryServiceImpl();
}
@Override
public void processElement(Tuple3<String, String, String> value, ReadOnlyContext ctx, Collector<Tuple3<String, String, String>> out) throws Exception {
String uid = value.f0; //u1001
String cid = value.f1; //c201
String type = value.f2; //view
Tuple2<String, String> key = Tuple2.of(cid, type);
Integer count = mapState.get(key);
if (count == null) {
count = 0;
}
count++;
mapState.put(key, count);
Event event = new Event(type, count, false);
Iterable<Map.Entry<Integer, RulesStateBean>> entries = ctx.getBroadcastState(stateDescriptor).immutableEntries();
for (Map.Entry<Integer, RulesStateBean> entry : entries) {
Integer rulesId = entry.getKey();
//KieSession
//SQL
//Params
RulesStateBean rulesStateBean = entry.getValue();
KieSession kieSession = rulesStateBean.getKieSession();
QueryParams queryParams = new QueryParams(rulesStateBean.getSql(), uid, rulesStateBean.getStart_time(), rulesStateBean.getEnd_time(), count, cid);
DroolRuleParam droolRuleParam = new DroolRuleParam(event, queryService, queryParams, false);
//Insert droolRuleParam (SQL plus query parameters) into the kieSession
kieSession.insert(droolRuleParam);
//Fire all the rules
kieSession.fireAllRules();
if (droolRuleParam.isHit()) {
out.collect(Tuple3.of(uid, "发优惠卷", "满30减5"));
}
}
}
@Override
public void processBroadcastElement(DroolsRulesBean bean, Context ctx, Collector<Tuple3<String, String, String>> out) throws Exception {
BroadcastState<Integer, RulesStateBean> broadcastState = ctx.getBroadcastState(stateDescriptor);
short status = bean.getStatus();
Integer rulesId = bean.getId();
if (status == 1 || status == 2) {
//Add or update the rule
KieHelper kieHelper = new KieHelper();
kieHelper.addContent(bean.getCode(), ResourceType.DRL);
KieSession kieSession = kieHelper.build().newKieSession();
RulesStateBean stateBean = new RulesStateBean(rulesId, kieSession, bean.getSql(), bean.getStart_time(), bean.getEnd_time(), bean.getCounts());
broadcastState.put(rulesId, stateBean);
} else if (status == 3) {
//Delete the rule
broadcastState.remove(rulesId);
}
}
}).print();
env.execute();
}
}
ClickHouseQueryServiceImpl
package cn.doitedu.service;
import cn.doitedu.utils.ClickHouseUtil;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
public class ClickHouseQueryServiceImpl implements QueryService {
@Override
public boolean queryEventCountByRangeTime(String sql, String uid, String cid, String type, long startTime, long endTime, int counts) throws Exception {
Connection connection = ClickHouseUtil.getClickHouseConnection();
PreparedStatement preparedStatement = connection.prepareStatement(sql);
preparedStatement.setString(1, uid);
preparedStatement.setString(2, cid);
preparedStatement.setString(3, type);
preparedStatement.setLong(4, startTime);
preparedStatement.setLong(5, endTime);
int result = 0;
ResultSet rs = preparedStatement.executeQuery();
if(rs.next()) {
result = rs.getInt("counts");
}
System.out.println(result);
return result >= counts;
}
}
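QueryService
The QueryService interface is not listed; a minimal sketch matching the method signature implemented above:
package cn.doitedu.service;
public interface QueryService {
    //Returns true when the number of matching events in [startTime, endTime] reaches counts
    boolean queryEventCountByRangeTime(String sql, String uid, String cid, String type,
                                       long startTime, long endTime, int counts) throws Exception;
}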
DroolRuleParam class
package cn.doitedu.pojo;
import cn.doitedu.service.QueryService;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
@AllArgsConstructor
public class DroolRuleParam {
private Event event;
private QueryService queryService;
private QueryParams queryParams;
private boolean hit;
}
QueryParams class
package cn.doitedu.pojo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@AllArgsConstructor
@NoArgsConstructor
public class QueryParams {
private String sql;
private String uid;
private Long start_time;
private Long end_time;
private Integer counts;
private String category_id;
}
first-demo2.drl file
dialect "java"
import cn.doitedu.pojo.Event
import cn.doitedu.pojo.DroolRuleParam
import cn.doitedu.pojo.QueryParams
import cn.doitedu.service.QueryService
rule "rules1"
when
$dr: DroolRuleParam(event != null)
$event: Event(type == "view" && count >= 2) from $dr.event
then
QueryService queryService = $dr.getQueryService();
QueryParams queryParams = $dr.getQueryParams();
String type = $dr.getEvent().getType();
boolean flag = queryService.queryEventCountByRangeTime(queryParams.getSql(), queryParams.getUid(), queryParams.getCategory_id(), type, queryParams.getStart_time(), queryParams.getEnd_time(), queryParams.getCounts());
$dr.setHit(flag);
end
ClickHouseUtil utility class
package cn.doitedu.utils;
import java.sql.Connection;
import java.sql.DriverManager;
/***
* @author hunter.d
* @qq 657270652
* @wx haitao-duan
* @date 2021/3/19
**/
public class ClickHouseUtil {
private static Connection conn = null;
private static final String ckDriver = "ru.yandex.clickhouse.ClickHouseDriver";
private static final String ckUrl = "jdbc:clickhouse://node-3.51doit.cn:8123/default";
public static Connection getClickHouseConnection() throws Exception {
Class.forName(ckDriver);
Connection conn = DriverManager.getConnection(ckUrl);
return conn;
}
}
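The SQL stored in the rule table's sql column must take five positional parameters in the order the PreparedStatement above sets them (uid, cid, type, startTime, endTime) and return a column named counts; a hypothetical example against a ClickHouse event table named tb_user_event:
SELECT count(*) AS counts
FROM tb_user_event
WHERE uid = ? AND cid = ? AND type = ? AND ts >= ? AND ts <= ?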