前言
代码原型来自于:flink 使用sql实现kafka生产者和消费者
在此基础上,采用了 ROW 类型数据,对对象进行简化与通用化处理。
Flink和Kafka依赖
Flink版本:1.12.1。
<!-- FlinkSql -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Flink Connect Kafka -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
生产者producer
// NOTE(review): class name contains a typo ("Porduce" -> "Produce"); kept unchanged
// so existing references/build configs that name this main class still work.
public class CreateDDLPorduce {

    /**
     * Demo producer: registers a Kafka-backed table via Flink SQL DDL, then
     * inserts one row every 3 seconds with a pseudo-random status in [0, 3).
     *
     * <p>Runs until the thread is interrupted. Requires a reachable Kafka
     * broker (see the bootstrap.servers placeholder in the DDL).
     */
    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings settings = EnvironmentSettings.newInstance()
                .inStreamingMode()
                //.useOldPlanner() // legacy Flink planner
                .useBlinkPlanner() // Blink planner (default since Flink 1.11)
                .build();
        StreamTableEnvironment ste = StreamTableEnvironment.create(env, settings);
        // Sink table backed by Kafka topic 'event_topic_1', JSON-encoded rows.
        String ddl = "CREATE TABLE CbryProduce(\n" +
                "customerId int,\n" +
                "oldStatus int,\n" +
                "newStatus int,\n" +
                "eventTime bigint\n" +
                ") WITH(\n" +
                "'connector.type'='kafka',\n" +
                "'connector.version'='universal',\n" +
                "'connector.properties.bootstrap.servers'='KAFKA的IP地址',\n" +
                "'connector.topic'='event_topic_1',\n" +
                "'format.type'='json'\n" +
                ")\n";
        ste.executeSql(ddl);
        while (true) {
            try {
                TimeUnit.SECONDS.sleep(3);
                // Pseudo-random status derived from the clock: 0, 1, or 2.
                int status = (int) (System.currentTimeMillis() % 3);
                String insert = "insert into CbryProduce(customerId,oldStatus,newStatus,eventTime)" +
                        "values(1024,1," + status + "," + System.currentTimeMillis() + ")";
                ste.executeSql(insert);
            } catch (InterruptedException ie) {
                // FIX: previously swallowed. Restore the interrupt flag and exit
                // the loop so the process can shut down cleanly.
                Thread.currentThread().interrupt();
                break;
            } catch (Exception ex) {
                // FIX: previously an empty catch block that hid all failures
                // (e.g. Kafka unreachable). Log and keep producing.
                ex.printStackTrace();
            }
        }
    }
}
消费者consumer
public class CreateDDLConsumer {

    /**
     * Demo consumer: registers a Kafka-backed source table via Flink SQL DDL,
     * filters rows whose {@code newStatus} is 1 or 2, converts the result to an
     * append-only {@code DataStream<Row>}, and prints each row to stderr.
     *
     * <p>Requires a reachable Kafka broker (see the bootstrap.servers
     * placeholder in the DDL) and a producer writing to 'event_topic_1'.
     */
    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode()
                // .useOldPlanner() // legacy Flink planner
                .useBlinkPlanner() // Blink planner (default since Flink 1.11)
                .build();
        StreamTableEnvironment ste = StreamTableEnvironment.create(env, settings);
        // Source table reading JSON rows from Kafka topic 'event_topic_1',
        // starting at the latest offset (only new messages are consumed).
        String ddl = "CREATE TABLE CbryConsumer(\n" + "customerId int,\n" + "oldStatus int,\n"
                + "newStatus int,\n" + "eventTime bigint\n" + ") WITH(\n" + "'connector.type'='kafka',\n"
                + "'connector.version'='universal',\n" + "'connector.properties.group.id'='event1_group',\n"
                + "'connector.properties.bootstrap.servers'='KAFKA的IP地址',\n"
                + "'connector.topic'='event_topic_1',\n" + "'connector.startup-mode' = 'latest-offset',\n"
                + "'format.type'='json'\n" + ")\n";
        ste.executeSql(ddl);
        Table queryTable = ste.sqlQuery("select customerId,newStatus as status "
                + " from CbryConsumer" + " where newStatus in(1,2)");
        // The query is a plain projection + filter (append-only), so
        // toAppendStream is safe; a query producing updates/retractions would
        // need toRetractStream instead.
        DataStream<Row> result = ste.toAppendStream(queryTable, Row.class);
        result.printToErr();
        try {
            env.execute();
        } catch (Exception e) {
            // FIX: previously auto-generated printStackTrace() that let the
            // program continue as if nothing failed. Surface the failure and
            // preserve the cause for diagnosis.
            throw new RuntimeException("Flink job execution failed", e);
        }
    }
}
采用POJO类
我们不难发现:生产者(CbryProduce)产生的数据是 JSON 格式的;而消费者(CbryConsumer)获取到的 ROW 类型数据,是以英文逗号(,)为分隔符的字符串。如果我们将 ROW 改为 POJO,则输出如下:
更多表
以Oracle为例子:
create table oracle_source (
EMPLOYEE_ID BIGINT,
START_DATE TIMESTAMP,
END_DATE TIMESTAMP,
JOB_ID VARCHAR,
DEPARTMENT_ID VARCHAR
) with (
type = 'oracle',
url = 'jdbc:oracle:thin:@//127.0.0.1:1521/ORACLE',
userName = 'userName',
password = 'password',
dbName = 'hr',
tableName = 'job_history',
timeField = 'START_DATE',
startTime = '2007-1-1 00:00:00'
);
2021.5.3
CSV和多表关联插入
String ddl2 = "CREATE TABLE CbryProduce2(\n" +
"cbry_status int\n" +
") WITH(\n" +
"'connector.type'='kafka',\n" +
"'connector.version'='universal',\n" +
"'connector.properties.bootstrap.servers'='localhost:9092',\n" +
"'connector.topic'='event_topic_2',\n" +
"'format.type'='csv',\n" +
"'format.field-delimiter'='|'\n" +
")\n"
;
String ddl3 = "CREATE TABLE CbryConsumer(\n" + "customerId int,\n" + "oldStatus int,\n"
+ "newStatus int,\n" + "eventTime bigint,\n" + "cbry_status int\n" + ") WITH(\n" + "'connector.type'='kafka',\n"
+ "'connector.version'='universal',\n" + "'connector.properties.group.id'='g2_group',\n"
+ "'connector.properties.bootstrap.servers'='localhost:9092',\n"
+ "'connector.topic'='event_topic_3',\n" + "'connector.startup-mode' = 'latest-offset',\n"
+ "'format.type'='csv',\n"
+ "'format.field-delimiter'='|'\n"
+ ")\n";
insertSql = "insert into CbryConsumer select m.customerId, m.oldStatus , m.newStatus as status , m.eventTime , n.cbry_status from CbryProduce m , CbryProduce2 n where m.newStatus=1";