I. POM preparation
I'm using Flink 1.17.1 with Java 1.8; the ${flink.version} property below is set accordingly.
<properties>
    <flink.version>1.17.1</flink.version>
</properties>

<dependencies>
    <!-- Core Flink dependencies -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-loader</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-runtime</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Flink Table API dependencies -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-common</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Flink logs through slf4j, which is only a logging facade; here log4j serves as the
         concrete implementation (add its binding as a separate dependency) -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>1.7.25</version>
    </dependency>
    <!-- Flink Kafka connector; note that newer connector releases are versioned separately from Flink itself -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Lombok annotations -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.18.20</version>
    </dependency>
    <!-- JUnit for unit tests -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.13.2</version>
        <scope>test</scope>
    </dependency>
</dependencies>
II. KafkaSource
1. Using Flink's built-in KafkaSource
Wrap it in a reusable method:
public static KafkaSource<String> getKafkaSource(String topic, String groupId, String kafkaAddress) {
    KafkaSource<String> kafkaSource = KafkaSource.<String>builder()
            .setBootstrapServers(kafkaAddress)
            .setTopics(topic)
            .setGroupId(groupId)
            // start reading from the earliest offset
            .setStartingOffsets(OffsetsInitializer.earliest())
            // the following deserializer cannot handle null (tombstone) messages:
            // .setValueOnlyDeserializer(new SimpleStringSchema())
            // to survive null messages, use a custom deserializer instead
            .setValueOnlyDeserializer(
                    new DeserializationSchema<String>() {
                        @Override
                        public String deserialize(byte[] message) throws IOException {
                            if (message != null) {
                                return new String(message, StandardCharsets.UTF_8);
                            }
                            return null;
                        }

                        @Override
                        public boolean isEndOfStream(String nextElement) {
                            return false;
                        }

                        @Override
                        public TypeInformation<String> getProducedType() {
                            return TypeInformation.of(String.class);
                        }
                    }
            ).build();
    return kafkaSource;
}
Call it from the main program:
public class Key_Vehicle_Project {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        KafkaSource<String> kafkaSource = FlinkSourceUtil.getKafkaSource("test_topic", "test", "localhost:9092");
        // a KafkaSource has no print(); turn it into a DataStream first
        DataStreamSource<String> kafkaDS = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafka_source");
        kafkaDS.print();
        env.execute("Flink from Kafka!");
    }
}
2. Creating a FlinkKafkaConsumer
The older FlinkKafkaConsumer API still works, but it has been deprecated in favor of KafkaSource:
public class Main {
    public static void main(String[] args) throws Exception {
        // create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // create the Kafka consumer
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");
        properties.setProperty("group.id", "test");
        FlinkKafkaConsumer<String> myConsumer =
                new FlinkKafkaConsumer<>("test_topic", new SimpleStringSchema(), properties);
        myConsumer.setStartFromLatest();
        // ingest the data
        DataStream<String> dataStream = env.addSource(myConsumer);
        dataStream.print();
        env.execute("Flink from Kafka");
    }
}
3. Reading a Kafka table with Flink SQL
First wrap a method that builds the Kafka connector's connection properties:
// build the WITH clause for the kafka connector
public static String getKafkaDDL(String topic, String groupId, String kafkaAddress) {
    return " WITH (\n" +
            "  'connector' = 'kafka',\n" +
            "  'topic' = '" + topic + "',\n" +
            "  'properties.bootstrap.servers' = '" + kafkaAddress + "',\n" +
            "  'properties.group.id' = '" + groupId + "',\n" +
            "  'scan.startup.mode' = 'latest-offset',\n" +
            "  'format' = 'json'\n" +
            ")";
}
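For example, getKafkaDDL("test_topic", "test", "localhost:9092") returns the following clause, ready to be appended to a CREATE TABLE statement:
 WITH (
  'connector' = 'kafka',
  'topic' = 'test_topic',
  'properties.bootstrap.servers' = 'localhost:9092',
  'properties.group.id' = 'test',
  'scan.startup.mode' = 'latest-offset',
  'format' = 'json'
)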
Then create a dynamic table:
// read from the test_topic topic and create a dynamic table over it
public void readOdsDb(StreamTableEnvironment tableEnv, String groupId) {
    tableEnv.executeSql("CREATE TABLE test_topic(\n" +
            "  `database` string,\n" +
            "  `table` string,\n" +
            "  `type` string,\n" +
            "  `data` map<string,string>,\n" +
            "  `old` map<string,string>,\n" +
            "  `ts` bigint,\n" +
            // processing-time attribute
            "  `pt` as proctime(),\n" +
            // event time derived from the epoch-second ts field
            "  `et` as TO_TIMESTAMP_LTZ(ts, 0),\n" +
            "  WATERMARK FOR `et` AS `et`\n" +
            ") " + SQLUtil.getKafkaDDL("test_topic", groupId, "localhost:9092"));
}
Finally, create the environment and query the data from Kafka.
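A minimal sketch of this step (assuming readOdsDb is reachable from the calling context, e.g. declared static, and reusing the consumer group "test" from earlier):
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
// register the dynamic table defined in readOdsDb above
readOdsDb(tableEnv, "test");
// query the Kafka-backed table; print() launches the streaming job
tableEnv.executeSql("select `database`, `table`, `type`, `ts` from test_topic").print();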
III. KafkaSink
1. Using KafkaSink
Wrap it in a method:
public static KafkaSink<String> getKafkaSink(String sinkTopic, String kafkaAddress) {
    KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
            .setBootstrapServers(kafkaAddress)
            // exactly-once writes
            .setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
            // EXACTLY_ONCE requires a transactional id prefix
            .setTransactionalIdPrefix(sinkTopic + "-tx")
            .setRecordSerializer(KafkaRecordSerializationSchema.<String>builder()
                    .setTopic(sinkTopic)
                    .setValueSerializationSchema(new SimpleStringSchema())
                    .build())
            .build();
    return kafkaSink;
}
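Note that EXACTLY_ONCE only takes effect with checkpointing enabled, since the sink commits its Kafka transactions on checkpoint completion, and the producer-side transaction timeout must stay below the broker's transaction.max.timeout.ms (15 minutes by default). A minimal sketch of the job-side setup, with an arbitrary 5-second interval:
// enable checkpointing so the exactly-once sink can commit its transactions
env.enableCheckpointing(5000L);
// if needed, lower the transaction timeout on the sink builder, e.g.
// .setProperty("transaction.timeout.ms", "600000")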
Call it from the main method:
public class Key_Vehicle_Project {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000L); // required by the exactly-once sink
        KafkaSource<String> kafkaSource = FlinkSourceUtil.getKafkaSource("test_topic", "test", "localhost:9092");
        DataStreamSource<String> kafkaOutputDS = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafka_source");
        kafkaOutputDS.print();
        kafkaOutputDS.sinkTo(FlinkSinkUtil.getKafkaSink("test_topic2", "localhost:9092"));
        env.execute("Flink to Kafka!");
    }
}
2. Writing table data to Kafka with Flink SQL
Wrap a method that builds the upsert-kafka connector's connection properties:
// build the WITH clause for the upsert-kafka connector
public static String getUpsertKafkaDDL(String sinkTopic, String kafkaAddress) {
    return " WITH (\n" +
            "  'connector' = 'upsert-kafka',\n" +
            "  'topic' = '" + sinkTopic + "',\n" +
            "  'properties.bootstrap.servers' = '" + kafkaAddress + "',\n" +
            "  'key.format' = 'json',\n" +
            "  'value.format' = 'json'\n" +
            ")";
}
In the main program, call getUpsertKafkaDDL; tableResult holds the name of the table generated earlier:
tableEnv.executeSql(
        "create table student(\n" +
        "  id int,\n" +
        "  name string,\n" +
        "  PRIMARY KEY (id) NOT ENFORCED\n" +
        ")"
        + SQLUtil.getUpsertKafkaDDL("test_topic", "localhost:9092"));
tableEnv.executeSql("insert into student select * from " + tableResult);
tableEnv.executeSql("select * from student").print();
3. Creating a plain KafkaProducer
public class ProducerDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        Producer<String, String> producer = new KafkaProducer<>(props);
        String message = "1 jojo";
        producer.send(new ProducerRecord<>("test_topic", message));
        // send() is asynchronous; close() flushes any buffered records before returning
        producer.close();
        System.out.println("Message sent successfully");
    }
}