1. Creating a Kafka Topic
The following program creates a new Kafka topic programmatically through the AdminClient API.
import java.util.Collections;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.CreateTopicsResult;
import org.apache.kafka.clients.admin.KafkaAdminClient;
import org.apache.kafka.clients.admin.NewTopic;
import java.util.Properties;
public class CreateKafkaTopic {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put("bootstrap.servers", "124.222.201.62:9092"); // broker address; the AdminClient only needs bootstrap.servers
AdminClient adminClient = null;
try {
adminClient = KafkaAdminClient.create(properties);
NewTopic newTopic = new NewTopic("mytest001", 1, (short) 1); // topic name, partition count, replication factor
CreateTopicsResult createTopicsResult = adminClient.createTopics(Collections.singletonList(newTopic));
createTopicsResult.all().get(); // blocks until the topic has been created
System.out.println("done");
} catch (Exception e) {
e.printStackTrace();
}finally {
if(adminClient != null){
adminClient.close();
}
}
}
}
Verify that the topic exists with the command-line tool:
./kafka-topics.sh --zookeeper 127.0.0.1:2181 --list
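On Kafka 2.2 and later, kafka-topics.sh also accepts --bootstrap-server in place of --zookeeper. The check can likewise be done programmatically with the same AdminClient; a minimal sketch, assuming the broker address used above (the class name ListKafkaTopics is illustrative):
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.KafkaAdminClient;
import java.util.Properties;
import java.util.Set;
public class ListKafkaTopics {
public static void main(String[] args) throws Exception {
Properties properties = new Properties();
properties.put("bootstrap.servers", "124.222.201.62:9092");
// try-with-resources closes the AdminClient automatically
try (AdminClient adminClient = KafkaAdminClient.create(properties)) {
Set<String> topics = adminClient.listTopics().names().get(); // fetch all topic names
System.out.println("Existing topics: " + topics);
System.out.println(topics.contains("mytest001") ? "mytest001 exists" : "mytest001 not found");
}
}
}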
2. Dual-Stream Join
The example below registers two Kafka-backed tables over the quick_start topic, joins them on the id column with Flink SQL, and also reads the same topic as a plain DataStream.
package org.flink;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Json;
import org.apache.flink.table.descriptors.Kafka;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.Properties;
/**
* @author hanjiajun02
* @date 2023/10/18 16:08
*/
public class DataSourceJoin {
public static void main(String[] args) throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "124.222.201.62:9092"); // Kafka broker address
properties.setProperty("group.id", "test"); // consumer group ID
properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
// Configure a KafkaSource for the DataStream part of the job below
KafkaSource<String> kafkaSource = KafkaSource.<String>builder()
.setTopics("quick_start")
.setValueOnlyDeserializer(new SimpleStringSchema())
.setProperties(properties).build();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
tableEnv.connect(new Kafka().version("universal").topic("quick_start").startFromLatest().property("bootstrap.servers", "124.222.201.62:9092"))
.withFormat(new Json())
.withSchema(new Schema() // define the table schema
.field("name", "STRING")
.field("sex", "INT")
.field("id", "INT"))
.createTemporaryTable("MyKafkaTable1");
ProducerRecord<String, String> record = new ProducerRecord<>("quick_start", "{\"name\":\"Shibo\", \"sex\":1, \"id\":1}");
KafkaProducer<String, String> producer = new KafkaProducer<>(properties);
producer.send(record);
tableEnv.executeSql("SELECT * FROM MyKafkaTable1"); // submits the query; call print() or collect() on the returned TableResult to inspect rows
tableEnv.connect(new Kafka().version("universal").topic("quick_start").startFromLatest().property("bootstrap.servers", "124.222.201.62:9092"))
.withFormat(new Json())
.withSchema(new Schema() // define the table schema
.field("name", "STRING")
.field("age", "INT")
.field("id", "INT"))
.createTemporaryTable("MyKafkaTable2");
ProducerRecord<String, String> record_back = new ProducerRecord<>("quick_start", "{\"name\":\"Shibo\", \"age\":1, \"id\":1}");
KafkaProducer<String, String> producer_back = new KafkaProducer<>(properties);
producer_back.send(record_back);
// Interval-join variant (requires a ts time attribute in both tables):
//String sqlJoinQuery = "SELECT * FROM MyKafkaTable1, MyKafkaTable2 WHERE MyKafkaTable1.id = MyKafkaTable2.id AND MyKafkaTable1.ts BETWEEN MyKafkaTable2.ts AND MyKafkaTable2.ts + INTERVAL '1' MINUTE";
String sqlJoinQuery = "SELECT * FROM MyKafkaTable1, MyKafkaTable2 WHERE MyKafkaTable1.id = MyKafkaTable2.id";
Table joinResult = tableEnv.sqlQuery(sqlJoinQuery);
DataStream<Row> resultStream = tableEnv.toAppendStream(joinResult, Row.class);
resultStream.print();
joinResult.printSchema();
DataStream<String> stream = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "KafkaSource" );
DataStream<String> upperCaseStream = stream.map(new MapFunction<String, String>() {
@Override
public String map(String value) {
System.out.println("Value : " + value);
return value.toUpperCase();
}
});
upperCaseStream.print();
System.out.println("TEST");
env.execute();
}
}
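The tableEnv.connect(...) descriptor API used above is deprecated in Flink 1.13; the recommended replacement is a CREATE TABLE DDL statement executed with executeSql. A minimal sketch for MyKafkaTable1, assuming the same topic, broker address, and JSON format (the 'json' format additionally needs the flink-json dependency, see the pom section):
// Register MyKafkaTable1 via DDL instead of the deprecated descriptor API
tableEnv.executeSql(
"CREATE TABLE MyKafkaTable1 (" +
" name STRING," +
" sex INT," +
" id INT" +
") WITH (" +
" 'connector' = 'kafka'," +
" 'topic' = 'quick_start'," +
" 'properties.bootstrap.servers' = '124.222.201.62:9092'," +
" 'properties.group.id' = 'test'," +
" 'scan.startup.mode' = 'latest-offset'," +
" 'format' = 'json'" +
")");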
3. InsertData: Inserting Data into the Kafka Data Source
The example below registers the same two Kafka-backed tables and publishes sample JSON records to the quick_start topic with a KafkaProducer.
package org.flink;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Json;
import org.apache.flink.table.descriptors.Kafka;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.Properties;
/**
* @author hanjiajun02
* @date 2023/10/18 16:08
*/
public class InsertData {
public static void main(String[] args) throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "124.222.201.62:9092"); // Kafka broker address
properties.setProperty("group.id", "test"); // consumer group ID
properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
// Configure a KafkaSource for the DataStream part of the job below
KafkaSource<String> kafkaSource = KafkaSource.<String>builder()
.setTopics("quick_start")
.setValueOnlyDeserializer(new SimpleStringSchema())
.setProperties(properties).build();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
tableEnv.connect(new Kafka().version("universal").topic("quick_start").startFromLatest().property("bootstrap.servers", "124.222.201.62:9092"))
.withFormat(new Json())
.withSchema(new Schema() // define the table schema
.field("name", "STRING")
.field("sex", "INT")
.field("id", "INT"))
.createTemporaryTable("MyKafkaTable1");
ProducerRecord<String, String> record = new ProducerRecord<>("quick_start", "{\"name\":\"Shibo\", \"sex\":1, \"id\":1}");
KafkaProducer<String, String> producer = new KafkaProducer<>(properties);
producer.send(record);
tableEnv.executeSql("SELECT * FROM MyKafkaTable1"); // submits the query; call print() or collect() on the returned TableResult to inspect rows
tableEnv.connect(new Kafka().version("universal").topic("quick_start").startFromLatest().property("bootstrap.servers", "124.222.201.62:9092"))
.withFormat(new Json())
.withSchema(new Schema() // define the table schema
.field("name", "STRING")
.field("age", "INT")
.field("id", "INT"))
.createTemporaryTable("MyKafkaTable2");
ProducerRecord<String, String> record_back = new ProducerRecord<>("quick_start", "{\"name\":\"xuchu\", \"age\":1, \"id\":1}");
KafkaProducer<String, String> producer_back = new KafkaProducer<>(properties);
producer_back.send(record_back);
// env.execute() needs at least one DataStream operator in the topology, so read the source and print it
env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "KafkaSource").print();
env.execute("insert into KafkaTable1"); // the string argument is only the job name, not an executed SQL statement
}
}
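As an alternative to the raw KafkaProducer, rows can also be written through the Table API with an INSERT statement, provided the table is registered with a connector definition that supports writing (for example the DDL form sketched in section 2). A minimal sketch, assuming MyKafkaTable1 has the columns name STRING, sex INT, id INT:
// Write one row into the Kafka-backed table; await() blocks until the insert job finishes
tableEnv.executeSql("INSERT INTO MyKafkaTable1 VALUES ('Shibo', 1, 1)").await();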
999. pom.xml Dependencies
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>data-preprocess</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<scala.version>2.12</scala.version>
<flink.version>1.13.2</flink.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.1.0</version>
<exclusions>
<exclusion>
<artifactId>jline</artifactId>
<groupId>jline</groupId>
</exclusion>
<exclusion>
<artifactId>avro</artifactId>
<groupId>org.apache.avro</groupId>
</exclusion>
<exclusion>
<artifactId>guava</artifactId>
<groupId>com.google.guava</groupId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
<exclusions>
<exclusion>
<artifactId>avro</artifactId>
<groupId>org.apache.avro</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.1.0</version>
<exclusions>
<exclusion>
<artifactId>antlr-runtime</artifactId>
<groupId>org.antlr</groupId>
</exclusion>
<exclusion>
<artifactId>commons-compress</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
<exclusion>
<artifactId>commons-lang</artifactId>
<groupId>commons-lang</groupId>
</exclusion>
<exclusion>
<artifactId>commons-httpclient</artifactId>
<groupId>commons-httpclient</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>2.0.5</version>
<exclusions>
<exclusion>
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
<exclusion>
<artifactId>commons-codec</artifactId>
<groupId>commons-codec</groupId>
</exclusion>
<exclusion>
<artifactId>commons-io</artifactId>
<groupId>commons-io</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-auth</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>hadoop-common</artifactId>
<groupId>org.apache.hadoop</groupId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>2.0.5</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.23</version>
</dependency>
<!-- parse the MySQL binlog -->
<dependency>
<groupId>com.github.shyiko</groupId>
<artifactId>mysql-binlog-connector-java</artifactId>
<version>0.21.0</version>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.8.2</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.20</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.12</artifactId>
<version>2.4.0</version>
<exclusions>
<exclusion>
<artifactId>zookeeper</artifactId>
<groupId>org.apache.zookeeper</groupId>
</exclusion>
<exclusion>
<artifactId>snappy-java</artifactId>
<groupId>org.xerial.snappy</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.78</version> <!-- use the latest available version if needed -->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>snappy-java</artifactId>
<groupId>org.xerial.snappy</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- If the project uses Scala, the Scala standard library dependency is also required -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.12.10</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.21</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.12</artifactId>
<!-- use the Flink version defined in the properties section -->
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
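<!-- flink-json provides the Json descriptor and the 'json' format used by the Kafka tables above; assumed to be required for the examples in sections 2 and 3 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>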
</dependencies>
</project>