Flink 自定义sink 写入 Kafka
添加依赖
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.12</artifactId>
<version>1.13.2</version>
<scope>provided</scope>
</dependency>
基于 Flink 服务提交任务并执行时需要的依赖包
基于 flink 服务器提交任务前,先上传依赖包到 flink 的 lib 目录下;然后重启 flink 服务,使 jar 被加载;否则运行时会出现 ClassNotFoundException 异常。
- flink-connector-kafka_2.12-1.13.2.jar
- kafka-clients-2.4.1.jar
构建KafkaSink参数实例
/**
 * Immutable value object holding Kafka sink parameters, parsed from a
 * JSON configuration object (fastjson).
 *
 * <p>Expected JSON keys: {@code bootStrapServers}, {@code groupId},
 * {@code productId}, {@code domain}, {@code type}, and optionally
 * {@code data}. Missing keys yield {@code null} fields.
 *
 * @author yinlilan
 */
public class KafkaSink implements Serializable {

    private static final long serialVersionUID = -6378076276774453062L;

    /** Kafka bootstrap servers, e.g. "host1:9092,host2:9092". */
    private final String bootStrapServers;
    /** Consumer group id. */
    private final String groupId;
    /** Product identifier. */
    private final String productId;
    /** Business domain. */
    private final String domain;
    /** Sink type discriminator. */
    private final String type;
    /** Optional payload template (JSON string); may be null. */
    private final String data;

    /**
     * Parses sink parameters from the JSON string form of {@code obj}.
     *
     * @param obj any object whose {@code toString()} yields a JSON object
     * @throws NullPointerException if {@code obj} is null
     * @throws com.alibaba.fastjson.JSONException if the text is not valid JSON
     */
    public KafkaSink(Object obj) {
        final JSONObject json = JSONObject.parseObject(obj.toString());
        this.bootStrapServers = json.getString("bootStrapServers");
        this.groupId = json.getString("groupId");
        this.productId = json.getString("productId");
        this.domain = json.getString("domain");
        this.type = json.getString("type");
        // getString already returns null for an absent key, so no
        // containsKey guard is needed.
        this.data = json.getString("data");
    }

    public String getBootStrapServers() {
        return bootStrapServers;
    }

    public String getGroupId() {
        return groupId;
    }

    public String getProductId() {
        return productId;
    }

    public String getDomain() {
        return domain;
    }

    public String getType() {
        return type;
    }

    public String getData() {
        return data;
    }
}
构建自定义KafkaMQSink
基于FlinkKafkaProducer< T > 类实现KafkaSink,其中KafkaSerializationSchema< T >类型是用于数据序列化的,可以将数据组装成你想要的方式然后发送出去。
如果数据是String类型的可以直接用 SimpleStringSchema() 自动进行序列化即可。
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Properties;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.producer.ProducerRecord;
import com.ygsoft.dataprocess.vo.sink.KafkaSink;
/**
 * Builds a {@link FlinkKafkaProducer} that writes {@code Map<String,String>}
 * elements to the Kafka topic named in each element's {@code "topic"} entry,
 * using the element's {@code "value"} entry as the UTF-8 record payload.
 *
 * @author yinlilan
 */
public class KafkaPropertySink implements Serializable {

    private static final long serialVersionUID = -7477350968706636648L;

    /** Fallback topic used when a record does not supply one. */
    private static final String DEFAULT_TOPIC = "default";

    private final FlinkKafkaProducer<Map<String, String>> producer;

    /**
     * Creates the producer from the given sink parameters.
     *
     * @param sinkParams connection parameters; only the bootstrap servers are used here
     */
    public KafkaPropertySink(final KafkaSink sinkParams) {
        final Properties properties = new Properties();
        properties.setProperty(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, sinkParams.getBootStrapServers());
        // Enable dynamic partition discovery so newly created Kafka
        // partitions are picked up at runtime.
        properties.setProperty("flink.partition-discovery.interval-millis", "10000");
        // Outbound serialization schema (the original misnamed it "deserializer").
        final KafkaSerializationSchema<Map<String, String>> serializer =
                new KafkaSerializationSchema<Map<String, String>>() {
            private static final long serialVersionUID = 115722639942583321L;

            @Override
            public ProducerRecord<byte[], byte[]> serialize(final Map<String, String> element, final Long timestamp) {
                final String topic = element.get("topic");
                // element values are already Strings; no toString() needed.
                final String value = element.get("value");
                return new ProducerRecord<>(topic, value.getBytes(StandardCharsets.UTF_8));
            }
        };
        // Fix: default topic name was misspelled "defult", which diverged
        // from KafkaEventSink's "default".
        producer = new FlinkKafkaProducer<>(DEFAULT_TOPIC, serializer, properties, FlinkKafkaProducer.Semantic.NONE);
    }

    /** @return the configured Flink Kafka producer sink */
    public FlinkKafkaProducer<Map<String, String>> getProducer() {
        return producer;
    }
}
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.Map;
import java.util.Properties;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.producer.ProducerRecord;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.ygsoft.dataprocess.vo.sink.KafkaSink;
/**
 * Builds a {@link FlinkKafkaProducer} that converts incoming property records
 * into event records and writes them to the matching event topic (the property
 * topic name with "property" replaced by "event").
 *
 * @author yinlilan
 */
public class KafkaEventSink implements Serializable {

    private static final long serialVersionUID = -8333995037065268493L;

    private final FlinkKafkaProducer<Map<String, String>> producer;

    /**
     * Creates the producer from the given sink parameters.
     *
     * @param sinkParams connection parameters; {@code getData()} supplies the
     *        JSON event template cloned for every produced event
     */
    public KafkaEventSink(final KafkaSink sinkParams) {
        final Properties properties = new Properties();
        properties.setProperty(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, sinkParams.getBootStrapServers());
        // Enable dynamic partition discovery so newly created Kafka
        // partitions are picked up at runtime.
        properties.setProperty("flink.partition-discovery.interval-millis", "10000");
        // Outbound serialization schema (the original misnamed it "deserializer").
        final KafkaSerializationSchema<Map<String, String>> serializer =
                new KafkaSerializationSchema<Map<String, String>>() {
            private static final long serialVersionUID = 115722639942583321L;

            @Override
            public ProducerRecord<byte[], byte[]> serialize(final Map<String, String> element, final Long timestamp) {
                // Property topics map onto event topics by name substitution.
                final String topic = element.get("topic").replace("property", "event");
                final JSONArray events = new JSONArray();
                final JSONArray propertys = JSONArray.parseArray(element.get("value"));
                for (int i = 0; i < propertys.size(); i++) {
                    final JSONObject property = propertys.getJSONObject(i);
                    // Fix: parse a fresh event template per iteration. The
                    // original reused one JSONObject across the loop, so every
                    // entry in "events" referenced the SAME instance and the
                    // serialized array held N copies of the last property's values.
                    final JSONObject event = JSONObject.parseObject(sinkParams.getData());
                    event.put("productId", property.get("productId"));
                    event.put("deviceCode", property.get("deviceCode"));
                    if (property.containsKey("subDeviceId")) {
                        event.put("subDeviceId", property.get("subDeviceId"));
                    }
                    // Epoch millis; System.currentTimeMillis() avoids a
                    // throwaway java.util.Date allocation.
                    event.put("time", System.currentTimeMillis());
                    events.add(event);
                }
                return new ProducerRecord<>(topic, events.toString().getBytes(StandardCharsets.UTF_8));
            }
        };
        producer = new FlinkKafkaProducer<>("default", serializer, properties, FlinkKafkaProducer.Semantic.NONE);
    }

    /** @return the configured Flink Kafka producer sink */
    public FlinkKafkaProducer<Map<String, String>> getProducer() {
        return producer;
    }
}