Flink Custom Sink: Writing to Kafka

Add the dependency

<dependency>
	<groupId>org.apache.flink</groupId>
	<artifactId>flink-connector-kafka_2.12</artifactId>
	<version>1.13.2</version>
	<scope>provided</scope>
</dependency>

Dependency jars needed when the job is submitted to and executed on a Flink cluster

Before submitting the job to the Flink server, first upload the dependency jars below to Flink's lib directory, then restart the Flink service so the jars are loaded; otherwise the job fails with a ClassNotFoundException.

  • flink-connector-kafka_2.12-1.13.2.jar
  • kafka-clients-2.4.1.jar

Build the KafkaSink parameter class

import java.io.Serializable;

import com.alibaba.fastjson.JSONObject;

/**
 * Kafka sink parameter holder.
 * @author yinlilan
 *
 */
public class KafkaSink implements Serializable {

	private static final long serialVersionUID = -6378076276774453062L;

	private String bootStrapServers;
	
	private String groupId;
	
	private String productId;

	private String domain;
	
	private String type;
	
	private String data;

	public String getBootStrapServers() {
		return bootStrapServers;
	}

	public String getGroupId() {
		return groupId;
	}

	public String getProductId() {
		return productId;
	}

	public String getDomain() {
		return domain;
	}

	public String getType() {
		return type;
	}

	public String getData() {
		return data;
	}

	/**
	 * Builds the sink parameters from a JSON object or JSON string.
	 */
	public KafkaSink(Object obj) {
		final JSONObject json = JSONObject.parseObject(obj.toString());
		this.bootStrapServers = json.getString("bootStrapServers");
		this.groupId = json.getString("groupId");
		this.productId = json.getString("productId");
		this.domain = json.getString("domain");
		this.type = json.getString("type");
		if(json.containsKey("data")) {
			this.data = json.getString("data");
		}
	}
}
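
For illustration, the parameter object could be built from a JSON configuration string such as the one below (all field values are made-up placeholders):

		// Hypothetical sink configuration; the field names match the constructor above.
		final String config = "{\"bootStrapServers\":\"localhost:9092\",\"groupId\":\"demo-group\","
				+ "\"productId\":\"p1\",\"domain\":\"iot\",\"type\":\"property\"}";
		final KafkaSink sinkParams = new KafkaSink(config);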

Build the custom KafkaMQSink

The custom Kafka sink is built on the FlinkKafkaProducer<T> class. The KafkaSerializationSchema<T> passed to it handles serialization, so you can assemble each record into whatever shape you need before it is sent.
If the data is already a String, you can simply use SimpleStringSchema() for serialization, as in the sketch below.
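
A minimal sketch of the plain-String case (the topic name demo-topic, the broker address localhost:9092 and the job name are assumptions for illustration):

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class SimpleStringSinkSketch {

	public static void main(String[] args) throws Exception {
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Assumed broker address; replace with your own cluster.
		final Properties properties = new Properties();
		properties.setProperty("bootstrap.servers", "localhost:9092");

		final DataStream<String> stream = env.fromElements("hello", "world");

		// String records are serialized directly by SimpleStringSchema.
		stream.addSink(new FlinkKafkaProducer<>("demo-topic", new SimpleStringSchema(), properties));

		env.execute("simple-string-sink-sketch");
	}
}

The rest of this post uses KafkaSerializationSchema instead, because the records here are Map<String, String> entries whose target topic is carried inside the record itself.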

import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Properties;

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.producer.ProducerRecord;

import com.ygsoft.dataprocess.vo.sink.KafkaSink;

/**
 * Kafka property sink initialization.
 * @author yinlilan
 *
 */
public class KafkaPropertySink implements Serializable {

	private static final long serialVersionUID = -7477350968706636648L;
	
	private FlinkKafkaProducer<Map<String, String>> producer;
	
	public KafkaPropertySink(final KafkaSink sinkParams) {
		Properties properties = new Properties();
    	properties.setProperty(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, sinkParams.getBootStrapServers());
    	// The Flink Kafka consumer can discover dynamically created Kafka partitions and
    	// consume them with exactly-once guarantees (discovery interval in milliseconds).
    	properties.setProperty("flink.partition-discovery.interval-millis", "10000");
    	
    	// Assembles each Map record into a ProducerRecord: "topic" selects the target topic,
    	// "value" carries the payload.
    	final KafkaSerializationSchema<Map<String, String>> serializer = new KafkaSerializationSchema<Map<String, String>>(){
			
    		private static final long serialVersionUID = 115722639942583321L;

			@Override
			public ProducerRecord<byte[], byte[]> serialize(Map<String, String> element, Long timestamp) {
				 final String topic = element.get("topic");
			     final String value = element.get("value").toString();
			     return new ProducerRecord<>(topic, value.getBytes(StandardCharsets.UTF_8));
			}

    	};
    	
    	producer = new FlinkKafkaProducer<>("default", serializer, properties, FlinkKafkaProducer.Semantic.NONE);
    	
	}

	public FlinkKafkaProducer<Map<String, String>> getProducer() {
		return producer;
	}
}
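
A sketch of how KafkaPropertySink might be wired into a job, reusing the sinkParams configuration shown earlier (the topic name, sample payload and job name are assumptions for illustration, and KafkaPropertySink is assumed to be in the same package):

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import com.ygsoft.dataprocess.vo.sink.KafkaSink;

public class KafkaPropertySinkSketch {

	public static void main(String[] args) throws Exception {
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Hypothetical sink configuration (see the KafkaSink example above).
		final KafkaSink sinkParams = new KafkaSink(
				"{\"bootStrapServers\":\"localhost:9092\",\"groupId\":\"demo-group\","
				+ "\"productId\":\"p1\",\"domain\":\"iot\",\"type\":\"property\"}");

		// Each record carries the target topic and the payload, as the serializer above expects.
		final DataStream<Map<String, String>> records = env
				.fromElements("[{\"productId\":\"p1\",\"deviceCode\":\"d1\"}]")
				.map(value -> {
					final Map<String, String> record = new HashMap<>();
					record.put("topic", "demo-property-topic"); // assumed topic name
					record.put("value", value);
					return record;
				})
				.returns(Types.MAP(Types.STRING, Types.STRING));

		records.addSink(new KafkaPropertySink(sinkParams).getProducer());

		env.execute("property-sink-sketch");
	}
}

The event sink below follows the same pattern, but rewrites the target topic and rebuilds the payload as an array of event objects derived from the incoming property records.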

import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.Map;
import java.util.Properties;

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.producer.ProducerRecord;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.ygsoft.dataprocess.vo.sink.KafkaSink;

/**
 * Kafka event sink initialization.
 * @author yinlilan
 *
 */
public class KafkaEventSink implements Serializable {

	private static final long serialVersionUID = -8333995037065268493L;

	private FlinkKafkaProducer<Map<String, String>> producer;
	
	public KafkaEventSink(final KafkaSink sinkParams) {
		Properties properties = new Properties();
    	properties.setProperty(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, sinkParams.getBootStrapServers());
    	// The Flink Kafka consumer can discover dynamically created Kafka partitions and
    	// consume them with exactly-once guarantees (discovery interval in milliseconds).
    	properties.setProperty("flink.partition-discovery.interval-millis", "10000");
    	
    	// Turns each incoming property record into an array of event objects and routes it
    	// to the corresponding event topic.
    	final KafkaSerializationSchema<Map<String, String>> serializer = new KafkaSerializationSchema<Map<String, String>>(){
			
    		private static final long serialVersionUID = 115722639942583321L;

			@Override
			public ProducerRecord<byte[], byte[]> serialize(Map<String, String> element, Long timestamp) {
				final String topic = element.get("topic").replace("property", "event");
				final JSONArray events = new JSONArray();
				final JSONObject event = JSONObject.parseObject(sinkParams.getData());
				final JSONArray propertys = JSONArray.parseArray(element.get("value").toString());
				for(int i=0; i<propertys.size(); i++) {
					final JSONObject property = propertys.getJSONObject(i);
					event.put("productId", property.get("productId"));
					event.put("deviceCode", property.get("deviceCode"));
					if (property.containsKey("subDeviceId")) {
						event.put("subDeviceId", property.get("subDeviceId"));
					}
					event.put("time", new Date().getTime());
					events.add(event);
				}
				
				return new ProducerRecord<>(topic, events.toString().getBytes(StandardCharsets.UTF_8));
			}

    	};
    	
    	producer = new FlinkKafkaProducer<>("default", serializer, properties, FlinkKafkaProducer.Semantic.NONE);
    	
	}

	public FlinkKafkaProducer<Map<String, String>> getProducer() {
		return producer;
	}
}
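
How the two sinks are chosen is not shown above; one plausible wiring, keyed off the type field of the sink configuration, might look like this (purely an assumption for illustration, reusing records and sinkParams from the earlier sketch):

		// Hypothetical selection: "event" picks the event sink, anything else the property sink.
		final FlinkKafkaProducer<Map<String, String>> producer =
				"event".equals(sinkParams.getType())
						? new KafkaEventSink(sinkParams).getProducer()
						: new KafkaPropertySink(sinkParams).getProducer();
		records.addSink(producer);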

Custom serialization with KafkaSerializationSchema

More generally, you can customize a Flink Kafka sink by implementing the org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema interface with your own serialization logic. A simple example:

import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;

public class CustomKafkaSerializationSchema implements KafkaSerializationSchema<String> {

	private final String topic;

	public CustomKafkaSerializationSchema(String topic) {
		this.topic = topic;
	}

	@Override
	public ProducerRecord<byte[], byte[]> serialize(String element, Long timestamp) {
		// Serialize the String element to a byte array; customize this logic as needed.
		byte[] serializedValue = element.getBytes();
		return new ProducerRecord<>(topic, serializedValue);
	}
}

You can then use this custom schema in a Flink program, for example:

import java.util.Properties;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class CustomKafkaSinkExample {

	public static void main(String[] args) throws Exception {
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Create the data stream.
		DataStream<String> stream = ...

		// Producer configuration (the broker address is a placeholder).
		Properties properties = new Properties();
		properties.setProperty("bootstrap.servers", "localhost:9092");

		// Create the custom Kafka sink.
		String topic = "your-topic";
		FlinkKafkaProducer<String> kafkaSink = new FlinkKafkaProducer<>(
				topic,
				new CustomKafkaSerializationSchema(topic),
				properties,
				FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);

		// Write the data stream to Kafka.
		stream.addSink(kafkaSink);

		env.execute("Custom Kafka Sink Example");
	}
}
