/*
 * Requirement:
 *   Consume event data from a Kafka source topic and sink each event into a
 *   per-project Kafka topic (one topic created per project name).
 */
package cn.sensorsdata.dev;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import cn.sensorsdata.utils.GetPropertiesUtils;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
/**
 * Flink streaming job that reads JSON events from a source Kafka topic and
 * routes each event to a per-project Kafka topic named
 * {@code dumper_project_<project_id>_event_topic}, dropping events whose
 * {@code project} field is not in the configured project list.
 */
public class SinkKafkaToKafka {
    public static final String KEYDESERIALIZER = "key.deserializer";
    public static final String VALUEDESERIALIZER = "value.deserializer";
    public static final String KEYDESERIALIZERVALUE = "org.apache.kafka.common.serialization.StringDeserializer";
    public static final String VALUEDESERIALIZERVALUE = "org.apache.kafka.common.serialization.StringDeserializer";
    public static final String BOOTSTRAPSERVERS = "bootstrap.servers";
    public static final String ZOOKEEPERCONNECT = "zookeeper.connect";

    public static void main(String[] args) throws Exception {
        // Project names that are allowed through; events for any other project are dropped.
        final Map<String, String> projectList = getProjectList();

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Restart up to 3 times with a 10 s delay between attempts before failing the job.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
                3,
                Time.of(10, TimeUnit.SECONDS)));

        // ---- Kafka consumer configuration ----
        Properties consumerProps = new Properties();
        consumerProps.put(BOOTSTRAPSERVERS, GetPropertiesUtils.getDecodeKeyUtil(BOOTSTRAPSERVERS));
        // NOTE(review): zookeeper.connect is ignored by modern Kafka clients; kept only
        // so existing configuration files keep working — confirm before removing.
        consumerProps.put(ZOOKEEPERCONNECT, GetPropertiesUtils.getDecodeKeyUtil(ZOOKEEPERCONNECT));
        consumerProps.put(KEYDESERIALIZER, KEYDESERIALIZERVALUE);
        consumerProps.put(VALUEDESERIALIZER, VALUEDESERIALIZERVALUE);
        consumerProps.setProperty("group.id", GetPropertiesUtils.getDecodeKeyUtil("group.id"));

        // Source topic must match the dumper launcher topic configured in GetPropertiesUtils.
        DataStreamSource<String> student = env.addSource(new FlinkKafkaConsumer<>(
                GetPropertiesUtils.getDecodeKeyUtil("topic"),
                new SimpleStringSchema(),
                consumerProps)).setParallelism(1);

        // ---- Kafka producer configuration ----
        // BUGFIX: the original code copied consumer deserializer settings and
        // zookeeper.connect into the producer config; producers ignore them (the
        // connector supplies ByteArraySerializer itself), so they are removed here.
        Properties producerProps = new Properties();
        producerProps.put(BOOTSTRAPSERVERS, GetPropertiesUtils.getDecodeKeyUtil(BOOTSTRAPSERVERS));
        // BUGFIX: EXACTLY_ONCE uses Kafka transactions and Flink's connector defaults
        // transaction.timeout.ms to 1 h, which exceeds the broker default
        // transaction.max.timeout.ms (15 min) and makes the job fail at runtime.
        // Cap it at the broker default; adjust if the broker setting differs.
        producerProps.setProperty("transaction.timeout.ms", String.valueOf(15 * 60 * 1000));

        // Route each JSON event to the per-project topic derived from its project_id.
        KafkaSerializationSchema<String> serializationSchema = (element, timestamp) -> {
            JSONObject jsonObject = JSON.parseObject(element);
            String targetTopic = "dumper_project_" + jsonObject.get("project_id") + "_event_topic";
            return new ProducerRecord<>(
                    targetTopic,
                    element.getBytes(StandardCharsets.UTF_8));
        };

        // EXACTLY_ONCE gives transactional, exactly-once delivery into the target topics.
        FlinkKafkaProducer<String> myProducer = new FlinkKafkaProducer<>(
                "default",            // fallback topic; the schema above overrides it per record
                serializationSchema,
                producerProps,
                FlinkKafkaProducer.Semantic.EXACTLY_ONCE);

        // Keep only events whose "project" field belongs to a configured project, then sink.
        // BUGFIX: redundant (SinkFunction<String>) cast removed — FlinkKafkaProducer
        // already implements SinkFunction.
        student.filter((FilterFunction<String>) value ->
                        projectList.containsKey(JSON.parseObject(value).getString("project")))
                .addSink(myProducer);

        env.execute("flink learning connectors kafka");
    }

    /**
     * Loads the allowed project names from configuration.
     *
     * @return a map whose keys (and values) are the project names parsed from the
     *         comma-separated {@code projectList} property; a map is used so the
     *         filter above gets O(1) membership checks
     */
    public static Map<String, String> getProjectList() {
        String[] projectNames = GetPropertiesUtils.getDecodeKeyUtil("projectList").split(",");
        HashMap<String, String> projects = new HashMap<>();
        for (String name : projectNames) {
            projects.put(name, name);
        }
        return projects;
    }
}