Flink 1.12 读取kafka 写入Kudu | 自定义KuduSink | 含源码 | Apache kudu
maven依赖在之前的帖子有写,是共用的,可以做参考
Flink1.12读取Kafka数据写入到Hdfs | 含maven依赖 | FileSink
首先是主类的代码
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import java.io.File;
import java.io.FileInputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
public class KafkaToKudu {
    /**
     * Job entry point: consumes string records from a Kafka topic and writes
     * them to Kudu through the custom {@link KuduSink}.
     *
     * @param args args[0] is the path to a properties file holding the
     *             Kafka/Kudu settings (bootstrap servers, group id, topic, ...)
     * @throws Exception if the config file cannot be read or the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: KafkaToKudu <config-file-path>");
        }
        // Load the properties file, then overlay JVM system properties and
        // OS environment variables (later merges win on duplicate keys).
        String configPath = args[0];
        ParameterTool parameterTool = ParameterTool.fromPropertiesFile(configPath)
                .mergeWith(ParameterTool.fromSystemProperties())
                .mergeWith(ParameterTool.fromMap(getenv()));
        // Create the streaming environment (project helper; presumably sets
        // checkpointing etc. -- not visible from here).
        StreamExecutionEnvironment env = FlinkEnvUtils.creatEnv();
        // Make the merged config visible to every operator (KuduSink reads it
        // back in open() via getGlobalJobParameters()).
        env.getConfig().setGlobalJobParameters(parameterTool);
        // Kafka consumer configuration.
        // NOTE(review): the keys below spell "kakfa" -- kept byte-identical
        // because the companion config file presumably uses the same spelling.
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", parameterTool.get("kafka.ips"));
        props.setProperty("group.id", parameterTool.get("kakfa.group.name"));
        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(
                parameterTool.get("kakfa.topic.name"), new SimpleStringSchema(), props);
        // Commit offsets back to Kafka on successful checkpoints, and resume
        // from the committed group offsets on restart.
        consumer.setCommitOffsetsOnCheckpoints(true);
        consumer.setStartFromGroupOffsets();
        DataStream<String> stream = env.addSource(consumer);
        stream.addSink(new KuduSink()).name("kafkaTokudu_test");
        env.execute();
    }

    /** Returns a mutable copy of the OS environment as a plain map. */
    private static Map<String, String> getenv() {
        // System.getenv() is already Map<String, String>; copy it so the
        // result is mutable and detached from the immutable system view.
        return new HashMap<>(System.getenv());
    }
}
跟之前一样,传参只有一个配置文件
KuduSink的代码如下
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Flink sink that writes delimited change records ("xxx"-separated fields,
 * first field is the op type I/U/D) into a Kudu table via {@link KuduUtil}.
 */
public class KuduSink<IN> extends RichSinkFunction<String> implements Serializable {
    private static final long serialVersionUID = 1L;

    // Per-subtask state. These were previously static, which is wrong for a
    // sink: parallel subtasks in the same JVM would overwrite each other's
    // configuration. Instance fields keep each subtask independent.
    private KuduUtil kuduUtil;
    private String tableName;
    private int keyNums;

    /**
     * Reads the Kudu table name and primary-key column count from the global
     * job parameters and opens the Kudu connection.
     *
     * @throws Exception if the parameters are missing/non-numeric or the
     *                   Kudu connection cannot be established
     */
    @Override
    public void open(Configuration param) throws Exception {
        super.open(param);
        ParameterTool parameterTool =
                (ParameterTool) getRuntimeContext().getExecutionConfig().getGlobalJobParameters();
        this.tableName = parameterTool.get("kudu.table.name");
        // Parse once here instead of once per record in invoke().
        this.keyNums = Integer.parseInt(parameterTool.get("kudu.table.keynums"));
        this.kuduUtil = new KuduUtil(this.tableName);
    }

    /**
     * Applies one incoming record (possibly several newline-separated lines)
     * to Kudu: I -> insert, U -> update, D -> delete (key columns only).
     *
     * @param s raw record; fields within a line are separated by the literal
     *          delimiter "xxx", first field is the operation type
     * @throws IOException if the Kudu write fails
     */
    @Override
    public void invoke(String s) throws IOException {
        // Strip leading/trailing line breaks so split("\n") does not yield
        // empty records at either end.
        if (s.contains("\n") || s.contains("\r")) {
            s = s.trim();
        }
        // NOTE(review): "\r\n"-terminated input would leave a trailing '\r'
        // on each field of the last column after this split -- confirm the
        // upstream producer uses bare '\n'.
        for (String line : s.split("\n", -1)) {
            List<String> strs = new ArrayList<>(Arrays.asList(line.split("xxx", -1)));
            String opType = strs.get(0);
            // Business logic hook: dispatch on the change-record operation type.
            if ("I".equalsIgnoreCase(opType)) {
                this.kuduUtil.insert(strs);
            } else if ("U".equalsIgnoreCase(opType)) {
                this.kuduUtil.update(strs);
            } else if ("D".equalsIgnoreCase(opType)) {
                // Deletes only need the leading primary-key columns.
                this.kuduUtil.delete(strs.subList(0, this.keyNums));
            }
        }
    }

    /** Flushes and closes the Kudu connection; safe if open() never ran. */
    @Override
    public void close() throws Exception {
        if (this.kuduUtil != null) {
            this.kuduUtil.close();
        }
    }
}
kudu.table.name是kudu的表名
kudu.table.keynums是主键的个数,因为删除操作时只需要填写主键即可
KuduUtil的代码如下:
import org.apache.kudu.client.*;
import java.util.List;
/**
 * Thin wrapper around the Kudu client for single-row insert/update/delete,
 * where every row is given as an ordered list of string column values
 * (list index == column index in the Kudu schema).
 */
public class KuduUtil {
    // Kudu master address(es); placeholder -- fill in host:port.
    private static final String DEFAULT_HOSTS = "xxx:xxx";

    // Instance (not static) handles: these were previously static but
    // assigned in the constructor, so creating a second KuduUtil clobbered
    // the first one's client/session/table.
    private final KuduClient client;
    private final KuduTable table;
    private final KuduSession session;

    /** Connects to the default Kudu masters and opens {@code tableName}. */
    public KuduUtil(String tableName) throws KuduException {
        this(tableName, DEFAULT_HOSTS);
    }

    /**
     * Connects to the given Kudu masters and opens {@code tableName}.
     *
     * @param tableName name of an existing Kudu table
     * @param hosts     comma-separated Kudu master addresses (host:port)
     * @throws KuduException if the connection or table open fails
     */
    public KuduUtil(String tableName, String hosts) throws KuduException {
        this.client = new KuduClient.KuduClientBuilder(hosts).build();
        this.session = client.newSession();
        // MANUAL_FLUSH with a large buffer; callers below flush after every
        // apply, which keeps writes synchronous (original behavior).
        this.session.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
        this.session.setMutationBufferSpace(20000);
        this.table = client.openTable(tableName);
    }

    /** Inserts one row; columns.get(i) becomes column i as a string. */
    public void insert(List<String> columns) throws KuduException {
        Insert insert = table.newInsert();
        fillRow(insert.getRow(), columns);
        applyAndFlush(insert);
    }

    /** Updates one row; columns.get(i) becomes column i as a string. */
    public void update(List<String> columns) throws KuduException {
        Update update = table.newUpdate();
        fillRow(update.getRow(), columns);
        applyAndFlush(update);
    }

    /** Deletes one row; {@code columns} must hold the primary-key values. */
    public void delete(List<String> columns) throws KuduException {
        Delete delete = table.newDelete();
        fillRow(delete.getRow(), columns);
        applyAndFlush(delete);
    }

    // Copies the string values into the row by positional column index.
    private static void fillRow(PartialRow row, List<String> columns) {
        for (int i = 0; i < columns.size(); i++) {
            row.addString(i, columns.get(i));
        }
    }

    // Applies one operation and flushes immediately (synchronous write).
    private void applyAndFlush(Operation op) throws KuduException {
        session.apply(op);
        session.flush();
    }

    /**
     * Flushes pending mutations and releases session and client. The client
     * is shut down even if flushing/closing the session throws.
     */
    public void close() throws KuduException {
        try {
            if (session != null) {
                session.flush();
                session.close();
            }
        } finally {
            if (client != null) {
                client.shutdown();
            }
        }
    }
}
搞定!!!