package com.msxf.datamart.applyMain;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.serializer.SerializerFeature;
import com.msxf.datamart.common.SideOutEventType;
import com.msxf.datamart.pojo.ApplyMainPojo;
import com.msxf.datamart.utils.KafkaPropertiesUtil;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer010;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
public class ApplyMainApp {
private static final Logger LOG = LoggerFactory.getLogger(ApplyMainApp.class);
public static void main(String[] args) throws Exception {
if (args == null || args.length < 7) {
throw new IllegalArgumentException(
"Invalid arguments: expected kafka brokers, topic, hbase zk, kafka ssl username, kafka ssl password, group id and retry window (ms), in that order");
}
String kafkaUrl = args[0];
String topic = args[1];
List<String> topics = KafkaPropertiesUtil.getTopics(topic);
String datamart = args[2];
//kafka ssl userName
String userName = args[3];
//kafka ssl password
String password = args[4];
String sinkKafkaUrl = "datamart-kfk-mdc-01.middlware.msxf.lo:9092,datamart-kfk-mdc-02.middlware.msxf.lo:9092,datamart-kfk-mdc-03.middlware.msxf.lo:9092";
String sinkU = "appodn";
String sinkP = "czledm5pira4qugxhv06";
// data mart cluster
//String outSideKafkaUrl = kafkaUrl;
//String outSideU = userName;
//String outSideP = password;
String outSideKafkaUrl = "datamart-kfk-mdc-01.middlware.msxf.lo:9092,datamart-kfk-mdc-02.middlware.msxf.lo:9092,datamart-kfk-mdc-03.middlware.msxf.lo:9092";
String outSideU = "appodn";
String outSideP = "czledm5pira4qugxhv06";
String groupId = args[5];
String timeDuration = args[6];
if (args.length > 7) {
sinkKafkaUrl = kafkaUrl;
sinkU = userName;
sinkP = password;
}
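// run HDFS access (e.g. checkpoint storage) as the flink user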
System.setProperty("HADOOP_USER_NAME", "flink");
// set up execution environment
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Critical: checkpointing must be enabled; the second argument sets the processing guarantee (exactly-once here)
env.enableCheckpointing(120000, CheckpointingMode.EXACTLY_ONCE);
env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
// checkpoint retention policy: keep externalized checkpoints when the job is cancelled
CheckpointConfig checkpointConfig = env.getCheckpointConfig();
checkpointConfig.enableExternalizedCheckpoints(
CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
checkpointConfig.setCheckpointTimeout(60000);
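// Kafka client properties for the binlog source, the main sink and the side-output (retry) source/sink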
Properties binlogProps = KafkaPropertiesUtil.buildProperties(kafkaUrl, groupId, userName, password);
Properties sinkProps = KafkaPropertiesUtil.buildProperties(sinkKafkaUrl, groupId, sinkU, sinkP);
Properties sideOutProps = KafkaPropertiesUtil.buildProperties(outSideKafkaUrl, groupId, outSideU, outSideP);
Properties sideOutSinkProps = KafkaPropertiesUtil.buildProperties(outSideKafkaUrl, groupId, outSideU, outSideP);
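// consumers: one for the binlog topics, one for the side-output (retry) topic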
FlinkKafkaConsumer010<String> binlogConsumer = new FlinkKafkaConsumer010<>(topics, new SimpleStringSchema(), binlogProps);
FlinkKafkaConsumer010<String> sideOutConsumer = new FlinkKafkaConsumer010<>("datamart_tag_sideout", new SimpleStringSchema(), sideOutProps);
//FlinkKafkaConsumer010<String> sideOutConsumer = new FlinkKafkaConsumer010<>("tianshu_sideout_online", new SimpleStringSchema(), sideOutProps);
// parse binlog messages into rows
DataStreamSource<String> binlogSource = env.addSource(binlogConsumer);
// read the side-output (retry) topic, keeping only apply-main records
SingleOutputStreamOperator<String> sideSource = env.addSource(sideOutConsumer)
.filter((FilterFunction<String>) value -> {
JSONObject json = JSON.parseObject(value);
String flag = json.getString("flag");
return SideOutEventType.APPLY_MAIN.getCode().equals(flag);
});
// union the binlog stream with the retry stream
DataStream<String> source = binlogSource.union(sideSource);
// enrich against HBase via async I/O (timeout 10s, unordered, capacity 20)
SingleOutputStreamOperator<String> sinkSource = AsyncDataStream.unorderedWait(
source,
new ApplyMainAsyncFunction(datamart),
10000,
TimeUnit.MILLISECONDS,
20
);
// unmatched records: check the retry window and either re-emit to the side output or fall back to convert()
OutputTag<String> sideOut = new OutputTag<String>("sideOut") {
};
SingleOutputStreamOperator<String> result = sinkSource.process(new ProcessFunction<String, String>() {
@Override
public void processElement(String value, Context ctx, Collector<String> out) throws Exception {
JSONObject json = JSON.parseObject(value);
String flag = json.getString("flag");
String time = json.getString("time");
if (SideOutEventType.APPLY_MAIN.getCode().equals(flag)) {
if (System.currentTimeMillis() - Long.parseLong(time) <= Integer.parseInt(timeDuration)) {
ctx.output(sideOut, value);
} else {
out.collect(convert(value));
}
} else {
out.collect(value);
}
}
});
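// records still inside the retry window loop back through the side-output topic for another enrichment attempt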
result.getSideOutput(sideOut)
.addSink(new FlinkKafkaProducer010<>(
"datamart_tag_sideout",
new SimpleStringSchema(),
sideOutSinkProps)).name("sideOutSink");
// write the main stream to the sink topic
result.addSink(new FlinkKafkaProducer010<>(
"dwd_cdp_apply_main",
new SimpleStringSchema(),
sinkProps)).name("KafkaSink");
env.execute("ApplyMainApp");
}
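// Fallback conversion for records whose retry window has expired: map the raw
// JSON onto ApplyMainPojo, leave user_id null (the lookup never succeeded) and
// serialize with null values kept so the downstream schema stays complete.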
public static String convert(String input) {
JSONObject json;
try {
json = JSON.parseObject(input);
ApplyMainPojo pojo = new ApplyMainPojo();
pojo.setEtl_record_type(json.getString("etl_record_type"));
pojo.setEtl_time(json.getString("etl_time"));
pojo.setUnique_id(json.getString("unique_id"));
pojo.setUser_id(null);
pojo.setCreate_time(json.getString("time_inst"));
pojo.setUpdate_time(json.getString("time_upd"));
pojo.setAppl_channel(json.getString("apply_channel"));
pojo.setAppl_prod_cd(json.getString("product_code"));
pojo.setReal_channel_appl(json.getString("cooperator_id"));
pojo.setApply_no(json.getString("apply_no"));
pojo.setApply_mode_enums(json.getString("apply_type"));
String s = JSON.toJSONString(pojo, SerializerFeature.WriteMapNullValue);
return s;
} catch (JSONException e) {
LOG.warn("ROW : {} convert error", input);
return input;
}
}
}
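The post does not show two project-local classes that the job depends on: KafkaPropertiesUtil and ApplyMainAsyncFunction. The sketches below are hypothetical reconstructions, not the author's code, and are only meant to make the listing above easier to follow.

First, KafkaPropertiesUtil. The job passes the credentials as "kafka ssl userName/password", so the real helper may configure SSL rather than SASL; the SCRAM settings here are purely an assumption.

package com.msxf.datamart.utils;

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class KafkaPropertiesUtil {

    // Split the comma-separated topic argument into a topic list.
    public static List<String> getTopics(String topic) {
        return Arrays.asList(topic.split(","));
    }

    // Build Kafka client properties; the security settings below are assumptions.
    public static Properties buildProperties(String brokers, String groupId, String userName, String password) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", brokers);
        props.setProperty("group.id", groupId);
        props.setProperty("security.protocol", "SASL_PLAINTEXT");
        props.setProperty("sasl.mechanism", "SCRAM-SHA-256");
        props.setProperty("sasl.jaas.config",
                "org.apache.kafka.common.security.scram.ScramLoginModule required"
                        + " username=\"" + userName + "\" password=\"" + password + "\";");
        return props;
    }
}

Second, ApplyMainAsyncFunction. A minimal sketch of an async HBase enrichment, assuming a Flink version (1.5+) whose async I/O API uses ResultFuture, an HBase table cdp:apply_main with column family f, unique_id as the row key, and the convention that unmatched records are stamped with flag/time so the ProcessFunction above can route them; every one of these details is a guess.

package com.msxf.datamart.applyMain;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.msxf.datamart.common.SideOutEventType;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ApplyMainAsyncFunction extends RichAsyncFunction<String, String> {

    private final String zkQuorum;
    private transient Connection connection;
    private transient ExecutorService executor;

    public ApplyMainAsyncFunction(String zkQuorum) {
        this.zkQuorum = zkQuorum;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", zkQuorum);
        connection = ConnectionFactory.createConnection(conf);
        // thread pool roughly sized to the async capacity used in the job (20)
        executor = Executors.newFixedThreadPool(20);
    }

    @Override
    public void asyncInvoke(String value, ResultFuture<String> resultFuture) {
        CompletableFuture.supplyAsync(() -> {
            // hypothetical table/column names; the real schema is not shown in the post
            try (Table table = connection.getTable(TableName.valueOf("cdp:apply_main"))) {
                JSONObject json = JSON.parseObject(value);
                Get get = new Get(Bytes.toBytes(json.getString("unique_id")));
                Result result = table.get(get);
                byte[] userId = result.getValue(Bytes.toBytes("f"), Bytes.toBytes("user_id"));
                if (userId != null) {
                    // matched: enrich and clear any retry markers
                    json.put("user_id", Bytes.toString(userId));
                    json.remove("flag");
                    json.remove("time");
                } else {
                    // not matched: mark for retry so the downstream ProcessFunction
                    // can route it to the side output until the window expires
                    json.put("flag", SideOutEventType.APPLY_MAIN.getCode());
                    if (!json.containsKey("time")) {
                        json.put("time", String.valueOf(System.currentTimeMillis()));
                    }
                }
                return json.toJSONString();
            } catch (Exception e) {
                return value; // on lookup failure, pass the record through unchanged
            }
        }, executor).thenAccept(enriched -> resultFuture.complete(Collections.singleton(enriched)));
    }

    @Override
    public void close() throws Exception {
        if (executor != null) executor.shutdown();
        if (connection != null) connection.close();
    }
}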