Using Kafka as the example, this section describes how a Flink jar publishes Flink monitoring metrics. There are three common ways to implement such a jar:
1. Development based on the official DataStream API
The official DataStream connectors publish metrics out of the box, so a Flink jar built on them exposes monitoring metrics as soon as it runs. Documentation:
Kafka source and sink: https://nightlies.apache.org/flink/flink-docs-master/zh/docs/connectors/datastream/kafka/
Monitoring metrics: https://nightlies.apache.org/flink/flink-docs-master/zh/docs/connectors/datastream/kafka/#%E7%9B%91%E6%8E%A7
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.PipelineOptions;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Objects;
@Slf4j
public class Metrix {
public static void main(String[] args) throws Exception {
// set up the streaming execution environment
Configuration configuration = new Configuration();
loadConfig(configuration);
configuration.setString(PipelineOptions.NAME, "metrixflink");
EnvironmentSettings settings = EnvironmentSettings.newInstance().build();
final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(configuration);
TableEnvironment tEnv = StreamTableEnvironment.create(env, settings);
tEnv.getConfig().getConfiguration().setString("pipeline.name", "metrixflink");
log.info("tEnv set up");
KafkaSource<String> source = KafkaSource.<String>builder()
.setBootstrapServers("kafka_ip:xxx")
.setProperty("security.protocol","SASL_PLAINTEXT")
.setProperty("sasl.mechanism","SCRAM-SHA-256")
.setProperty("sasl.jaas.config","org.apache.kafka.common.security.scram.ScramLoginModule required username=\"admin\" password=\"*******\";")
.setTopics("topic_1")
.setGroupId("my-group")
.setStartingOffsets(OffsetsInitializer.earliest())
.setValueOnlyDeserializer(new SimpleStringSchema())
.build();
KafkaSink<String> sink = KafkaSink.<String>builder()
.setBootstrapServers("kafka_ip:xxx")
.setProperty("security.protocol","SASL_PLAINTEXT")
.setProperty("sasl.mechanism","SCRAM-SHA-256")
.setProperty("sasl.jaas.config","org.apache.kafka.common.security.scram.ScramLoginModule required username=\"admin\" password=\"*******\";")
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic("topic_2")
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
.build();
env.fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source").sinkTo(sink);
env.execute();
}
public static void loadConfig(Configuration configuration) throws FileNotFoundException {
//Load the configuration file from the current working directory; if it is not there, fall back to the copy bundled in the jar
String configFileName = "flink-conf.yaml";
File configFile = new File(System.getProperty("user.dir") + File.separator + configFileName);
InputStream inputStream = null;
if (configFile.exists()) {
log.info("Loading configuration file from {}", configFile.getAbsoluteFile());
inputStream = new FileInputStream(configFile);
} else {
log.info("Loading configuration file {} from the classpath", configFileName);
inputStream = Metrix.class.getClassLoader().getResourceAsStream(configFileName);
}
if (Objects.nonNull(inputStream)) {
configuration.addAll(loadYAMLResource(inputStream));
}
}
private static Configuration loadYAMLResource(InputStream input) {
final Configuration config = new Configuration();
try (BufferedReader reader = new BufferedReader(new InputStreamReader(input))) {
String line;
int lineNo = 0;
while ((line = reader.readLine()) != null) {
lineNo++;
// 1. check for comments
String[] comments = line.split("#", 2);
String conf = comments[0].trim();
// 2. get key and value
if (conf.length() > 0) {
String[] kv = conf.split(": ", 2);
// skip line with no valid key-value pair
if (kv.length == 1) {
log.warn("Error while trying to split key and value in configuration file :" + lineNo + ": \"" + line + "\"");
continue;
}
String key = kv[0].trim();
String value = kv[1].trim();
// sanity check
if (key.length() == 0 || value.length() == 0) {
log.warn("Error after splitting key and value in configuration file :" + lineNo + ": \"" + line + "\"");
continue;
}
log.info("Loading configuration property: {}, {}", key, value);
config.setString(key, value);
}
}
} catch (IOException e) {
throw new RuntimeException("Error parsing YAML configuration.", e);
}
return config;
}
}
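For the metrics to be visible anywhere other than the Flink web UI, the flink-conf.yaml loaded by loadConfig above also has to configure a metric reporter. The following is a minimal sketch, assuming the flink-metrics-prometheus jar is on the classpath; the reporter name prom and port 9249 are illustrative only:
metrics.reporter.prom.class: org.apache.flink.metrics.prometheus.PrometheusReporter
metrics.reporter.prom.port: 9249
With a reporter configured this way, the Kafka connector metrics described in the documentation linked above are exposed on the reporter endpoint and can be scraped by the monitoring system.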
2. Executing Flink SQL from code
The Flink SQL Kafka connector is backed by the same implementation classes as approach 1, so the same monitoring metrics are published automatically. A minimal driver looks like this (the sourceSql, sinkSql, and insertSql statements are sketched after the block):
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings tableEnvSettings = EnvironmentSettings.newInstance()
.useBlinkPlanner()
.inStreamingMode()
.build();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, tableEnvSettings);
tableEnv.executeSql(sourceSql);
tableEnv.executeSql(sinkSql);
tableEnv.executeSql(insertSql); // executeSql on an INSERT statement submits the job; no env.execute() is needed
}
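As an illustration only, the three SQL strings could be defined inside main along the following lines; the topics, addresses, schema, and format are placeholders, and the security properties mirror the ones used in approach 1 (the JAAS login string would be passed the same way via 'properties.sasl.jaas.config'):
String sourceSql = "CREATE TABLE source_table (msg STRING) WITH (" +
        " 'connector' = 'kafka'," +
        " 'topic' = 'topic_1'," +
        " 'properties.bootstrap.servers' = 'kafka_ip:xxx'," +
        " 'properties.group.id' = 'my-group'," +
        " 'properties.security.protocol' = 'SASL_PLAINTEXT'," +
        " 'properties.sasl.mechanism' = 'SCRAM-SHA-256'," +
        " 'scan.startup.mode' = 'earliest-offset'," +
        " 'format' = 'raw')";
String sinkSql = "CREATE TABLE sink_table (msg STRING) WITH (" +
        " 'connector' = 'kafka'," +
        " 'topic' = 'topic_2'," +
        " 'properties.bootstrap.servers' = 'kafka_ip:xxx'," +
        " 'properties.security.protocol' = 'SASL_PLAINTEXT'," +
        " 'properties.sasl.mechanism' = 'SCRAM-SHA-256'," +
        " 'format' = 'raw')";
String insertSql = "INSERT INTO sink_table SELECT msg FROM source_table";
The 'kafka' SQL connector reuses the KafkaSource/KafkaSink implementations internally, which is why its source and sink metrics show up without any extra code.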
3. Custom RichFunction (registering metrics manually through the RuntimeContext metric group)
@Slf4j
public class Metrix {
public static void main(String[] args) throws Exception {
// set up the streaming execution environment
Configuration configuration = new Configuration();
loadConfig(configuration);
configuration.setString(PipelineOptions.NAME, "metrixflink");
EnvironmentSettings settings = EnvironmentSettings.newInstance().build();
final StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(configuration);
TableEnvironment tEnv = StreamTableEnvironment.create(env, settings);
tEnv.getConfig().getConfiguration().setString("pipeline.name", "metrixflink");
log.info("tEnv set up");
KafkaSink<String> sink = KafkaSink.<String>builder()
.setBootstrapServers("ip:xxx")
.setProperty("security.protocol", "SASL_PLAINTEXT")
.setProperty("sasl.mechanism", "SCRAM-SHA-256")
.setProperty("sasl.jaas.config", "org.apache.kafka.common.security.scram.ScramLoginModule required username=\"admin\" password=\"****\";")
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic("topic_1")
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
.build();
env.addSource(new MySource()).sinkTo(sink);
env.execute();
}
public static class MySource extends RichSourceFunction<String> {
// Flag that controls whether the source keeps generating data; volatile because cancel() is called from another thread
private volatile boolean running = true;
private transient Counter counter;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
// Register a counter metric named my_source_record_send
this.counter = getRuntimeContext().getMetricGroup().counter("my_source_record_send");
}
@Override
public void run(SourceContext<String> ctx) throws Exception {
// Generate random records
Random random = new Random();
while (running) {
ctx.collect("record,index=" + random.nextInt());
// Update the counter for every record emitted
this.counter.inc();
// Emit one record per second to make the metric easy to observe
Thread.sleep(1000);
}
}
@Override
public void cancel() {
running = false;
}
}
public static void loadConfig(Configuration configuration) throws FileNotFoundException {
......
}
private static Configuration loadYAMLResource(InputStream input) {
......
}
}
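Besides counters, the same metric group can register gauges, meters, and histograms. Below is a minimal sketch of open() with a few extra metric types; the field names, metric names, and the pendingCount variable are illustrative only, not part of the example above (imports: org.apache.flink.metrics.Counter, Gauge, Meter, MeterView):
private transient Counter counter;
private transient Meter meter;
// Illustrative state exposed through a gauge
private volatile int pendingCount = 0;

@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    // Counter, as in MySource above
    this.counter = getRuntimeContext().getMetricGroup().counter("my_source_record_send");
    // Gauge: reports the current value of pendingCount each time the reporter polls it
    getRuntimeContext().getMetricGroup().gauge("my_pending_count", (Gauge<Integer>) () -> pendingCount);
    // Meter: per-second rate derived from the counter
    this.meter = getRuntimeContext().getMetricGroup().meter("my_records_per_second", new MeterView(this.counter));
}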
A variant of the custom source that reads its records from Kafka with a plain KafkaConsumer instead of generating them; the counter is registered the same way:
public static class MyKafkaSource extends RichSourceFunction<String> {
// Volatile flag so that cancel() can stop the consume loop
private volatile boolean running = true;
private transient Counter counter;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
this.counter = getRuntimeContext().getMetricGroup().counter("my_source_record_send");
}
@Override
public void run(SourceContext<String> ctx) throws Exception {
Properties properties = new Properties();
properties.put("bootstrap.servers", "ip:xxx");
properties.put("group.id", "0");
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("sasl.jaas.config", "org.apache.kafka.common.security.scram.ScramLoginModule required username=\"admin\" password=\"******\";");
properties.put("security.protocol", "SASL_PLAINTEXT");
properties.put("sasl.mechanism", "SCRAM-SHA-256");
properties.setProperty("enable.auto.commit", "true");
properties.setProperty("auto.offset.reset", "earliest");
properties.put("auto.commit.interval.ms", "1000");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
consumer.subscribe(Collections.singletonList("topic1"));
while (running) {
// poll(Duration) replaces the deprecated poll(long); requires java.time.Duration
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(10));
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.value());
ctx.collect("record,index=" + record.value());
this.counter.inc();
}
consumer.commitAsync();
}
consumer.close();
}
@Override
public void cancel() {
running = false;
}
}
Result: the custom my_source_record_send counter appears under the source operator's metrics (in the Flink web UI task metrics tab, or on the configured metric reporter), increasing by one for every record emitted.