Overview
In big data work we often use the kafka-consumer-groups script or a third-party GUI tool (such as Kafka Tool) to check how much data is currently backed up in Kafka. If we recorded those lag figures and turned them into reports, they could well serve other, unexpected purposes.
Requirement
Periodically collect Kafka lag statistics and persist the results for later report analysis.
Implementation
Since the contents of the internal __consumer_offsets topic are not stored in plain text, it cannot be consumed the way an ordinary topic is, so I looked into the alternatives.
Approach 1: reading __consumer_offsets
A large number of online articles take this approach, but most of them only use Kafka's script commands to view the lag; output viewed that way cannot be processed further, and they attach no implementation code. Only the two links below provide actual code.
Link 1: from Jianshu
Link 2: from Stack Overflow
The code in those links retrieves a consumer group's committed offsets, but that alone does not yield the lag; further processing is still needed. So I went back through the Kafka developer documentation and found a second approach. The API reference is Link 3: KafkaAdminClient (search for this class in the Javadoc).
Approach 2: via KafkaAdminClient
Environment
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>2.4.0</version>
</dependency>
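Before the full Spring task, the core of this approach can be shown in isolation. The following is a minimal standalone sketch rather than the article's code; the broker address localhost:9092 and the group id my-group are placeholder assumptions. It asks AdminClient for the group's committed offsets, asks a plain Consumer for the log-end offsets of the same partitions, and takes the difference as the lag.

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.Map;
import java.util.Properties;

public class LagProbe {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // placeholder address
        try (AdminClient admin = AdminClient.create(props)) {
            // committed offset of each partition the group has consumed
            Map<TopicPartition, OffsetAndMetadata> committed =
                    admin.listConsumerGroupOffsets("my-group") // placeholder group id
                            .partitionsToOffsetAndMetadata().get();
            props.put("key.deserializer", StringDeserializer.class.getName());
            props.put("value.deserializer", StringDeserializer.class.getName());
            try (Consumer<String, String> consumer = new KafkaConsumer<>(props)) {
                // log-end offset of the same partitions
                Map<TopicPartition, Long> end = consumer.endOffsets(committed.keySet());
                committed.forEach((tp, meta) ->
                        System.out.printf("%s lag=%d%n", tp, end.get(tp) - meta.offset()));
            }
        }
    }
}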
The full scheduled-task implementation follows:
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.ConsumerGroupListing;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;

// StatisticsMapper, KafkaOffsetMapper, Statistics, DataBase and KafkaConfiguration
// are project classes, assumed here to live in the same package.
@Component
@EnableScheduling
@EnableAsync
@Slf4j
public class FlowTask {

    // separator for the topic + groupId aggregation key (assumed value; the
    // original constant is not shown in the article)
    private static final String SPLIT_CHARACTER = "#";

    @Autowired
    StatisticsMapper statisticsMapper;
    // mapper for the raw per-partition rows; used below but never declared in
    // the original listing
    @Autowired
    KafkaOffsetMapper kafkaOffsetMapper;
    @Autowired
    @Qualifier("kafkaConsumer")
    Consumer<String, String> kafkaConsumer;
    @Autowired
    AdminClient kafkaAdminClient;
    @Autowired
    KafkaConfiguration kafkaConfiguration;

    @Scheduled(cron = "${task.kafka.cron}")
    public void kafkaStatistics() {
        log.info("kafkaStatistics task begin, time {}", LocalDateTime.now());
        try {
            // fetch every consumer group known to the cluster
            Collection<ConsumerGroupListing> consumerGroupListings =
                    kafkaAdminClient.listConsumerGroups().all().get();
            List<KafkaOffset> kafkaOffsetList = new ArrayList<>();
            // key: topic + SPLIT_CHARACTER + groupId
            Map<String, List<KafkaOffset>> kafkaOffsetMap = new HashMap<>();
            for (ConsumerGroupListing consumerGroupListing : consumerGroupListings) {
                String groupId = consumerGroupListing.groupId();
                // committed offset of every partition the group has consumed
                Map<TopicPartition, OffsetAndMetadata> topicPartitionOffsetAndMetadataMap =
                        kafkaAdminClient.listConsumerGroupOffsets(groupId)
                                .partitionsToOffsetAndMetadata().get();
                for (TopicPartition topicPartition : topicPartitionOffsetAndMetadataMap.keySet()) {
                    // beginning and end offsets of the partition itself
                    Map<TopicPartition, Long> benMap =
                            kafkaConsumer.beginningOffsets(Collections.singletonList(topicPartition));
                    Map<TopicPartition, Long> endMap =
                            kafkaConsumer.endOffsets(Collections.singletonList(topicPartition));
                    OffsetAndMetadata offsetAndMetadata = topicPartitionOffsetAndMetadataMap.get(topicPartition);
                    KafkaOffset kafkaOffset = new KafkaOffset();
                    kafkaOffset.setGroupId(groupId);
                    kafkaOffset.setServers(kafkaConfiguration.getBootStrapServer());
                    kafkaOffset.setTopic(topicPartition.topic());
                    kafkaOffset.setPartition(topicPartition.partition());
                    kafkaOffset.setStart(benMap.get(topicPartition) == null ? 0L : benMap.get(topicPartition));
                    kafkaOffset.setEnd(endMap.get(topicPartition) == null ? 0L : endMap.get(topicPartition));
                    kafkaOffset.setOffset(offsetAndMetadata.offset());
                    // lag = log end offset - committed offset
                    kafkaOffset.setLag(kafkaOffset.getEnd() - kafkaOffset.getOffset());
                    String key = topicPartition.topic() + SPLIT_CHARACTER + groupId;
                    kafkaOffsetMap.computeIfAbsent(key, k -> new ArrayList<>()).add(kafkaOffset);
                    kafkaOffsetList.add(kafkaOffset);
                }
            }
            // drop the previous Kafka snapshot before persisting the new one
            Statistics entity = new Statistics();
            entity.setDatabaseName(DataBase.getName(DataBase.DATABASE_KAFKA));
            statisticsMapper.delete(new QueryWrapper<>(entity));
            LocalDateTime insertTime = LocalDateTime.now();
            for (KafkaOffset kafkaOffset : kafkaOffsetList) {
                kafkaOffset.setTime(insertTime);
                kafkaOffsetMapper.insert(kafkaOffset);
            }
            // aggregate total lag per (topic, group) pair
            for (Map.Entry<String, List<KafkaOffset>> entry : kafkaOffsetMap.entrySet()) {
                Statistics statistics = new Statistics();
                statistics.setDatabaseName(DataBase.getName(DataBase.DATABASE_KAFKA));
                statistics.setTableName(entry.getKey());
                long sum = 0L;
                for (KafkaOffset kafkaOffset : entry.getValue()) {
                    sum += kafkaOffset.getLag();
                }
                statistics.setCount(sum);
                statistics.setTime(insertTime);
                statisticsMapper.insert(statistics);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("kafkaStatistics task happen error", e);
        } catch (ExecutionException e) {
            log.error("kafkaStatistics task happen error", e);
        }
        log.info("kafkaStatistics task end, time {}", LocalDateTime.now());
    }
}
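The task autowires a Consumer bean named kafkaConsumer, an AdminClient bean and a KafkaConfiguration object exposing the bootstrap address, none of which are shown in the article. Below is a minimal sketch of what such a configuration class could look like; the property keys kafka.bootstrap-server and task.kafka.cron are assumptions. For the latter, a six-field Spring cron value such as 0 0/5 * * * ? would run the task every five minutes.

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.Properties;

@Configuration
public class KafkaConfiguration {

    // assumed property key; adjust to the project's application.yml
    @Value("${kafka.bootstrap-server:localhost:9092}")
    private String bootStrapServer;

    public String getBootStrapServer() {
        return bootStrapServer;
    }

    // consumer used only for beginningOffsets/endOffsets lookups; a singleton
    // is acceptable here because the scheduled task is its only caller
    @Bean(name = "kafkaConsumer")
    public Consumer<String, String> kafkaConsumer() {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootStrapServer);
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        return new KafkaConsumer<>(props);
    }

    @Bean
    public AdminClient kafkaAdminClient() {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootStrapServer);
        return AdminClient.create(props);
    }
}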
The custom entity class KafkaOffset referenced above:
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName;
import com.baomidou.mybatisplus.extension.activerecord.Model;
import java.io.Serializable;
import java.time.LocalDateTime;
/**
 * Entity mapped to the KAFKA_OFFSET table; one row per (group, topic, partition) snapshot.
 *
 * @author Bean
 * @since 2019-12-23
 */
@TableName("KAFKA_OFFSET")
public class KafkaOffset extends Model<KafkaOffset> {

    private static final long serialVersionUID = 1L;

    private String servers;
    private String topic;
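    // PARTITION, START, END, OFFSET and TIME collide with SQL keywords,
    // hence the backtick escaping in the column names below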
    @TableField("`PARTITION`")
    private Integer partition;
    @TableField("`START`")
    private Long start;
    @TableField("`END`")
    private Long end;
    @TableField("`OFFSET`")
    private Long offset;
    private Long lag;
    @TableField("`TIME`")
    private LocalDateTime time;
    private String groupId;
    private String consumerId;

    public String getGroupId() {
        return groupId;
    }

    public void setGroupId(String groupId) {
        this.groupId = groupId;
    }

    public String getConsumerId() {
        return consumerId;
    }

    public void setConsumerId(String consumerId) {
        this.consumerId = consumerId;
    }

    public LocalDateTime getTime() {
        return time;
    }

    public void setTime(LocalDateTime time) {
        this.time = time;
    }

    public String getServers() {
        return servers;
    }

    public void setServers(String servers) {
        this.servers = servers;
    }

    public String getTopic() {
        return topic;
    }

    public void setTopic(String topic) {
        this.topic = topic;
    }

    public Integer getPartition() {
        return partition;
    }

    public void setPartition(Integer partition) {
        this.partition = partition;
    }

    public Long getStart() {
        return start;
    }

    public void setStart(Long start) {
        this.start = start;
    }

    public Long getEnd() {
        return end;
    }

    public void setEnd(Long end) {
        this.end = end;
    }

    public Long getOffset() {
        return offset;
    }

    public void setOffset(Long offset) {
        this.offset = offset;
    }

    public Long getLag() {
        return lag;
    }

    public void setLag(Long lag) {
        this.lag = lag;
    }

    @Override
    protected Serializable pkVal() {
        return null;
    }

    @Override
    public String toString() {
        return "KafkaOffset{" +
                "servers=" + servers +
                ", topic=" + topic +
                ", partition=" + partition +
                ", start=" + start +
                ", end=" + end +
                ", offset=" + offset +
                ", lag=" + lag +
                "}";
    }
}
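The two mappers injected into FlowTask are not shown in the article either. With MyBatis-Plus they need no hand-written SQL; a minimal sketch (one interface per file) could be:

import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import org.apache.ibatis.annotations.Mapper;

// inherits generic CRUD methods such as insert() and delete() from BaseMapper
@Mapper
public interface KafkaOffsetMapper extends BaseMapper<KafkaOffset> {
}

@Mapper
public interface StatisticsMapper extends BaseMapper<Statistics> {
}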
Summary
KafkaOffset rows are kept in the database as the raw data, available for future development needs.
Statistics holds the computed result of each scheduled run, aggregated with (topic, groupId) as the smallest unit: for example, if a group's three partitions of one topic lag by 10, 20 and 30 messages, the run produces a single Statistics row with a count of 60.