Spring-Kafka

Integrating Spring-Kafka with Spring Boot

A recent project needed to monitor MySQL database changes and update the Elasticsearch index in near real time. For routine back-office operations with small data volumes, the single-record listener below is good enough,
but once a data-cleaning job produces millions of rows to update, this approach becomes very slow.

    @KafkaListener(topics = { "dyly_vc_project" })
    public void handleProject(ConsumerRecord<String, String> record) {
        String messageStr = record.value();
        System.out.println("project received message: " + messageStr);
        Message message = JSON.parseObject(messageStr, new TypeReference<Message>() {});
        try {
            importESBuilder.executeProject(message.getData(), message.getSqlType());
        } catch (Exception e) {
            log.error("topic project: failed to process message", e);
        }
    }
Integration

Dependency (Spring Boot 2.1.3):

<dependency>
	<groupId>org.springframework.kafka</groupId>
	<artifactId>spring-kafka</artifactId>
</dependency>

Data Monitoring

Canal-Kafka is used to monitor the database and produce every changed row to Kafka; inserts, updates, and deletes are all captured.
Setup guide:
https://github.com/alibaba/canal/wiki/Canal-Kafka-RocketMQ-QuickStart
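
Every record value produced this way is deserialized into a Message object in the listeners below. The Message (and Project) classes are not shown in the original post; the following is only a minimal sketch consistent with how Message is used (getData(), getSqlType(), setEntity()), and the field names and types are assumptions:

import java.util.List;
import java.util.Map;

// Hypothetical POJO inferred from its usage in this post; field names are assumptions.
public class Message {
    private List<Map<String, String>> data; // changed rows as column -> value maps
    private String sqlType;                 // "INSERT", "UPDATE" or "DELETE"
    private Object entity;                  // typed entity (e.g. Project) built later

    public List<Map<String, String>> getData() { return data; }
    public void setData(List<Map<String, String>> data) { this.data = data; }
    public String getSqlType() { return sqlType; }
    public void setSqlType(String sqlType) { this.sqlType = sqlType; }
    public Object getEntity() { return entity; }
    public void setEntity(Object entity) { this.entity = entity; }
}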

Batch Consumption

application.properties configuration

spring.kafka.consumer.bootstrapServers=10.10.5.112:9092,10.10.5.110:9092,10.10.5.113:9092
# consumer group id; this value does not take effect here because it is re-specified in the Java config below
spring.kafka.consumer.groupId=mysql-es
spring.kafka.consumer.autoOffsetReset=earliest
# whether to enable auto commit
spring.kafka.consumer.enableAutoCommit=false
# auto-commit interval (only relevant when auto commit is enabled)
kafka.consumer.auto-commit-interval=2000
# maximum allowed interval (ms) between two poll() calls; if processing one batch takes longer, the consumer is considered failed and a rebalance is triggered
kafka.consumer.max-poll-interval=15000
# maximum number of records pulled per poll when consuming in batches
spring.kafka.consumer.max-poll-records=10000
# session timeout
kafka.consumer.session-timeout=20000
# maximum amount of data fetched per partition, 150 MB
kafka.consumer.max-partition-fetch-bytes=157286400
# number of consumer threads (concurrency); should be less than or equal to the topic's partition count
kafka.listener.concurrency=3
# only relevant for auto commit
spring.kafka.listener.poll-timeout=15000
# whether to enable batch consumption; true means the listener receives a list of records
kafka.listener.batch-listener=true

autoOffsetReset
earliest : if a committed offset exists for the partition, consume from that offset; otherwise consume from the beginning
latest : if a committed offset exists for the partition, consume from that offset; otherwise consume only data produced after the consumer starts
none : consume from the committed offsets as long as every partition has one; if any partition lacks a committed offset, throw an exception

AckMode :
RECORD : commit after each record is processed
BATCH (default) : commit once per poll; the frequency depends on how often poll is called
TIME : commit every ackTime interval (how does this differ from the auto-commit interval?)
COUNT : commit once ackCount acknowledgments have accumulated
COUNT_TIME : commit as soon as either the ackTime or the ackCount condition is met (see the sketch after this list)
MANUAL : the listener is responsible for acking, but the commits are still batched behind the scenes
MANUAL_IMMEDIATE : the listener is responsible for acking, and every call commits immediately
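
For the TIME / COUNT / COUNT_TIME modes, ackTime and ackCount are set on the container properties. A minimal sketch with hypothetical values (factory refers to the ConcurrentKafkaListenerContainerFactory configured below; the configuration actually used in this post sticks with MANUAL_IMMEDIATE):

        ContainerProperties containerProps = factory.getContainerProperties();
        containerProps.setAckMode(ContainerProperties.AckMode.COUNT_TIME);
        containerProps.setAckTime(5000);  // commit at least every 5 seconds...
        containerProps.setAckCount(500);  // ...or after 500 records, whichever comes first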

package com.dyly.mysqles.config;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.core.ConsumerFactory;
import org.springframework.kafka.core.DefaultKafkaConsumerFactory;
import org.springframework.kafka.listener.ContainerProperties;

import java.util.HashMap;
import java.util.Map;

@Configuration
@EnableKafka
public class KafkaConsumerConfig {
    @Value("${spring.kafka.consumer.bootstrapServers}")
    private String bootstrapServers;

    @Value("${spring.kafka.consumer.enableAutoCommit}")
    private Boolean autoCommit;

    @Value("${kafka.consumer.auto-commit-interval}")
    private Integer autoCommitInterval;

    @Value("${spring.kafka.consumer.max-poll-records}")
    private Integer maxPollRecords;

    @Value("${spring.kafka.consumer.autoOffsetReset}")
    private String autoOffsetReset;


    @Value("${spring.kafka.listener.poll-timeout}")
    private Long pollTimeout;

    @Value("${kafka.consumer.session-timeout}")
    private String sessionTimeout;

    @Value("${kafka.listener.batch-listener}")
    private Boolean batchListener;

    @Value("${kafka.consumer.max-poll-interval}")
    private Integer maxPollInterval;

    @Value("${kafka.consumer.max-partition-fetch-bytes}")
    private Integer maxPartitionFetchBytes;

    @Value("${kafka.listener.concurrency}")
    private Integer concurrency;

    @Value("${spring.kafka.consumer.groupId}")
    private String groupId;

    @Bean("batchContainerFactory")
    public  ConcurrentKafkaListenerContainerFactory<String, String> kafkaListenerContainerFactory() {
        ConcurrentKafkaListenerContainerFactory<String, String> factory = new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory());
        // enable batch consumption (driven by the injected kafka.listener.batch-listener flag)
        factory.setBatchListener(batchListener);
        factory.setConcurrency(concurrency);
        // if no records are available, poll() waits up to pollTimeout ms before returning;
        // if records are available they are returned immediately, up to max.poll.records per poll.
        // not required when committing manually
        factory.getContainerProperties().setPollTimeout(pollTimeout);
        // offset commit mode: MANUAL_IMMEDIATE commits on every acknowledge() call, MANUAL commits once per batch
        factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.MANUAL_IMMEDIATE);
        return factory;
    }

    public  ConsumerFactory<String, String> consumerFactory() {
        return new DefaultKafkaConsumerFactory<>(consumerConfigs());
    }

    public  Map<String, Object> consumerConfigs() {
        Map<String, Object> props = new HashMap<>(11);
        props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, autoCommitInterval);
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, autoCommit);
        props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, autoOffsetReset);
        props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, sessionTimeout);
        props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, maxPollInterval);
        props.put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, maxPartitionFetchBytes);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        return props;
    }
}

Attributes of the @KafkaListener annotation:
id: the consumer id; when groupId is not configured, the id is used as the group id by default
containerFactory: as mentioned above, switching a @KafkaListener between single-record and batch consumption only requires setting this attribute; it names the listener container factory bean, i.e. the ConcurrentKafkaListenerContainerFactory configured above
topics: the topic(s) to listen to; several topics can be listened to at once
topicPartitions: allows more fine-grained listening configuration, e.g. listening only to specified partitions of a topic, or starting from a given offset such as 200 (see the sketch after this list)
errorHandler: listener exception handler, configured by bean name
groupId: consumer group id
idIsGroup: whether the id should also be used as the group id
clientIdPrefix: prefix for the consumer client id
beanRef: the bean name of the actual listener container; it must be prefixed with "__"
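
As a sketch of the two less common attributes, the hypothetical listener below pins partition 1 of dyly_vc_project to an initial offset of 200 and wires in an error handler bean. Only the topic name and batchContainerFactory come from this post; the class, method, and error-handler bean names are illustrative.

import java.util.List;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.context.annotation.Bean;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.annotation.PartitionOffset;
import org.springframework.kafka.annotation.TopicPartition;
import org.springframework.kafka.listener.KafkaListenerErrorHandler;
import org.springframework.kafka.support.Acknowledgment;
import org.springframework.stereotype.Component;

@Component
public class OffsetListenerExample {

    // Error handler bean referenced by name in the listener below; it just logs and swallows the failure.
    @Bean
    public KafkaListenerErrorHandler projectErrorHandler() {
        return (message, exception) -> {
            System.err.println("listener failed: " + exception.getMessage());
            return null;
        };
    }

    // Listen only to partition 1 of dyly_vc_project, starting from offset 200.
    @KafkaListener(id = "dyly_vc_project-offset",
            containerFactory = "batchContainerFactory",
            errorHandler = "projectErrorHandler",
            topicPartitions = @TopicPartition(topic = "dyly_vc_project",
                    partitionOffsets = @PartitionOffset(partition = "1", initialOffset = "200")))
    public void handleFromOffset(List<ConsumerRecord<String, String>> records, Acknowledgment ack) {
        // process the batch, then commit manually
        ack.acknowledge();
    }
}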

 @KafkaListener(id="dyly_vc_project-0",groupId="dyly_vc_project",containerFactory = "batchContainerFactory",topicPartitions = { @TopicPartition(topic = "dyly_vc_project", partitions = {"0"})})
    public void handleProject(List<ConsumerRecord<String,String>> records,Acknowledgment ack) {
        if(records !=null && records.size()>0){
            System.out.println("dyly_vc_project 分区0 收到消息,共"+records.size()+"条, 第一条为: "+records.get(0).value());
            Map<String, List<Message>> map = CommTool.resolveRecords(records,"dyly_vc_project");
            if(map.get(update)!=null){
                List<Message> updateList = map.get(update);
            }
            if(map.get(insert)!=null){
                List<Message> insertList = map.get(insert);
            }
            if(map.get(delete)!=null){
                List<Message> deleteList = map.get(delete);
            }
        }
        ack.acknowledge();
    }
    @KafkaListener(id="dyly_vc_project-1",groupId="dyly_vc_project",containerFactory = "batchContainerFactory",topicPartitions = { @TopicPartition(topic = "dyly_vc_project", partitions = {"1"})})
    public void handleProject1(List<ConsumerRecord<String,String>> records,Acknowledgment ack) {
        if(records !=null && records.size()>0){
            System.out.println("dyly_vc_project 分区1 收到消息,共"+records.size()+"条, 第一条为: "+records.get(0).value());
            Map<String, List<Message>> map = CommTool.resolveRecords(records,"dyly_vc_project");
            if(map.get(update)!=null){
                List<Message> updateList = map.get(update);
            }
            if(map.get(insert)!=null){
                List<Message> insertList = map.get(insert);
            }
            if(map.get(delete)!=null){
                List<Message> deleteList = map.get(delete);
            }
        }
        ack.acknowledge();
    }
    @KafkaListener(id="dyly_vc_project-2",groupId="dyly_vc_project",containerFactory = "batchContainerFactory",topicPartitions = { @TopicPartition(topic = "dyly_vc_project", partitions = {"2"})})
    public void handleProject2(List<ConsumerRecord<String,String>> records,Acknowledgment ack) {
        if(records !=null && records.size()>0){
            System.out.println("dyly_vc_project 分区2 收到消息,共"+records.size()+"条, 第一条为: "+records.get(0).value());
            Map<String, List<Message>> map = CommTool.resolveRecords(records,"dyly_vc_project");
            if(map.get(update)!=null){
                List<Message> updateList = map.get(update);
            }
            if(map.get(insert)!=null){
                List<Message> insertList = map.get(insert);
            }
            if(map.get(delete)!=null){
                List<Message> deleteList = map.get(delete);
            }
        }
        ack.acknowledge();
    }
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.kafka.clients.consumer.ConsumerRecord;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;

public class CommTool {
    // SQL-type keys used to group messages; public so the listeners above can reference them
    public static final String UPDATE = "UPDATE";
    public static final String INSERT = "INSERT";
    public static final String DELETE = "DELETE";

    /**
     * Convert the keys of a Map from snake_case to camelCase.
     *
     * @param map the original map with snake_case keys
     * @return a new map with camelCase keys
     */
    public static Map<String, String> formatMapKey(Map<String, String> map) {

        Map<String, String> newMap = new HashMap<>();
        Iterator<Map.Entry<String, String>> it = map.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<String, String> entry = it.next();
            String key = entry.getKey();
            String newKey = toFormatCol(key);
            String value = entry.getValue();
            newMap.put(newKey, value);
        }
        return newMap;
    }

    /** Convert a single snake_case column name to camelCase, e.g. "create_time" -> "createTime". */
    public static String toFormatCol(String colName) {
        StringBuilder sb = new StringBuilder();
        String[] str = colName.split("_");
        int i = 0;
        for (String s : str) {
            // single-character segments are upper-cased
            if (s.length() == 1) {
                s = s.toUpperCase();
            }
            i++;
            // the first segment is appended as-is
            if (i == 1) {
                sb.append(s);
                continue;
            }
            // subsequent segments are capitalized
            if (s.length() > 0) {
                sb.append(s.substring(0, 1).toUpperCase());
                sb.append(s.substring(1));
            }
        }
        return sb.toString();
    }

    /** Parse the Canal records into Message objects and group them by SQL type. */
    public static Map<String, List<Message>> resolveRecords(List<ConsumerRecord<String, String>> records, String groupId) {
        Map<String, List<Message>> map = new HashMap<>();
        List<Message> updateList = new ArrayList<>();
        List<Message> deleteList = new ArrayList<>();
        List<Message> insertList = new ArrayList<>();
        Gson gson = new GsonBuilder().setDateFormat("yyyy-MM-dd HH:mm:ss").create();

        for (ConsumerRecord<String, String> record : records) {
            String messageStr = record.value();
            Message message = JSON.parseObject(messageStr, new TypeReference<Message>() {});
            // convert the snake_case column map of the first row into a camelCase map
            Map<String, String> contentMap = message.getData().get(0);
            Map<String, String> resultMap = CommTool.formatMapKey(contentMap);
            // drop the raw row data once the typed entity has been built
            message.setData(null);
            String jsonStr = gson.toJson(resultMap);
            Project project = gson.fromJson(jsonStr, Project.class);
            message.setEntity(project);
            if (UPDATE.equals(message.getSqlType())) {
                updateList.add(message);
            }
            if (INSERT.equals(message.getSqlType())) {
                insertList.add(message);
            }
            if (DELETE.equals(message.getSqlType())) {
                deleteList.add(message);
            }
        }
        if (updateList.size() > 0) {
            map.put(UPDATE, updateList);
        }
        if (insertList.size() > 0) {
            map.put(INSERT, insertList);
        }
        if (deleteList.size() > 0) {
            map.put(DELETE, deleteList);
        }
        return map;
    }
}
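
To make the key conversion concrete, here is a quick illustration of what formatMapKey does to a row map (the column names and values are hypothetical):

        Map<String, String> row = new HashMap<>();
        row.put("project_name", "demo");
        row.put("create_time", "2019-03-01 10:00:00");

        Map<String, String> camel = CommTool.formatMapKey(row);
        // camel now has the keys "projectName" and "createTime", so Gson can bind the
        // map onto the camelCase fields of the Project entity.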

