package com.iflytek.spark.test;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import com.iflytek.spark.bean.SysCode;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;
/**
* 同一consumer group的多线程消费可以两种方法实现:
* <p>
* 1、实现单线程客户端,启动多个去消费
* </p>
* <p>
* 2、在客户端的createMessageStreams里为topic指定大于1的线程数,再启动多个线程处理每个stream
* </p>
*/publicclassJConsumerextendsThread {private ConsumerConnector consumer;
private String topic;
privatefinalint SLEEP = 20;
publicJConsumer(String topic) {
consumer = Consumer.createJavaConsumerConnector(this.consumerConfig());
this.topic = topic;
}
private ConsumerConfig consumerConfig() {
Properties props = new Properties();
props.put("zookeeper.connect", SysCode.CDH_ZKHOST);
props.put("group.id", "jd-group");
props.put("auto.commit.enable", "true");// 默认为true,让consumer定期commit offset,zookeeper会将offset持久化,否则只在内存,若故障则再消费时会从最后一次保存的offset开始
props.put("auto.commit.interval.ms", 1000+"");// 经过INTERVAL时间提交一次offset
props.put("auto.offset.reset", "largest");// What to do when there is no initial offset in ZooKeeper or if an offset is out of range
props.put("zookeeper.session.timeout.ms", 4000 + "");
props.put("zookeeper.sync.time.ms", "200");
props.put("auto.offset.reset", "largest");
returnnew ConsumerConfig(props);
}
@Overridepublicvoidrun() {
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
topicCountMap.put(topic, new Integer(1));// 线程数
Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumer.createMessageStreams(topicCountMap);
KafkaStream<byte[], byte[]> stream = streams.get(topic).get(0);// 若上面设了多个线程去消费,则这里需为每个stream开个线程做如下的处理
ConsumerIterator<byte[], byte[]> it = stream.iterator();
MessageAndMetadata<byte[], byte[]> messageAndMetaData = null;
while (it.hasNext()) {
messageAndMetaData = it.next();
System.out.println(MessageFormat.format("Receive->[ message:{0} , partition:{1} , offset:{2} ]",
new String(messageAndMetaData.message()),
messageAndMetaData.partition() + "", messageAndMetaData.offset() + ""));
try {
sleep(SLEEP);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
publicstaticvoidmain(String[] args) {
JConsumer con = new JConsumer("102011");
con.start();
}
}
2. Producer code
package com.iflytek.spark.test;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import com.iflytek.spark.bean.SysCode;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
/**
* 可以指定规则(key和分区函数)以让消息写到特定分区:
* <p>
* 1、若发送的消息没有指定key则Kafka会随机选择一个分区
* </p>
* <p>
* 2、否则,若指定了分区函数(通过partitioner.class)则该函数以key为参数确定写到哪个分区
* </p>
* <p>
* 3、否则,Kafka根据hash(key)%partitionNum确定写到哪个分区
* </p>
*/publicclassJProducerextendsThread {private Producer<String, String> producer;
private String topic;
privatefinalint SLEEP = 10;
privatefinalint msgNum = 1000;
publicJProducer(String topic) {
Properties props = new Properties();
props.put("metadata.broker.list", SysCode.KAFKAHOST);// 如192.168.6.127:9092,192.168.6.128:9092// request.required.acks// 0, which means that the producer never waits for an acknowledgement from the broker (the same behavior as 0.7). This option provides the lowest latency but the weakest durability guarantees// (some data will be lost when a server fails).// 1, which means that the producer gets an acknowledgement after the leader replica has received the data. This option provides better durability as the client waits until the server// acknowledges the request as successful (only messages that were written to the now-dead leader but not yet replicated will be lost).// -1, which means that the producer gets an acknowledgement after all in-sync replicas have received the data. This option provides the best durability, we guarantee that no messages will be// lost as long as at least one in sync replica remains.
props.put("request.required.acks", "-1");
// 配置value的序列化类
props.put("serializer.class", "kafka.serializer.StringEncoder");
// 配置key的序列化类
props.put("key.serializer.class", "kafka.serializer.StringEncoder");
// 提供自定义的分区函数将消息写到分区上,未指定的话Kafka根据hash(messageKey)%partitionNum确定写到哪个分区// props.put("partitioner.class", "com.zsm.kfkdemo.MyPartitioner");
producer = new Producer<String, String>(new ProducerConfig(props));
this.topic = topic;
}
@Overridepublicvoidrun() {
boolean isBatchWriteMode = false;
System.out.println("isBatchWriteMode: " + isBatchWriteMode);
if (isBatchWriteMode) {
// 批量发送int batchSize = 100;
List<KeyedMessage<String, String>> msgList = new ArrayList<KeyedMessage<String, String>>(batchSize);
for (int i = 0; i < msgNum; i++) {
String msg = "Message_" + i;
msgList.add(new KeyedMessage<String, String>(topic, i + "", msg));
// msgList.add(new KeyedMessage<String, String>(topic, msg));//未指定key,Kafka会自动选择一个分区if (i % batchSize == 0) {
producer.send(msgList);
System.out.println("Send->[" + msgList + "]");
msgList.clear();
try {
sleep(SLEEP);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
producer.send(msgList);
} else {
// 单个发送for (int i = 0; i < msgNum; i++) {
KeyedMessage<String, String> msg = new KeyedMessage<String, String>(topic, i + "", "Message_" + i);
// KeyedMessage<String, String> msg = new KeyedMessage<String, String>(topic, "Message_" + i);//未指定key,Kafka会自动选择一个分区
producer.send(msg);
System.out.println("Send->[" + msg + "]");
try {
sleep(SLEEP);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
System.out.println("send done");
}
publicstaticvoidmain(String[] args) {
JProducer pro = new JProducer("test1");
pro.start();
}
}