java编写demo实现kafka生产消费

Kafka是一个吞吐量很高的分布式消息系统,使用Scala编写,其消息的生产和消费方式与普通的消息队列有所不同,这里写了个demo程序供大家参考。Kafka的安装请参考官方文档。

首先我们需要新建一个maven项目,然后在pom中引用kafka jar包,引用依赖如下:

[xml]  view plain  copy
  1. <dependency>  
  2.     <groupId>org.apache.kafka</groupId>  
  3.     <artifactId>kafka_2.10</artifactId>  
  4.     <version>0.8.0</version>  
  5. </dependency>  

我们用的版本是0.8, 下面我们看下生产消息的代码:

[java]  view plain  copy
  1. package cn.outofmemory.kafka;  
  2.   
  3. import java.util.Properties;  
  4.   
  5. import kafka.javaapi.producer.Producer;  
  6. import kafka.producer.KeyedMessage;  
  7. import kafka.producer.ProducerConfig;  
  8.   
  9. /** 
  10.  * Hello world! 
  11.  * 
  12.  */  
  13. public class KafkaProducer   
  14. {  
  15.     private final Producer<String, String> producer;  
  16.     public final static String TOPIC = "TEST-TOPIC";  
  17.   
  18.     private KafkaProducer(){  
  19.         Properties props = new Properties();  
  20.         //此处配置的是kafka的端口  
  21.         props.put("metadata.broker.list""ip:9092");  
  22.   
  23.         //配置value的序列化类  
  24.         props.put("serializer.class""kafka.serializer.StringEncoder");  
  25.         //配置key的序列化类  
  26.         props.put("key.serializer.class""kafka.serializer.StringEncoder");  
  27.   
  28.         //request.required.acks  
  29.         //0, which means that the producer never waits for an acknowledgement from the broker (the same behavior as 0.7). This option provides the lowest latency but the weakest durability guarantees (some data will be lost when a server fails).  
  30.         //1, which means that the producer gets an acknowledgement after the leader replica has received the data. This option provides better durability as the client waits until the server acknowledges the request as successful (only messages that were written to the now-dead leader but not yet replicated will be lost).  
  31.         //-1, which means that the producer gets an acknowledgement after all in-sync replicas have received the data. This option provides the best durability, we guarantee that no messages will be lost as long as at least one in sync replica remains.  
  32.         props.put("request.required.acks","-1");  
  33.   
  34.         producer = new Producer<String, String>(new ProducerConfig(props));  
  35.     }  
  36.   
  37.     void produce() {  
  38.         int messageNo = 1000;  
  39.         final int COUNT = 10000;  
  40.   
  41.         while (messageNo < COUNT) {  
  42.             String key = String.valueOf(messageNo);  
  43.             String data = "hello kafka message " + key;  
  44.             producer.send(new KeyedMessage<String, String>(TOPIC, key ,data));  
  45.             System.out.println(data);  
  46.             messageNo ++;  
  47.         }  
  48.     }  
  49.   
  50.     public static void main( String[] args )  
  51.     {  
  52.         new KafkaProducer().produce();  
  53.     }  
  54. }  

下面是消费端的代码实现:

[java]  view plain  copy
  1. package cn.outofmemory.kafka;  
  2.   
  3. import java.util.HashMap;  
  4. import java.util.List;  
  5. import java.util.Map;  
  6. import java.util.Properties;  
  7.   
  8. import kafka.consumer.ConsumerConfig;  
  9. import kafka.consumer.ConsumerIterator;  
  10. import kafka.consumer.KafkaStream;  
  11. import kafka.javaapi.consumer.ConsumerConnector;  
  12. import kafka.serializer.StringDecoder;  
  13. import kafka.utils.VerifiableProperties;  
  14.   
  15. public class KafkaConsumer {  
  16.   
  17.     private final ConsumerConnector consumer;  
  18.   
  19.     private KafkaConsumer() {  
  20.         Properties props = new Properties();  
  21.         //zookeeper 配置  
  22.         props.put("zookeeper.connect""ip:2181");  
  23.   
  24.         //group 代表一个消费组  
  25.         props.put("group.id""jd-group");  
  26.   
  27.         //zk连接超时  
  28.         props.put("zookeeper.session.timeout.ms""4000");  
  29.         props.put("zookeeper.sync.time.ms""200");  
  30.         props.put("auto.commit.interval.ms""1000");  
  31.         props.put("auto.offset.reset""smallest");  
  32.         //序列化类  
  33.         props.put("serializer.class""kafka.serializer.StringEncoder");  
  34.   
  35.         ConsumerConfig config = new ConsumerConfig(props);  
  36.   
  37.         consumer = kafka.consumer.Consumer.createJavaConsumerConnector(config);  
  38.     }  
  39.   
  40.     void consume() {  
  41.         Map<String, Integer> topicCountMap = new HashMap<String, Integer>();  
  42.         topicCountMap.put(KafkaProducer.TOPIC, new Integer(1));  
  43.   
  44.         StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());  
  45.         StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());  
  46.   
  47.         Map<String, List<KafkaStream<String, String>>> consumerMap =   
  48.                 consumer.createMessageStreams(topicCountMap,keyDecoder,valueDecoder);  
  49.         KafkaStream<String, String> stream = consumerMap.get(KafkaProducer.TOPIC).get(0);  
  50.         ConsumerIterator<String, String> it = stream.iterator();  
  51.         while (it.hasNext())  
  52.             System.out.println(it.next().message());  
  53.     }  
  54.   
  55.     public static void main(String[] args) {  
  56.         new KafkaConsumer().consume();  
  57.     }  
  58. }  

注意消费端需要配置成zk的地址,而生产端配置的是kafka的ip和端口。

来自:http://outofmemory.cn/code-snippet/33051/Java-kafka-producer-consumer-example

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是使用Java编写Flink消费Kafka写入Hive的示例代码:

1. 导入依赖

```java
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;
```

2. 配置Kafka连接

```java
String kafkaBootstrapServers = "localhost:9092";
String kafkaTopic = "test";
Properties kafkaProps = new Properties();
kafkaProps.setProperty("bootstrap.servers", kafkaBootstrapServers);
kafkaProps.setProperty("group.id", "flink-group");
```

3. 创建 Flink 环境和 Kafka 消费者

```java
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> kafkaStream = env.addSource(new FlinkKafkaConsumer<>(kafkaTopic, new SimpleStringSchema(), kafkaProps));
```

4. 对收到的消息进行处理

```java
DataStream<String> processedStream = kafkaStream.map(new MapFunction<String, String>() {
    @Override
    public String map(String value) throws Exception {
        // 在这里对数据进行处理,返回处理后的数据
        return value;
    }
});
```

5. 将处理后的数据写入 Hive

```java
String hiveTableName = "test";
String hiveMetastoreUri = "thrift://localhost:9083";
String hiveDbName = "default";
String hivePartitionColumn = "dt";
String hivePartitionValue = "20220101";
String hiveOutputPath = "/user/hive/warehouse/" + hiveDbName + ".db/" + hiveTableName + "/" + hivePartitionColumn + "=" + hivePartitionValue;
DataStream<String> hiveDataStream = processedStream.map(new MapFunction<String, String>() {
    @Override
    public String map(String value) throws Exception {
        // 在这里将数据转换为 Hive 表的格式,返回转换后的数据
        return value;
    }
});
// 将数据写入 Hive
hiveDataStream.addSink(new FlinkHiveOutputFormat<>(new Path(hiveOutputPath), new org.apache.hadoop.hive.ql.io.orc.OrcSerde(), new Object[]{}));
```

6. 将处理后的数据写回 Kafka

```java
String kafkaOutputTopic = "output";
FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>(kafkaBootstrapServers, kafkaOutputTopic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), kafkaProps);
// 将数据写回 Kafka
processedStream.addSink(kafkaProducer);
```

完整示例代码:

```java
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;
import java.util.Properties;

public class FlinkKafkaToHiveDemo {
    public static void main(String[] args) throws Exception {
        String kafkaBootstrapServers = "localhost:9092";
        String kafkaTopic = "test";
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", kafkaBootstrapServers);
        kafkaProps.setProperty("group.id", "flink-group");
        String hiveTableName = "test";
        String hiveMetastoreUri = "thrift://localhost:9083";
        String hiveDbName = "default";
        String hivePartitionColumn = "dt";
        String hivePartitionValue = "20220101";
        String hiveOutputPath = "/user/hive/warehouse/" + hiveDbName + ".db/" + hiveTableName + "/" + hivePartitionColumn + "=" + hivePartitionValue;
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<String> kafkaStream = env.addSource(new FlinkKafkaConsumer<>(kafkaTopic, new SimpleStringSchema(), kafkaProps));
        DataStream<String> processedStream = kafkaStream.map(new MapFunction<String, String>() {
            @Override
            public String map(String value) throws Exception {
                // 在这里对数据进行处理,返回处理后的数据
                return value;
            }
        });
        DataStream<String> hiveDataStream = processedStream.map(new MapFunction<String, String>() {
            @Override
            public String map(String value) throws Exception {
                // 在这里将数据转换为 Hive 表的格式,返回转换后的数据
                return value;
            }
        });
        DataStream<String> kafkaOutputStream = processedStream.map(new MapFunction<String, String>() {
            @Override
            public String map(String value) throws Exception {
                // 在这里对数据进行处理,返回处理后的数据
                return value;
            }
        });
        FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>(kafkaBootstrapServers, kafkaOutputTopic, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), kafkaProps);
        processedStream.addSink(kafkaProducer);
        hiveDataStream.addSink(new FlinkHiveOutputFormat<>(new Path(hiveOutputPath), new org.apache.hadoop.hive.ql.io.orc.OrcSerde(), new Object[]{}));
        env.execute("FlinkKafkaToHiveDemo");
    }
}
```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值