pom.xml
<dependencies>
<!-- Scala standard library dependency -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<!-- Kafka client dependency -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.version}</version>
</dependency>
</dependencies>
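The ${scala.version} and ${kafka.version} placeholders need to be defined in the pom's <properties> block, which the snippet above omits. A minimal sketch (the version numbers here are assumptions; pick ones that match your cluster and Scala toolchain):

<properties>
<scala.version>2.12.12</scala.version>
<kafka.version>2.4.1</kafka.version>
</properties>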
Producer
package com.yege.kafkademo

import java.util.{Properties, UUID}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object ProducerDemo {
  def main(args: Array[String]): Unit = {
    // 1. Configuration properties
    val props = new Properties()
    // Kafka brokers to bootstrap from
    props.setProperty("bootstrap.servers", "linux01:9092,linux02:9092,linux03:9092")
    // Key serializer
    props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    // Value serializer (the literal class name and classOf[...].getName both work)
    props.setProperty("value.serializer", classOf[StringSerializer].getName)

    val topic = "wordcount"

    // 2. Create the Kafka producer
    val producer: KafkaProducer[String, String] = new KafkaProducer[String, String](props)

    for (i <- 1 to 10) {
      // 3. Build the record
      // Explicit partition number: write every record to partition 2
      val record = new ProducerRecord[String, String](topic, 2, null, "myvalue:" + i)
      //val partitionNum = i % 3 // spread the records evenly across 3 partitions
      //val record = new ProducerRecord[String, String](topic, partitionNum, null, "myvalue:" + i)
      // No partition number, explicit key: partition = hash(key) % partition count.
      // Records with the same key always land in the same partition, but one partition can hold records with many different keys.
      //val record = new ProducerRecord[String, String](topic, "test1", "myvalue:" + i)
      // Random key per record: the key's hash modulo the topic's partition count picks the partition
      //val record = new ProducerRecord[String, String](topic, UUID.randomUUID().toString, "myvalue:" + i)
      // Neither key nor partition: the default strategy is round-robin, spreading records evenly across the partitions
      //val record = new ProducerRecord[String, String](topic, "value-" + i)

      // 4. Send the message
      producer.send(record)
    }
    println("messages sent")
    // Release resources (close() also flushes any buffered records)
    producer.close()
  }
}
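Note that producer.send() is asynchronous: the println above only confirms that the records were handed to the producer's buffer, not that they reached the broker. To verify delivery per record, send() also accepts a callback. A minimal sketch, reusing the producer and record from the loop above:

import org.apache.kafka.clients.producer.{Callback, RecordMetadata}

producer.send(record, new Callback {
  // Invoked once per record, after the broker acknowledges it or the send fails
  override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
    if (exception != null) exception.printStackTrace()
    else println(s"delivered to ${metadata.topic()}-${metadata.partition()} at offset ${metadata.offset()}")
  }
})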
Consumer
package com.yege.kafkademo

import java.time.Duration
import java.util.Properties
import org.apache.kafka.clients.consumer.{ConsumerRecords, KafkaConsumer}
import org.apache.kafka.common.serialization.StringDeserializer

object ConsumerDemo {
  def main(args: Array[String]): Unit = {
    // 1. Configuration properties
    val props = new Properties()
    // Brokers to consume from
    props.setProperty("bootstrap.servers", "linux01:9092,linux02:9092,linux03:9092")
    // Deserializers
    props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.setProperty("value.deserializer", classOf[StringDeserializer].getName)
    // Consumer group id
    props.setProperty("group.id", "g106")
    // Where to start when there is no committed offset for this group
    // earliest: from the beginning (like --from-beginning)
    // latest: only records produced after the consumer starts
    props.setProperty("auto.offset.reset", "earliest") // [latest, earliest, none]
    // Whether to commit offsets automatically.
    // enable.auto.commit defaults to true (commits every 5 seconds): the consumer
    // periodically records (group.id, topic, partition) -> offset
    //props.setProperty("enable.auto.commit", "false") // disable auto-commit and maintain offsets manually
    //auto.commit.interval.ms defaults to 5000

    // 2. Create the consumer instance
    val consumer: KafkaConsumer[String, String] = new KafkaConsumer[String, String](props)

    // subscribe() takes a Java collection
    val topic: java.util.List[String] = java.util.Arrays.asList("wordcount")
    // 3. Subscribe to the topic
    consumer.subscribe(topic)

    while (true) {
      // 4. Poll for data
      val msgs: ConsumerRecords[String, String] = consumer.poll(Duration.ofMillis(2000))
      // Implicit conversions that turn Java collections/iterators into Scala ones
      import scala.collection.JavaConverters._
      for (cr <- msgs.asScala) {
        // cr is a ConsumerRecord[String, String]
        println(cr)
      }
    }
    //consumer.close() // never reached while the loop runs forever
  }
}
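If enable.auto.commit is set to false as in the commented-out line above, the consumer becomes responsible for committing offsets itself. A minimal sketch of manual commits, calling commitSync() after processing each batch (this would replace the poll loop above):

while (true) {
  // Pull the next batch
  val msgs: ConsumerRecords[String, String] = consumer.poll(Duration.ofMillis(2000))
  import scala.collection.JavaConverters._
  for (cr <- msgs.asScala) {
    println(cr)
  }
  // Synchronously commit the offsets returned by the last poll;
  // blocks until the broker acknowledges the commit
  consumer.commitSync()
}

commitAsync() is the non-blocking alternative when commit latency matters more than strict ordering of commit acknowledgements.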
Output: