Spark 广播 Kafka Sink 工具类
KafkaSink 工具类
import java.util.concurrent.Future
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerRecord,
RecordMetadata }
/**
 * Serializable wrapper around a Kafka producer.
 *
 * Only the `createProducer` factory is carried by the instance when it is
 * serialized; the producer itself is built on first use.
 *
 * @param createProducer factory invoked once per deserialized instance to
 *                       build the underlying producer
 * @tparam K record key type
 * @tparam V record value type
 */
class KafkaSink[K, V](createProducer: () => KafkaProducer[K, V]) extends Serializable {

  // Created lazily so that constructing the sink never instantiates a
  // producer. Marked @transient so that an already-initialised producer is
  // never written out if the sink is serialized after its first use; the
  // deserialized copy simply re-creates it on demand. This avoids
  // NotSerializableException at runtime.
  @transient lazy val producer: KafkaProducer[K, V] = createProducer()

  /**
   * Sends a keyed record to `topic`.
   *
   * @return the future returned by the underlying producer's `send`
   */
  def send(topic: String, key: K, value: V): Future[RecordMetadata] =
    producer.send(new ProducerRecord[K, V](topic, key, value))

  /**
   * Sends a record without a key to `topic`.
   *
   * @return the future returned by the underlying producer's `send`
   */
  def send(topic: String, value: V): Future[RecordMetadata] =
    producer.send(new ProducerRecord[K, V](topic, value))
}
/** Factory methods for building a [[KafkaSink]] from producer configuration. */
object KafkaSink {
  // Explicit asJava/asScala converters instead of the deprecated implicit
  // JavaConversions, so every Java <-> Scala collection hop is visible.
  import scala.collection.JavaConverters._

  /**
   * Builds a sink whose producer is created from `config` on first use.
   *
   * @param config producer settings keyed by property name
   * @return a sink wrapping a producer factory; no producer is created here
   */
  def apply[K, V](config: Map[String, Object]): KafkaSink[K, V] = {
    val createProducerFunc = () => {
      // Create the KafkaProducer from the captured configuration.
      val producer = new KafkaProducer[K, V](config.asJava)
      sys.addShutdownHook {
        // Close the producer before the JVM that created it exits, so that
        // buffered records get a chance to be written to Kafka.
        producer.close()
      }
      producer
    }
    new KafkaSink(createProducerFunc)
  }

  /**
   * Convenience overload that accepts `java.util.Properties`.
   *
   * @param config producer settings as Java properties
   * @return a sink configured from a Scala-map copy of `config`
   */
  def apply[K, V](config: java.util.Properties): KafkaSink[K, V] =
    apply[K, V](config.asScala.toMap)
}
sink广播变量方法
// Broadcast a KafkaSink configured with String key/value serializers.
val kafkaProducer: Broadcast[KafkaSink[String, String]] = {
  // Assemble the producer settings: broker list plus serializer classes.
  val producerProps = new Properties()
  producerProps.setProperty("bootstrap.servers", Conf.brokers)
  val stringSerializerName = classOf[StringSerializer].getName
  Seq("key.serializer", "value.serializer").foreach { settingName =>
    producerProps.setProperty(settingName, stringSerializerName)
  }

  log.warn("kafka producer init done! ")

  // Wrap the settings in a KafkaSink and hand it to Spark as a broadcast.
  val sink = KafkaSink[String, String](producerProps)
  ssc.sparkContext.broadcast(sink)
}
在 foreachRDD 输出流 API 中使用
// Write each batch of the stream out to Kafka.
segmentedStream.foreachRDD { batch =>
  // Nothing is sent for batches that contain no records.
  if (!batch.isEmpty) {
    batch.foreach { record =>
      // Look up the broadcast sink inside the per-record closure.
      val sink = kafkaProducer.value
      sink.send(Conf.outTopics, record._1.toString, record._2)
      // Any other required per-record work goes here.
    }
  }
}
连接池的使用与释放
rdd.foreachPartition { partitionOfRecords =>
  // ConnectionPool is a static, lazily initialised pool of connections.
  val connection = ConnectionPool.getConnection()
  try {
    partitionOfRecords.foreach(record => connection.send(record))
  } finally {
    // Return the connection to the pool for reuse even when sending a
    // record throws; otherwise a failed partition would leak it.
    ConnectionPool.returnConnection(connection)
  }
}
}
Kryo 序列化注册
// Register the Int and String classes with Kryo on this SparkConf.
// NOTE(review): presumably the Kryo serializer itself is enabled elsewhere
// (e.g. via spark.serializer) — confirm.
sparkConf.registerKryoClasses(Array(classOf[Int],classOf[String]))