import java.util.Properties

import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroSerializer}
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecordBuilder
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.config.SslConfigs
import org.apache.spark.sql.{DataFrame, Row}
object KafkaProducerDF {

  /**
   * Kafka client configuration: SSL transport plus Confluent Avro value
   * serialization against a schema registry.
   *
   * NOTE(review): truststore/keystore passwords are hard-coded here; move
   * them to a secure configuration source (env vars, credential store)
   * before production use.
   */
  val kafkaParams: Map[String, Object] = Map(
    CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> "localhost:9092",
    CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> "SSL",
    SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG -> "/path/to/truststore.jks",
    SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG -> "truststore-password",
    SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG -> "/path/to/keystore.jks",
    SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG -> "keystore-password",
    SslConfigs.SSL_KEY_PASSWORD_CONFIG -> "key-password",
    "key.serializer" -> "org.apache.kafka.common.serialization.StringSerializer",
    "value.serializer" -> classOf[KafkaAvroSerializer],
    AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> "https://schema-registry-url:8081",
    AbstractKafkaAvroSerDeConfig.VALUE_SUBJECT_NAME_STRATEGY -> "io.confluent.kafka.serializers.subject.TopicNameStrategy"
  )

  /**
   * Writes every row of `df` to Kafka topic `topic`, Avro-encoding each row
   * against `schema` (an Avro schema JSON string). The row's "key" column is
   * used as the record key.
   *
   * One producer is created per Spark partition on the executor, and it is
   * always closed — even when serialization or send throws mid-partition —
   * so sockets and buffers are not leaked.
   *
   * @param df     rows to publish; must contain a "key" string column
   * @param topic  destination Kafka topic
   * @param schema Avro schema as a JSON string (parsed on the executor,
   *               because Schema.Parser state should not be shipped in the
   *               closure)
   */
  def writeToKafka(df: DataFrame, topic: String, schema: String): Unit = {
    // Explicit Iterator[Row] annotation disambiguates between the Scala
    // foreachPartition overload and the Java ForeachPartitionFunction one.
    df.foreachPartition { (partition: Iterator[Row]) =>
      val avroSchema = new Schema.Parser().parse(schema)
      val props = new Properties()
      // Copy entries one by one: the original `kafkaParams.asJava` cannot
      // compile without a collection-converters import, which this avoids.
      kafkaParams.foreach { case (k, v) => props.put(k, v) }
      val producer = new KafkaProducer[String, AnyRef](props)
      try {
        partition.foreach { row =>
          val key = row.getAs[String]("key")
          val value = AvroUtils.toAvro(row, avroSchema)
          producer.send(new ProducerRecord[String, AnyRef](topic, key, value))
        }
        producer.flush()
      } finally {
        // close() also flushes pending records; the finally guarantees the
        // producer is released even if a row fails to serialize or send.
        producer.close()
      }
    }
  }
}
object AvroUtils {

  /**
   * Converts a Spark [[Row]] into an Avro `GenericRecord` conforming to
   * `schema`.
   *
   * Fields are matched by NAME, not by position: the original positional
   * pairing (`row.get(i)` with `schema.getFields.get(i)`) silently wrote
   * wrong values when the Row's column order differed from the schema, and
   * threw IndexOutOfBoundsException when the Row carried extra columns
   * (e.g. a separate "key" column). Name-based lookup ignores extra Row
   * columns and is order-independent.
   *
   * NOTE(review): values are passed through unconverted; Spark types with
   * no direct Avro mapping (e.g. java.sql.Timestamp) need explicit
   * conversion upstream — confirm against the schemas in use.
   *
   * @param row    source row; must contain a column for every schema field
   * @param schema target Avro record schema
   * @return the populated GenericRecord (schema defaults apply to any
   *         field left unset by the builder)
   */
  def toAvro(row: Row, schema: Schema): AnyRef = {
    val builder = new GenericRecordBuilder(schema)
    val fields = schema.getFields
    for (i <- 0 until fields.size()) {
      val field = fields.get(i)
      builder.set(field.name(), row.getAs[AnyRef](field.name()))
    }
    builder.build()
  }
}