关闭

Kafka 新版 Producer API 使用示例

5986人阅读 评论(1) 收藏 举报
分类:
package com.hupu.dace.spark.streaming

import java.util.Properties

import com.hupu.dace.hbaserestful.util.HdfsUtil
import DaceFunctions._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

/**
 * Created by xiaojun on 2015/5/20.
 */
object PalPVUVProducer {

  /**
   * Reads two \001-delimited log files from HDFS, filters out malformed rows,
   * and publishes each surviving line to a Kafka topic via the new (0.8.2+)
   * asynchronous producer API.
   *
   * Usage: PalPVUVProducer &lt;brokerList&gt; &lt;topic&gt; [key=value ...]
   * Extra key=value arguments are passed through as producer config overrides
   * (e.g. buffer.memory=67108864 acks=1 batch.size=8196).
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage: PVUVProducer <metadataBrokerList> <topic> ")
      System.exit(1)
    }

    val Array(brokers, topicName, _*) = args

    // Producer configuration: start with any key=value overrides from the CLI.
    val props = new Properties()
    args.drop(2).foreach { arg =>
      val pieces = arg.split("=")
      if (pieces.length != 2) throw new IllegalArgumentException("Invalid property: " + arg)
      props.put(pieces(0), pieces(1))
    }

    // BUG FIX: the new producer API requires bootstrap.servers, which was
    // commented out; the code instead set the legacy old-API keys
    // (metadata.broker.list / serializer.class) that the new KafkaProducer
    // ignores, so it only worked when bootstrap.servers was smuggled in via
    // the extra command-line properties. Set it explicitly from the first arg.
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer")
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer")

    val producer: KafkaProducer[Array[Byte], Array[Byte]] =
      new KafkaProducer[Array[Byte], Array[Byte]](props)

    // Load one HDFS file and keep only well-formed rows: at least 31 fields,
    // numeric field 4, field 5 == "i", and a usable field 11 (not Hive's "\N"
    // null marker and not null after cleaning via DaceFunctions.filter).
    def loadLines(path: String): Array[String] =
      HdfsUtil.getString(path).split("\n").filter { line =>
        val l = line.split("\001")
        l.length >= 31 && l(4).matches("""\d+""") && filter(l(5)) == "i" &&
          filter(l(11)) != "\\N" && filter(l(11)) != null
      }

    val lines  = loadLines("/user/hive/warehouse/rd_e_pal/dt=20150312/hr=03/rd_e_pal.20150312.03-230")
    val lines2 = loadLines("/user/hive/warehouse/rd_e_pal/dt=20150312/hr=03/rd_e_pal.20150312.03-231")

    val start = System.currentTimeMillis()
    try {
      // send() is asynchronous; records are batched and flushed by close().
      (lines ++ lines2).foreach { line =>
        producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topicName, line.getBytes()))
      }
      println("---------cost:" + (System.currentTimeMillis() - start))
    } finally {
      // Always close so buffered records are flushed even on failure.
      producer.close()
    }
  }
}

Kafka 0.8.2.1 新版 Producer API 是异步批量写的，效率非常高。

参数传递:

 n1:9092,n2:9092,n3:9092 test-rep-one buffer.memory=67108864 acks=1 bootstrap.servers=n1:9092,n2:9092,n3:9092 buffer.memory=67108864 batch.size=8196 



1
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:1589703次
    • 积分:15674
    • 等级:
    • 排名:第677名
    • 原创:301篇
    • 转载:139篇
    • 译文:1篇
    • 评论:272条
    文章分类
    最新评论