package com.hupu.dace.spark.streaming
import java.util.Properties
import com.hupu.dace.hbaserestful.util.HdfsUtil
import DaceFunctions._
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
/**
* Created by xiaojun on 2015/5/20.
*/
/**
 * One-shot load-test driver: reads two pre-filtered hive partition files from
 * HDFS and pumps every valid record into a Kafka topic with the new (0.8.2+)
 * async producer API, printing the total wall-clock cost.
 *
 * Usage: PalPVUVProducer <metadataBrokerList> <topic> [key=value ...]
 * Trailing key=value args become extra producer properties.
 */
object PalPVUVProducer {

  /** Column delimiter of the rd_e_pal hive table (\u0001 = ^A). */
  private val FieldDelimiter = "\001"

  /**
   * Loads one HDFS file and keeps only lines that look like valid pal
   * PV/UV events: at least 31 columns, numeric column 4, column 5 == "i",
   * and a non-null / non-"\N" column 11.
   * NOTE(review): `filter` comes from DaceFunctions — presumably a trim/clean
   * helper; confirm it never throws on null input.
   */
  private def loadValidLines(path: String): Array[String] =
    HdfsUtil.getString(path).split("\n").filter { line =>
      val cols = line.split(FieldDelimiter)
      cols.length >= 31 &&
        cols(4).matches("""\d+""") &&
        filter(cols(5)) == "i" &&
        filter(cols(11)) != null && // null check first, then the hive NULL marker
        filter(cols(11)) != "\\N"
    }

  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage: PalPVUVProducer <metadataBrokerList> <topic> [key=value ...]")
      System.exit(1)
    }
    val Array(brokers, topicName, _*) = args

    val props = new Properties()
    // Any trailing key=value args become extra producer properties.
    args.drop(2).foreach { kv =>
      val pieces = kv.split("=")
      if (pieces.length != 2) throw new IllegalArgumentException("Invalid property: " + kv)
      props.put(pieces(0), pieces(1))
    }
    // The new producer API requires bootstrap.servers (the old
    // metadata.broker.list / serializer.class keys are ignored by it);
    // derive it from the broker-list argument so callers need not pass it twice.
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.ByteArraySerializer")
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.ByteArraySerializer")

    val producer = new KafkaProducer[Array[Byte], Array[Byte]](props)

    val lines  = loadValidLines("/user/hive/warehouse/rd_e_pal/dt=20150312/hr=03/rd_e_pal.20150312.03-230")
    val lines2 = loadValidLines("/user/hive/warehouse/rd_e_pal/dt=20150312/hr=03/rd_e_pal.20150312.03-231")

    val start = System.currentTimeMillis()
    // send() is asynchronous: it only enqueues; close() below flushes and waits.
    (lines ++ lines2).foreach { line =>
      producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topicName, line.getBytes()))
    }
    println("---------cost:" + (System.currentTimeMillis() - start))
    producer.close()
  }
}
kafka 0.8.2.1新版API是异步写的,效率非常高.
参数传递:
n1:9092,n2:9092,n3:9092 test-rep-one acks=1 bootstrap.servers=n1:9092,n2:9092,n3:9092 buffer.memory=67108864 batch.size=8196