Receiving Kafka data with Scala Spark and writing it to ES

Woohoo, my first time writing Scala. Even though I was mostly copying from an example, after a bit of stumbling it ran successfully~

Maven configuration:

 <properties>
        <spark.version>2.2.0</spark.version>
        <scala.version>2.11</scala.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
    </dependencies>
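One note on the POM: the streaming job at the bottom imports org.elasticsearch.spark.rdd.EsSpark, so the build also needs the elasticsearch-hadoop Spark connector. An entry like the one below should work; the artifact name and version here are assumptions, so match them to your Elasticsearch cluster:

        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch-spark-20_2.11</artifactId>
            <!-- assumed version; use the one matching your ES cluster -->
            <version>6.5.4</version>
        </dependency>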

Sending messages to Kafka from Scala


import java.util.concurrent.Future
import java.util.{Properties, UUID}

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.StringSerializer

object Producer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    // Kafka broker addresses and ports. If hostnames are used as below, the host/IP mappings must be
    // added to C:\Windows\System32\drivers\etc\hosts; otherwise configure ip:port directly.
    props.setProperty("bootstrap.servers", "cbp3.chinaoly.com:6667,cbp4.chinaoly.com:6667,cbp5.chinaoly.com:6667,cbp6.chinaoly.com:6667,cbp7.chinaoly.com:6667,cbp8.chinaoly.com:6667")
    // serialization format for keys and values
    props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.setProperty("value.serializer", classOf[StringSerializer].getName)
    // "acks" controls acknowledgements; the default is 1:
    //   0      the client sends data to the cluster and does not wait for any acknowledgement
    //   1      the client waits for the leader's acknowledgement; the leader does not wait for followers
    //  -1/all  the leader waits for the followers' acknowledgements before acknowledging the client
    // props.setProperty("acks", "1")
    val producerClient = new KafkaProducer[String, String](props)
    // (topic, partition, key, value): one record sent to partition 0 of topic-ztLogInfo
    val record = new ProducerRecord[String, String]("topic-ztLogInfo", 0,
      UUID.randomUUID().toString, "hello")
    val result: Future[RecordMetadata] = producerClient.send(record)
    // get() blocks until the broker has acknowledged the record, then returns its metadata
    val metadata: RecordMetadata = result.get()
    println(metadata)
    producerClient.close()
  }

}
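Since the streaming job at the end writes the raw message values straight to ES with EsSpark.saveJsonToEs, in practice the producer should be sending JSON strings rather than plain "hello". A minimal sketch of that (the JSON field names and the shortened broker list are just placeholders for illustration):

import java.util.Properties

import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.StringSerializer

object JsonProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    // shortened broker list; use the full cluster address list in practice
    props.setProperty("bootstrap.servers", "cbp3.chinaoly.com:6667")
    props.setProperty("key.serializer", classOf[StringSerializer].getName)
    props.setProperty("value.serializer", classOf[StringSerializer].getName)

    val producer = new KafkaProducer[String, String](props)
    // send a few JSON strings; the fields here are made up for illustration
    (1 to 5).foreach { i =>
      val json = s"""{"logId":$i,"msg":"hello $i"}"""
      val record = new ProducerRecord[String, String]("topic-ztLogInfo", null, json)
      producer.send(record, new Callback {
        override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
          if (exception != null) exception.printStackTrace()
          else println(s"sent to partition ${metadata.partition()} offset ${metadata.offset()}")
        }
      })
    }
    producer.flush()
    producer.close()
  }
}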

Receiving Kafka data in Scala


import java.util

import org.apache.kafka.clients.consumer.{ConsumerRecord, ConsumerRecords, KafkaConsumer}
import org.apache.kafka.common.serialization.StringDeserializer

object Consumer {
  def main(args: Array[String]): Unit = {
    val configs = new util.HashMap[String,AnyRef]()
    configs.put("bootstrap.servers","cbp3.chinaoly.com:6667,cbp4.chinaoly.com:6667,cbp5.chinaoly.com:6667,cbp6.chinaoly.com:6667,cbp7.chinaoly.com:6667,cbp8.chinaoly.com:6667")
    configs.put("key.deserializer",classOf[StringDeserializer].getName)
    configs.put("value.deserializer",classOf[StringDeserializer].getName)
    // consumer group id
    configs.put("group.id","9527")
    // auto.offset.reset: [latest, earliest], default latest. With latest, a consumer that starts and
    // connects to Kafka only sees data produced after that point; it cannot see earlier data.
    // earliest: when there is no committed offset, consume from the beginning of the partition (offset 0)
    configs.put("auto.offset.reset","earliest")
    // whether to automatically commit the offsets of consumed data
    configs.put("enable.auto.commit","true")

    val consumer = new KafkaConsumer[String,String](configs)
    consumer.subscribe(util.Arrays.asList("topic-ztLogInfo"))

    while (true){
      // poll with a timeout (ms) and fetch a batch of records
      val records: ConsumerRecords[String, String] = consumer.poll(3000)
      println(" == count : "+records.count())
      val recordIter:util.Iterator[ConsumerRecord[String,String]] = records.iterator()
      while(recordIter.hasNext){
        val record = recordIter.next()
        println(" == value : "+record.value())
      }
    }
    // note: never reached because of the endless while(true) loop above
    consumer.close()
  }
}
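Because of the endless poll loop, close() above is never actually called. One common way to shut the consumer down cleanly is the wakeup-on-shutdown-hook pattern; a rough sketch (nothing here is specific to this project, and configs is assumed to be built exactly as in Consumer above):

import java.util

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.errors.WakeupException

import scala.collection.JavaConverters._

object GracefulConsumer {
  def runUntilShutdown(configs: util.Map[String, AnyRef]): Unit = {
    val consumer = new KafkaConsumer[String, String](configs)
    consumer.subscribe(util.Arrays.asList("topic-ztLogInfo"))
    // wakeup() makes a blocked poll() throw WakeupException so the loop can exit
    sys.addShutdownHook(consumer.wakeup())
    try {
      while (true) {
        for (record <- consumer.poll(3000).asScala) {
          println(" == value : " + record.value())
        }
      }
    } catch {
      case _: WakeupException => // expected during shutdown
    } finally {
      consumer.close()
    }
  }
}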

Version 2:

Spark Streaming receives Kafka data and writes it to ES

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.elasticsearch.spark.rdd.EsSpark

object LogReceive {

  def main(args: Array[String]): Unit = {
    //     ============================ read data from Kafka
    val conf = new SparkConf().setAppName("Kafka_director")
      .set("spark.streaming.kafka.consumer.poll.ms", "30000")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .setMaster("local[*]")
    // one micro-batch every 3 seconds
    val ssc = new StreamingContext(conf, Seconds(3))

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> ("cbp3.chinaoly.com:6667,cbp4.chinaoly.com:6667,cbp5.chinaoly.com:6667," +
        "cbp6.chinaoly.com:6667,cbp7.chinaoly.com:6667,cbp8.chinaoly.com:6667"),
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "9527",
      "auto.offset.reset" -> "earliest",
      "enable.auto.commit" -> (true: java.lang.Boolean)
    )
    val topics = Array("topic-ztLogInfo")

    val stream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )
    println(" === begin")
    // keep only the message value of each record
    val value: DStream[String] = stream.map(record => record.value())
    value.print()
    println(" === end ")

    //     ============================ processing / computation
    //     ============================ write to ES
    val esConf = Map(
      ("es.nodes", "192.168.52.37"),        // ES node
      ("es.port", "9296"),                  // ES port
      ("es.resource", "t_log_info/logInfo") // ES index/type
    )


    value.foreachRDD(rdd => {
      // each micro-batch is an RDD of JSON strings; saveJsonToEs indexes them into t_log_info/logInfo
      EsSpark.saveJsonToEs(rdd, esConf)
    })

    //     ============================ start and run until terminated
    ssc.start()
    ssc.awaitTermination()
  }
}
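saveJsonToEs assumes every element of the RDD is already a valid JSON string, so the Kafka messages must be produced as JSON (as in the producer sketch earlier). If the messages are plain text instead, one alternative is to map them into case classes and use EsSpark.saveToEs, which serializes Scala objects for you. A rough sketch, with a made-up LogEvent structure:

// hypothetical event structure for illustration; real fields depend on the log format.
// Define the case class at the top level (outside main) so Spark can serialize it.
case class LogEvent(message: String, length: Int)

// inside the streaming job, instead of saveJsonToEs:
value.foreachRDD { rdd =>
  val events = rdd.map(line => LogEvent(line, line.length))
  EsSpark.saveToEs(events, "t_log_info/logInfo",
    Map("es.nodes" -> "192.168.52.37", "es.port" -> "9296"))
}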
