模拟生成单词,消费单词

package kafka;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;
import java.util.Random;
import java.util.UUID;

/**
 * Simulates a real-time stream of words: roughly every 500 ms a random
 * single-letter "word" (a-z) is published to the Kafka topic "wordcount",
 * keyed by a random UUID.
 */
public class GenerateWords {
    /**
     * Builds a String/String Kafka producer and publishes random letters
     * until the thread is interrupted.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers","hadoop01:9092,hadoop02:9092,hadoop03:9092");
        props.setProperty("key.serializer", StringSerializer.class.getName());
        props.setProperty("value.serializer", StringSerializer.class.getName());

        /*
         * Acknowledgement mode used when sending data.
         * Allowed values: [all, -1, 0, 1]; default: 1
         *   0       : the leader sends no acknowledgement
         *   1       : the leader acknowledges to the producer
         *   -1, all : follower -> leader -> producer
         */
        props.setProperty("acks","1");

        // One generator for the whole run instead of a new Random per message.
        Random random = new Random();
        int base = 97; // code point of 'a'

        // Remember an interruption instead of re-raising it immediately, so the
        // producer is closed (and flushed) before the flag is restored.
        boolean interrupted = false;

        // try-with-resources closes the producer client when the loop ends.
        try (KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(props)) {
            while (true) {
                try {
                    Thread.sleep(500);
                } catch (InterruptedException e) {
                    // Stop producing instead of swallowing the interrupt and looping forever.
                    interrupted = true;
                    break;
                }

                String key = UUID.randomUUID().toString();

                // Randomly generate a one-letter word in the range a-z.
                char word = (char) (random.nextInt(26) + base);
                System.out.println("word="+word);

                ProducerRecord<String, String> record = new ProducerRecord<>("wordcount", key, String.valueOf(word));
                // send() is asynchronous; report delivery failures through the callback
                // rather than dropping them silently.
                kafkaProducer.send(record, (metadata, exception) -> {
                    if (exception != null) {
                        exception.printStackTrace();
                    }
                });
                System.out.println("record="+record);
            }
        } finally {
            if (interrupted) {
                // Restore the interrupt status for whoever owns this thread.
                Thread.currentThread().interrupt();
            }
        }
    }
}

 

package kafka


import java.lang

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * Consumes the generated words from the Kafka topic "wordcount" with a
  * Spark Streaming direct stream and prints the per-batch count of each word.
  */
object ConsumerWords {
  /**
    * Starts a local streaming job with 2-second batches, counts the record
    * values of each batch and commits the consumed offsets back to Kafka.
    *
    * @param args unused
    */
  def main(args: Array[String]): Unit = {
    // Local import: only this method needs the offset-handling traits.
    import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

    val conf = new SparkConf().setAppName("consumerwords").setMaster("local[*]")
    // Pull data once every 2 seconds.
    val ssc = new StreamingContext(conf, Seconds(2))
    // Consumer group id.
    val groupid = "day_001"

    // Kafka consumer configuration.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop01:9092,hadoop02:9092,hadoop03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupid,
      "auto.offset.reset" -> "earliest",
      // Auto-commit is disabled; offsets are committed manually after each batch below.
      "enable.auto.commit" -> (false: lang.Boolean)
    )

    // Create the direct Kafka stream.
    val stream = KafkaUtils.createDirectStream(
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array("wordcount"), kafkaParams)
    )

    stream.foreachRDD { rdd =>
      // The offset ranges must be read from the RDD handed over by the stream
      // itself, before any transformation is applied to it.
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

      rdd.map(record => (record.value(), 1)).reduceByKey(_ + _).foreach(println(_))

      // With enable.auto.commit=false nothing was ever committed, so the group
      // kept no progress and every restart re-read the topic from "earliest".
      // Commit once the batch output is done (at-least-once semantics).
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}

该方案只统计每个批次内的单词个数,不能累计历史批次的结果;如需累计,可以借助 updateStateByKey 算子实现。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值