Reading Kafka in real time with Spark Streaming

The job below consumes a Kafka topic with Spark Streaming's direct API, filters the records by business type, and supports a graceful shutdown triggered by a marker file on HDFS.

import java.util

import net.sf.json.JSONObject
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
// Server is assumed to be the embedded Jetty server used by the HTTP shutdown helpers;
// Property and StreamUtils are the project's own helpers and are not shown in the post.
import org.eclipse.jetty.server.Server

object BigScreenStream {

  def main(args: Array[String]): Unit = {
    val port = 5555
    // Marker file on HDFS: if this file exists, the streaming job is shut down gracefully
    val hdfs_file_path = "/spark/streaming/stop/bigScreen"

    //    val time = args(0).toLong
    //    val flag = args(1)
    //    val firstCount = args(2).toLong
    //    val threadCount = args(3).toLong
    val time = 3
    val flag = "0"
    val firstCount = 5000
    val threadCount = 100
    val server = new Server(port)
    // Create the StreamingContext
    val ssc = createStreamingContext(time, flag, firstCount, threadCount)
    // Start the job
    ssc.start()
    //StreamUtils.startHttpServer(server, ssc)

    // Start the daemon that listens for stop requests
    //    StreamUtils.daemonHttpServer(server, ssc)          // Option 1: graceful shutdown via an HTTP request
    StreamUtils.stopByMarkFile(server, ssc, hdfs_file_path)  // Option 2: graceful shutdown by scanning an HDFS marker file (a sketch of this helper is at the end of the post)
    //StreamUtils.checkStreamingState(server, ssc)
    // Wait for the job to terminate
    ssc.awaitTermination()
  }

  // Build the StreamingContext: Spark conf, Kafka direct stream, and the per-batch processing
  def createStreamingContext(time: Long, flag: String, firstCount: Long, threadCount: Long): StreamingContext = {

    val appName = this.getClass.getName
    println("appName:" + appName)
    val conf = new SparkConf()
      .setMaster(Property.getProperty("yarn"))
      .setAppName(appName)
    conf.set("spark.streaming.stopGracefullyOnShutdown", "true")       // shut down gracefully
    conf.set("spark.streaming.receiver.writeAheadLog.enable", "true")  // write-ahead log, to avoid data loss
    conf.set("spark.streaming.backpressure.enabled", "true")           // enable backpressure to smooth out load spikes
    conf.set("spark.streaming.backpressure.initialRate", firstCount.toString)    // maximum rate for the first batch
    conf.set("spark.streaming.kafka.maxRatePerPartition", threadCount.toString)  // maximum records per second read from each Kafka partition
    conf.set("spark.mongodb.input.uri", Property.getProperty("bigScreenInUri1"))
    conf.set("spark.mongodb.output.uri", Property.getProperty("bigScreenOutUri1"))
    conf.set("spark.streaming.kafka.consumer.poll.ms", "10000")        // Kafka poll timeout

    val ssc = new StreamingContext(conf, Seconds(time))
    if ("0".equals(flag)) {
      // flag "0" keeps the driver log quiet: only ERROR-level messages are printed
      ssc.sparkContext.setLogLevel("ERROR")
    }
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> Property.getProperty("kafkaServers"),
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> Property.getProperty("group"),
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array(Property.getProperty("bigScreenTopics"))

    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // Offline transaction codes, kept in a map for quick lookup during filtering
    val offlineTranType = Property.getProperty("offlineTranType")
    val values = offlineTranType.split(",")
    val map: util.Map[String, String] = new util.HashMap()
    for (v <- values) {
      map.put(v, v)
    }
    println("Offline transaction codes: " + map)

    stream.foreachRDD { rdd =>
      // Record the Kafka offset ranges of this batch so they can be committed after processing
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

      val baseRdd = rdd.map(record => record.value)
        .filter(!"null".equals(_))
        .distinct()
        .filter(msg => {
          val json = JSONObject.fromObject(msg)
          val K_BUSINESS_TYPE = json.get("K_BUSINESS_TYPE").asInstanceOf[String]
          val K_TRADE_TRANTYPE = json.get("K_TRADE_TRANTYPE").asInstanceOf[String]
          if ("offline".equals(K_BUSINESS_TYPE)) {
            // Offline transactions are kept only if their code is in the configured list
            map.containsKey(K_TRADE_TRANTYPE)
          } else {
            // Online transactions pass through unfiltered
            true
          }
        })
      baseRdd.persist(StorageLevel.MEMORY_AND_DISK)
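      // The per-batch aggregation over baseRdd and the writes to the MongoDB collections
      // configured above would follow here; that part of the listing is not shown.

      // Typical closing steps for this pattern (an assumption, not shown in the post):
      // with enable.auto.commit = false, commit the batch's offsets back to Kafka only
      // after processing has succeeded, then release the cached RDD.
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
      baseRdd.unpersist()
    }

    ssc
  }
}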


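The StreamUtils.stopByMarkFile helper called in main is not included in the post. A minimal sketch of what such a helper might look like, assuming it polls HDFS periodically and stops both the embedded HTTP server and the StreamingContext once the marker file appears (the helper method name, interval, and logging are illustrative, not the original implementation):

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.streaming.StreamingContext
import org.eclipse.jetty.server.Server

object StreamUtils {

  // Poll HDFS every checkIntervalMillis; once the marker file appears, stop the
  // embedded HTTP server and shut the StreamingContext down gracefully.
  def stopByMarkFile(server: Server, ssc: StreamingContext, markFilePath: String): Unit = {
    val checkIntervalMillis = 10000L
    var stopped = false
    while (!stopped) {
      // true if the streaming context terminated on its own within the timeout
      stopped = ssc.awaitTerminationOrTimeout(checkIntervalMillis)
      if (!stopped && markFileExists(markFilePath)) {
        println("Marker file found, stopping the streaming job gracefully ...")
        server.stop()
        ssc.stop(stopSparkContext = true, stopGracefully = true)
        stopped = true
      }
    }
  }

  private def markFileExists(path: String): Boolean = {
    val fs = FileSystem.get(new Configuration())
    fs.exists(new Path(path))
  }
}

With this convention the running job is stopped from the command line by creating the marker file, for example: hdfs dfs -touchz /spark/streaming/stop/bigScreen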
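All environment-specific settings are read through the Property helper. The keys below are the ones referenced in the code above; the values are placeholders that only illustrate the expected format, not the original configuration:

# placeholder values; only the keys come from the code above
yarn=yarn
kafkaServers=kafka1:9092,kafka2:9092,kafka3:9092
group=big_screen_group
bigScreenTopics=big_screen_topic
offlineTranType=1001,1002,1003
bigScreenInUri1=mongodb://mongo1:27017/bigscreen.trade_in
bigScreenOutUri1=mongodb://mongo1:27017/bigscreen.trade_out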