Spark Streaming 准实时计算 demo

5 篇文章 0 订阅
4 篇文章 0 订阅

 

 

package com.chexun.statistic

import java.sql.{Connection, DriverManager}
import java.util.Date

import com.chexun.statistic.RealTimeAdv._
import kafka.serializer.StringDecoder
import org.apache.commons.lang.time.DateFormatUtils
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark Streaming job that aggregates advertisement log records read from Kafka
 * in 60-second batches and writes per-location PV/UV counts to MySQL.
 *
 * For each batch the job:
 *  1. splits every Kafka message value on tabs and builds an [[RealtimeCount.AdvLoging]],
 *  2. drops records whose `adurl` is null or the literal string "null",
 *  3. converts the rest to [[RealtimeCount.Adv]] and drops records with a null/empty location,
 *  4. groups by location, computing `count(*)` (pv) and `count(distinct muid)` (uv),
 *  5. inserts one row per location into `loging_adv_realtime`.
 *
 * Created by hanyiting on 2015/08/13.
 */
object RealtimeCount {

  /** Raw log record built from the tab-separated fields at indices 9, 8, 1, 0, 3 and 24. */
  case class AdvLoging(vtime: Long, userIp: Long, muid: String, uid: String, ucp: String, adurl: String)

  /** Record registered as the `adv` temp table; `adurl` and `location` are derived from the raw ad URL. */
  case class Adv(userIp: Long, muid: String, ucp: String, adurl: String, location: String)

  /**
   * Entry point: builds the streaming pipeline, starts it and blocks until termination.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val url = "jdbc:mysql://10.0.0.198:3306/test"
    val usr = "test"
    val pwd = "test"

    val sparkConf = new SparkConf().set("spark.streaming.unpersist", "true").set("spark.cleaner.ttl", "43200")
      .setExecutorEnv("SPARK_JAVA_OPTS", "-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps").setAppName("RealtimeCount")
    val sc = new SparkContext(sparkConf)
    // One batch per 60 seconds.
    val ssc = new StreamingContext(sc, Seconds(60))

    // Define the Kafka parameters; the broker list must be specified.
    val kafkaParams = Map("metadata.broker.list" -> "10.0.0.37:9092,10.0.0.30:9092,10.0.0.35:9092,10.0.0.26:9092,10.0.0.27:9092")

    // Define which topics to read from.
    val topics = Set("chexun1", "chexun2")

    // Keep only the message value; the key is discarded.
    val lines = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics).map(x => x._2)

    // Drop records with an empty adurl, then convert them, extract the location,
    // and drop records with an empty location.
    // NOTE(review): getUrl/getLocation are not defined in this file; presumably they
    // come from the RealTimeAdv wildcard import - confirm.
    val tmpdf = lines
      .map(_.split("\t"))
      .map(x => AdvLoging(x(9).toLong, x(8).toLong, x(1), x(0), x(3), x(24)))
      .filter(y => y.adurl != null && !y.adurl.equals("null"))
      .map(x => Adv(x.userIp, x.muid, x.ucp, getUrl(x.adurl), getLocation(x.adurl)))
      .filter(z => z.location != null && !("").equals(z.location))

    tmpdf.foreachRDD { rdd =>
      val sqlContext = new org.apache.spark.sql.SQLContext(sc)
      import sqlContext.implicits._
      rdd.toDF().registerTempTable("adv")
      // Current time, truncated to the minute; shared by every row written for this batch.
      val stattime = DateFormatUtils.format(new Date, "yyyy-MM-dd HH:mm:00")
      // Group ads by location and compute pv (count(*)) and uv (count(distinct muid)).
      sqlContext.sql("select location,count(*),count(distinct muid) from adv group by location").foreachPartition { datas =>
        // Skip empty partitions so no database connection is opened for nothing.
        if (datas.hasNext) {
          val conn: Connection = DriverManager.getConnection(url, usr, pwd)
          try {
            val pstat = conn.prepareStatement("insert into loging_adv_realtime(stat_time,location,pv,uv) values (?,?,?,?)")
            try {
              for (data <- datas) {
                pstat.setString(1, stattime)
                pstat.setString(2, data(0).toString)
                pstat.setString(3, data(1).toString)
                pstat.setString(4, data(2).toString)
                pstat.executeUpdate()
              }
            } finally {
              // Always release the statement, even if an insert fails.
              pstat.close()
            }
          } finally {
            // Always release the connection; otherwise each partition of each batch leaks one.
            conn.close()
          }
        }
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值