Producer side:
import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

import scala.util.Random

/**
  * Created by zengxiaosen on 16/9/26.
  */
/*
  Run the following on the command line to inspect the data we produce:
  kafka-console-consumer.sh --zookeeper slave1:2181 --topic orderTopic
 */
object OrderProductor {

  def main(args: Array[String]): Unit = {
    val topic = "orderTopic"
    val brokers = "master:9092,slave1:9092"
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Produce 10 orders every second
    while (true) {
      (1 to 10).foreach { messageNum =>
        // area id, order id, order amount, order time
        // (the time must be formatted as "yyyy-MM-dd HH:mm:ss" so the consumer
        //  can take substring(0, 10) of it; see the DateUtils templates below)
        val str = messageNum + "," + Random.nextInt(10) + "," +
          Math.round(Random.nextDouble() * 100) + "," + DateUtils.getCurrentDateTime
        val message = new ProducerRecord[String, String](topic, null, str)
        producer.send(message)
      }
      Thread.sleep(1000)
    }
  }
}
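Besides checking with kafka-console-consumer, delivery can also be verified programmatically. The minimal sketch below (the sample line and object name are made up for illustration; broker list and topic are the ones assumed above) sends one record synchronously and prints the metadata the broker acknowledges with.

import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

object SendOneRecord {
  def main(args: Array[String]): Unit = {
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "master:9092,slave1:9092")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    // send() returns a Future[RecordMetadata]; get() blocks until the broker acks
    val metadata = producer.send(
      new ProducerRecord[String, String]("orderTopic", null, "1,7,42,2016-09-04 15:19:09")).get()
    println(s"written to ${metadata.topic()} partition ${metadata.partition()} offset ${metadata.offset()}")
    producer.close()
  }
}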
Consumer side:
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * Created by zengxiaosen on 16/9/26.
  */
object AreaAmt {
  // Running total of the order amount per date and area, updated every batch
  def main(args: Array[String]): Unit = {
    /*
      For Kafka, the group id matters when several jobs consume the same topic.
      Either:
      1. each job receives the complete data and computes independently, or
      2. each job receives a share of the data, which amounts to load balancing.
      When the jobs share the same group id we are in case 2, otherwise case 1.
     */
    val zkQuorum = "slave1:2181"
    val group = "g1"
    val topics = "orderTopic" // the topic the producer writes to
    val numThreads = 2
    // setMaster needs at least 2 cores: with only 1 there are not enough resources,
    // because one core is taken by the receiver and at least one is needed for computation
    val sparkConf = new SparkConf().setAppName("AreaAmt").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(2)) // one batch every two seconds
    // checkpoint directory, required by the stateful operation below
    ssc.checkpoint("hdfs://192.168.75.130:8020/user/root/checkpoint/AreaAmt")

    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    //val topicMap2 = Map(topics -> 2)

    // Each element is one line written to Kafka, delivered batch by batch
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)

    // Build the pair DStream we need: (date_area, amount)
    val linerdd = lines.map { row =>
      val arr = row.split(",")
      // Aggregate the sales amount by date and area, e.g. key "2016-09-04_Area".
      // To drill down to city level, only the key construction changes;
      // everything else stays the same.
      val key = arr(3).substring(0, 10) + "_" + arr(0)
      val amt = arr(2).toInt
      (key, amt)
    }

    val addFunc = (currValues: Seq[Int], preValueState: Option[Int]) => {
      // Spark groups values by key; currValues holds this key's values for the current batch
      val currentCount = currValues.sum
      // The total accumulated so far for this key
      val previousCount = preValueState.getOrElse(0)
      // Return the new accumulated total as an Option[Int]
      Some(currentCount + previousCount)
    }

    linerdd.updateStateByKey[Int](addFunc).print()

    ssc.start()
    ssc.awaitTermination()
  }
}
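To see what the state update function does in isolation, here is a minimal sketch (the batch values and the running total are made-up numbers) that applies the same logic outside of Spark:

object AddFuncDemo {
  // Same update logic as addFunc above: the sum of the current batch's values
  // for a key, plus whatever total has been accumulated so far
  val addFunc = (currValues: Seq[Int], preValueState: Option[Int]) =>
    Some(currValues.sum + preValueState.getOrElse(0))

  def main(args: Array[String]): Unit = {
    // First batch for key "2016-09-04_3": no previous state yet
    println(addFunc(Seq(42, 17), None))   // Some(59)
    // A later batch for the same key: previous total is 59
    println(addFunc(Seq(5, 8), Some(59))) // Some(72)
  }
}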
About DateUtils:
Write it however you like; here are two templates:
import java.util.Calendar
import java.text.SimpleDateFormat

/**
  * Created by zengxiaosen on 16/9/26.
  */
object DateUtils {
  // Note: the default "K:mm aa" pattern yields something like "3:45 PM";
  // for this example the producer needs a "yyyy-MM-dd HH:mm:ss" timestamp
  // (see the second template below)
  def getCurrentDateTime: String = getCurrentDateTime("K:mm aa")

  def getCurrentDate: String = getCurrentDateTime("EEEE, MMMM d")

  private def getCurrentDateTime(dateTimeFormat: String): String = {
    val dateFormat = new SimpleDateFormat(dateTimeFormat)
    val cal = Calendar.getInstance()
    dateFormat.format(cal.getTime())
  }
}
Another template:
import java.text.SimpleDateFormat
import java.util.Calendar

/**
  * Created by zengxiaosen on 16/9/26.
  */
object DateUtils01 {
  def getCurrentTime(): String = {
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val c = Calendar.getInstance()
    sdf.format(c.getTime)
  }

  def main(args: Array[String]): Unit = {
    println("2016-09-04 15:19:09".substring(0, 10))
  }
}
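Putting the pieces together, the sketch below (the sample line and object name are made up) shows why the timestamp has to use DateUtils01's "yyyy-MM-dd HH:mm:ss" format: the consumer takes substring(0, 10) of the fourth field to build its date_area key.

object KeyExtractionDemo {
  def main(args: Array[String]): Unit = {
    val line = "3,7,42,2016-09-04 15:19:09" // areaId,orderId,amount,timestamp
    val arr = line.split(",")
    val key = arr(3).substring(0, 10) + "_" + arr(0)
    val amt = arr(2).toInt
    println((key, amt)) // (2016-09-04_3,42)
  }
}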