POM dependencies
<dependencies>
    <!-- Spark artifacts omit <version>; presumably managed by the parent pom -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.12</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka-0-10_2.12</artifactId>
    </dependency>
    <dependency>
        <groupId>redis.clients</groupId>
        <artifactId>jedis</artifactId>
        <version>2.9.0</version>
    </dependency>
    <dependency>
        <groupId>com.wm.realtime</groupId>
        <artifactId>gmall-common</artifactId>
        <version>1.0-SNAPSHOT</version>
    </dependency>
    <dependency>
        <groupId>org.apache.phoenix</groupId>
        <artifactId>phoenix-spark</artifactId>
        <version>5.0.0-HBase-2.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.12</artifactId>
    </dependency>
    <!-- ES-related dependencies -->
    <dependency>
        <groupId>io.searchbox</groupId>
        <artifactId>jest</artifactId>
        <version>6.3.1</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.47</version>
    </dependency>
</dependencies>
Daily order sales and hourly real-time statistics
import com.alibaba.fastjson.JSON
import com.atguigu.gmall.realtime.bean.OrderInfo
import com.atguigu.realtime.gmall.common.Constant
import org.apache.spark.streaming.dstream.DStream

/**
 * Daily order sales and hourly real-time statistics.
 */
object OrderAppV2 extends BaseApp {
  override val topics: Set[String] = Set(Constant.ORDER_INFO_TOPIC)
  override val groupId: String = "OrderApp"
  override val appName: String = "OrderApp"
  override val master: String = "local[2]"
  override val batchTime: Int = 3

  override def run(sourceStream: DStream[String]): Unit = {
    sourceStream
      // Parse each JSON record from Kafka into an OrderInfo bean
      .map(json => JSON.parseObject(json, classOf[OrderInfo]))
      .foreachRDD(rdd => {
        import org.apache.phoenix.spark._
        // Persist every micro-batch to Phoenix; the daily/hourly
        // aggregations are then computed at query time
        rdd.saveToPhoenix("gmall_order_info0421",
          Seq("ID", "PROVINCE_ID", "CONSIGNEE", "ORDER_COMMENT", "CONSIGNEE_TEL", "ORDER_STATUS", "PAYMENT_WAY", "USER_ID", "IMG_URL", "TOTAL_AMOUNT", "EXPIRE_TIME", "DELIVERY_ADDRESS", "CREATE_TIME", "OPERATE_TIME", "TRACKING_NO", "PARENT_ORDER_ID", "OUT_TRADE_NO", "TRADE_BODY", "CREATE_DATE", "CREATE_HOUR"),
          zkUrl = Option("hadoop102,hadoop103,hadoop104:2181"))
      })
  }
}
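The OrderInfo bean lives in the project's bean package and is not shown in this section. As a rough sketch (the field names are assumptions chosen to line up with the 20 Phoenix columns above; phoenix-spark persists Product fields positionally), it could look like this:

case class OrderInfo(id: String,
                     province_id: String,
                     consignee: String,
                     order_comment: String,
                     consignee_tel: String,
                     order_status: String,
                     payment_way: String,
                     user_id: String,
                     img_url: String,
                     total_amount: Double,
                     expire_time: String,
                     delivery_address: String,
                     create_time: String,
                     operate_time: String,
                     tracking_no: String,
                     parent_order_id: String,
                     out_trade_no: String,
                     trade_body: String,
                     var create_date: String = null,
                     var create_hour: String = null) {
  // Derive the CREATE_DATE ("yyyy-MM-dd") and CREATE_HOUR ("HH") columns
  // from create_time, assumed to be formatted "yyyy-MM-dd HH:mm:ss"
  if (create_time != null && create_time.length >= 13) {
    create_date = create_time.take(10)
    create_hour = create_time.substring(11, 13)
  }
}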
Requirement: the same device logs in with three or more different accounts within 5 minutes and claims coupons
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Common skeleton for the streaming apps: builds the StreamingContext,
 * wires up the Kafka source, and delegates the business logic to run().
 */
abstract class BaseApp {
  val topics: Set[String]
  val groupId: String
  val master: String
  val appName: String
  val batchTime: Int

  // Business logic supplied by each concrete app
  def run(sourceStream: DStream[String]): Unit

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster(master).setAppName(appName)
    val ssc = new StreamingContext(conf, Seconds(batchTime))
    val sourceStream: DStream[String] = MyKafkaUtil
      .getKafkaStream(ssc, groupId, topics)
    run(sourceStream)
    // Start the streaming context
    ssc.start()
    // Block the main thread until the context terminates
    ssc.awaitTermination()
  }
}
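BaseApp depends on MyKafkaUtil, which is not shown in this section. A minimal sketch, assuming the spark-streaming-kafka-0-10 direct API and placeholder broker addresses:

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object MyKafkaUtil {
  def getKafkaStream(ssc: StreamingContext, groupId: String, topics: Set[String]): DStream[String] = {
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop102:9092,hadoop103:9092,hadoop104:9092", // assumed brokers
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (true: java.lang.Boolean)
    )
    KafkaUtils
      .createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent, // let Spark spread partitions over executors
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))
      .map(_.value()) // keep only the record value (the JSON string)
  }
}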
import java.{util => ju}

import com.alibaba.fastjson.JSON
import com.atguigu.gmall.realtime.bean.{AlertInfo, EventLog}
import com.atguigu.gmall.realtime.util.ESUtil
import com.atguigu.realtime.gmall.common.Constant
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Minutes, Seconds}

import scala.util.control.Breaks._
/**
 * Real-time alert: the same device logs in with three or more different
 * accounts within 5 minutes and claims coupons.
 */
object AlertApp extends BaseApp {
  override val topics: Set[String] = Set(Constant.EVENT_TOPIC)
  override val groupId: String = "AlertApp"
  override val master: String = "local[2]"
  override val appName: String = "AlertApp"
  override val batchTime: Int = 3

  override def run(sourceStream: DStream[String]): Unit = {
    val eventLogStream = sourceStream
      .map(json => {
        val log = JSON.parseObject(json, classOf[EventLog])
        // Key each event log by device id (mid)
        (log.mid, log)
      })
      // 5-minute window, sliding every 6 seconds
      .window(Minutes(5), Seconds(6))

    val alertInfoStream = eventLogStream
      .groupByKey()
      .map {
        case (mid, it: Iterable[EventLog]) =>
          // Derive the alert fields from this device's events in the window
          // 1. User ids that claimed a coupon on this device
          val uids = new ju.HashSet[String]()
          // 2. All event ids that occurred on this device
          val events = new ju.ArrayList[String]()
          // 3. Item ids the coupons were claimed on
          val items = new ju.HashSet[String]()
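AlertApp's import of ESUtil points at the project's Elasticsearch helper, which is not shown in this section. A minimal sketch of such a helper, assuming Jest 6.3.1 (matching the pom above) and a placeholder ES address:

import io.searchbox.client.config.HttpClientConfig
import io.searchbox.client.{JestClient, JestClientFactory}
import io.searchbox.core.Index

object ESUtil {
  private val esUrl = "http://hadoop102:9200" // assumption: single-node dev cluster

  private val factory = new JestClientFactory
  factory.setHttpClientConfig(
    new HttpClientConfig.Builder(esUrl)
      .multiThreaded(true)
      .maxTotalConnection(20)
      .build())

  /** Index one document; source can be any bean Jest can serialize to JSON. */
  def insertSingle(index: String, source: AnyRef, id: String = null): Unit = {
    val client: JestClient = factory.getObject
    try {
      client.execute(
        new Index.Builder(source)
          .index(index)
          .`type`("_doc")
          .id(id) // a null id lets ES generate one
          .build())
    } finally {
      client.close() // JestClient extends Closeable in Jest 6.x
    }
  }
}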