一、案例介绍
案例包含三个表:tbDate、tbStock、tbStockDetail。字段信息如下表:
二、要求
1、计算所有订单中每年的销售单数、销售总额
2、计算所有订单每年最大金额订单的销售额
3、计算所有订单中每年最畅销货品
三、代码
case class tbStock(ordernumber:String,locationid:String,dateid:String) extends Serializable
case class tbStockDetail(ordernumber:String, rownum:Int, itemid:String, number:Int, price:Double, amount:Double) extends Serializable
case class tbDate(dateid:String, years:Int, theyear:Int, month:Int, day:Int, weekday:Int, week:Int, quarter:Int, period:Int, halfmonth:Int) extends Serializable
object StockDemo {
def main(args: Array[String]): Unit = {
//创建sparkSession对象
val sparkConf = new SparkConf().setAppName("StockDemo").setMaster("local[*]")
val spark = SparkSession.builder().config(sparkConf).getOrCreate()
import spark.implicits._
//读取文件,生成RDD
val tbStockRdd = spark.sparkContext.textFile("input\\stock\\tbStock.txt")
val tbStockDetailRdd = spark.sparkContext.textFile("input\\stock\\tbStockDetail.txt")
val tbDateRdd = spark.sparkContext.textFile("input\\stock\\tbDate.txt")
//RDD处理数据,然后toDS ,转成Dataset
val tbStockDS = tbStockRdd.map(_.split(",")).map(attr=>tbStock(attr(0),attr(1),attr(2) )).toDS
val tbStockDetailDS = tbStockDetailRdd.map(_.split(",")).map(attr=> tbStockDetail(attr(0),attr(1).trim().toInt,attr(2),attr(3).trim().toInt,
attr(4).trim().toDouble, attr(5).trim().toDouble)).toDS
val tbDateDS = tbDateRdd.map(_.split(",")).map(attr=> tbDate(attr(0),attr(1).trim().toInt, attr(2).trim().toInt,attr(3).trim().toInt, attr(4).trim().toInt,
attr(5).trim().toInt, attr(6).trim().toInt, attr(7).trim().toInt, attr(8).trim().toInt, attr(9).trim().toInt)).toDS
//展示
//tbStockDS.show()
//tbStockDetailDS.show()
//tbDateDS.show()
//注册表
tbStockDS.createOrReplaceTempView("tbStock")
tbDateDS.createOrReplaceTempView("tbDate")
tbStockDetailDS.createOrReplaceTempView("tbStockDetail")
/*1、计算所有订单中每年的销售单数、销售总额
* 三个表连接后以 count(distinct a.ordernumber)计销售单数,sum(b.amount)计销售总额*/
spark.sql("SELECT c.theyear,COUNT(a.ordernumber),SUM(b.amount) " +
"FROM tbStock a " +
"JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
"JOIN tbDate c ON a.dateid = c.dateid " +
"GROUP BY c.theyear").show
/*2、计算所有订单每年最大金额订单的销售额 目标:统计每年最大金额订单的销售额:*/
spark.sql("SELECT theyear, MAX(c.SumOfAmount) AS SumOfAmount FROM " +
"(SELECT a.dateid, a.ordernumber, SUM(b.amount) AS SumOfAmount FROM tbStock a " +
"JOIN tbStockDetail b ON a.ordernumber = b.ordernumber GROUP BY a.dateid, a.ordernumber ) c " +
"JOIN tbDate d ON c.dateid = d.dateid GROUP BY theyear ORDER BY theyear DESC").show
/*3、计算所有订单中每年最畅销货品
目标:统计每年最畅销货品(哪个货品销售额 amount 在当年最高,哪个就是最畅销货品)*/
/* spark.sql("SELECT DISTINCT e.theyear, e.itemid, f.maxofamount FROM " +
"(SELECT c.theyear, b.itemid, SUM(b.amount) AS sumofamount FROM tbStock a " +
"JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
"JOIN tbDate c ON a.dateid = c.dateid GROUP BY c.theyear, b.itemid ) e " +
"JOIN (SELECT d.theyear, MAX(d.sumofamount) AS maxofamount FROM " +
"(SELECT c.theyear, b.itemid, SUM(b.amount) AS sumofamount FROM tbStock a " +
"JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
"JOIN tbDate c ON a.dateid = c.dateid GROUP BY c.theyear, b.itemid ) d " +
"GROUP BY d.theyear ) f ON e.theyear = f.theyear AND e.sumofamount = f.maxofamount " +
"ORDER BY e.theyear").show*/
/* d.theyear,Count(itemid),MAX(d.SumOfAmount) AS MaxOfAmount*/
spark.sql("SELECT d.theyear,MAX(itemid),MAX(d.SumOfAmount) AS MaxOfAmount FROM " +
"(SELECT c.theyear, b.itemid, SUM(b.amount) AS SumOfAmount FROM tbStock a " +
"JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
"JOIN tbDate c ON a.dateid = c.dateid GROUP BY c.theyear, b.itemid ) d GROUP BY d.theyear").show
spark.stop()
}
}