Spark project code

ODS layer - ProducerClientLog
object ProducerClientLog {
  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      println("请输入日期")
      System.exit(1) // terminate the program
    }

    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN) {
      spark = SparkSession
        .builder()
        .master("local[*]")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris",ConfigUtils.HIVE_METASTORE_URIS)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .master("yarn")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS)
        .enableHiveSupport()
        .getOrCreate()
    }


    spark.sparkContext.setLogLevel("WARN")
     val rdd1: RDD[String] = spark.sparkContext.textFile(s"${ConfigUtils.HDFS_CLIENT_LOG_PATH}/${args(0)}/currentday_clientlog.tar.gz")
    val rdd2: RDD[(String, String)] = rdd1.map(v => v.split("&")).filter(v => v.length == 6).map(v => (v(2), v(3)))
    // Filter: keep only records that split into 6 fields on "&", and take fields 2 and 3 as a (String, String) tuple
    rdd2.map(v => {

      var eventName = v._1 //MINIK_CLIENT_SONG_PLAY_OPERATE_REQ
      var eventJson = v._2 //{"songid": "LX_U016464", "mid": 96596, "optrate_type": 3, }

      // If v._1 is MINIK_CLIENT_SONG_PLAY_OPERATE_REQ, this is the event we need; parse its JSON payload
      if ("MINIK_CLIENT_SONG_PLAY_OPERATE_REQ".equals(v._1)) {
        val jSONObject: JSONObject = JSON.parseObject(eventJson)
        // extract each field from the JSON
        val songid: String = jSONObject.getString("songid")
        val mid: String = jSONObject.getString("mid")
        val optrate_type: String = jSONObject.getString("optrate_type")
        val uid: String = jSONObject.getString("uid")
        val consume_type: String = jSONObject.getString("consume_type")
        val dur_time: String = jSONObject.getString("dur_time")
        val session_id: String = jSONObject.getString("session_id")
        val songname: String = jSONObject.getString("songname")
        val pkg_id: String = jSONObject.getString("pkg_id")
        val order_id: String = jSONObject.getString("order_id")

        // Write the record to HDFS with "\t" as the separator so it can be loaded into the warehouse -- the table DDL must declare the same delimiter (a DDL sketch follows this job)
        (eventName, songid + "\t" + mid + "\t" + optrate_type + "\t" + uid + "\t"
        + consume_type + "\t" + dur_time + "\t" + session_id + "\t" + songname + "\t" + pkg_id + "\t" + order_id)
        // other events could be used for other purposes; no processing here for now
      } else if ("MINIK_CLIENT_ADVERTISEMENT_RECORD".equals(v._1)) {
        //do something
        (eventName, "ceshi")
      } else {
        v
      }
    }).saveAsHadoopFile(s"${ConfigUtils.HDFS_CLIENT_LOG_PATH}/${args(0)}/all_tables",
    classOf[String], classOf[String], classOf[PairRDDMultipleTextOutputFormat])
    // saveAsHadoopFile arguments: (output path, key class, value class, output format that controls the file name)

    // Load the data into the Hive table with SQL
    spark.sql("use songdb")
    val sql =
        s"""
          |load data inpath '${ConfigUtils.HDFS_CLIENT_LOG_PATH}/${args(0)}/all_tables/MINIK_CLIENT_SONG_PLAY_OPERATE_REQ'
          |into table TO_CLIENT_SONG_PLAY_OPERATE_REQ_D partition (data_dt ='${args(0)}')
          |""".stripMargin
    spark.sql(sql)
    spark.close()
  }
}
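As noted in the comment above, the warehouse table has to be created with the same "\t" field delimiter that this job writes. A minimal DDL sketch for the target ODS table (column names follow the ten fields written above; the STRING types, storage format and partition column are assumptions, not taken from the original project):

// hypothetical DDL sketch; types and storage format are assumed
spark.sql(
  """
    |CREATE TABLE IF NOT EXISTS TO_CLIENT_SONG_PLAY_OPERATE_REQ_D (
    |  songid STRING, mid STRING, optrate_type STRING, uid STRING, consume_type STRING,
    |  dur_time STRING, session_id STRING, songname STRING, pkg_id STRING, order_id STRING
    |)
    |PARTITIONED BY (data_dt STRING)
    |ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    |""".stripMargin)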
EDS layer (user) - GenerateTwCnsmBriefD
object GenerateTwCnsmBriefD {
  def main(args: Array[String]): Unit = {
    if(args.size==0){
      println("请输入日期:")
      System.exit(1)
    }
    val inputdate = args(0)


    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN){
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("local[*]")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("yarn")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }
    spark.sparkContext.setLogLevel("WARN")
    spark.sql("use songdb")
    spark.sql(
      s"""
        |insert overwrite table TW_CNSM_BRIEF_D partition(data_dt = '${inputdate}')
        |select
        |ID                  ,
        |TRD_ID              ,
        |UID                 ,
        |MID                 ,
        |PRDCD_TYPE          ,
        |PAY_TYPE            ,
        |ACT_TM              ,
        |PKG_ID              ,
        |AMT COIN_PRC,      -- amount
        |1 COIN_CNT,        -- quantity
        |ACT_TM UPDATE_TM,  -- update time
        |ORDR_ID             ,
        |ACTV_NM             ,
        |PKG_PRC             ,
        |PKG_DSCNT           ,
        |CPN_TYPE            ,
        |case when ORDR_TYPE =1 then 0
        |	 when ORDR_TYPE =2 then 1
        |	 when ORDR_TYPE =3 or ORDR_TYPE = 4 then 2 end ABN_TYP
        |from TO_YCAK_CNSM_D where data_dt = '${inputdate}'
        |""".stripMargin)

    spark.close()
  }

}

EDS layer (user) - GenerateTwUsrBaseinfoD

object GenerateTwUsrBaseinfoD {
  def main(args: Array[String]): Unit = {
    if(args.size==0){
      println("请输入日期:")
      System.exit(1)
    }
    var inputdate = args(0)

    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN){
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("local[*]")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("yarn")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }
    spark.sql("use songdb")

    // WeChat users
    val WXUser: DataFrame = spark.sql(
      """
        |select
        |UID,
        |REG_MID,
        |"1" REG_CHNL,
        |WX_ID REF_UID,
        |GDR,
        |BIRTHDAY,
        |MSISDN,
        |LOC_ID,
        |LOG_MDE,
        |SUBSTRING(reg_tm,0,8) REG_DT,
        |SUBSTRING(reg_tm,9,6) REG_TM,
        |USR_EXP,
        |SCORE,
        |LEVEL,
        |null USR_TYPE,
        |null IS_CERT,
        |null IS_STDNT
        |from TO_YCAK_USR_D
        |""".stripMargin)
    // Alipay users
    val AliUser: DataFrame = spark.sql(
      """
        |select
        |UID,
        |REG_MID,
        |"2" REG_CHNL,
        |ALY_ID REF_UID,
        |GDR,
        |BIRTHDAY,
        |MSISDN,
        |LOC_ID,
        |LOG_MDE,
        |SUBSTRING(reg_tm,0,8) REG_DT,
        |SUBSTRING(reg_tm,9,6) REG_TM,
        |USR_EXP,
        |SCORE,
        |LEVEL,
        |USR_TYPE USR_TYPE,
        |IS_CERT IS_CERT,
        |IS_STDNT IS_STDNT
        |from TO_YCAK_USR_ALI_D
        |
        |""".stripMargin)

    // QQ users
    val QQUser: DataFrame = spark.sql(
      """
        |select
        |UID,
        |REG_MID,
        |"3" REG_CHNL,
        |QQID REF_UID,
        |GDR,
        |BIRTHDAY,
        |MSISDN,
        |LOC_ID,
        |LOG_MDE,
        |SUBSTRING(reg_tm,0,8) REG_DT,
        |SUBSTRING(reg_tm,9,6) REG_TM,
        |USR_EXP,
        |SCORE,
        |LEVEL,
        |null USR_TYPE,
        |null IS_CERT,
        |null IS_STDNT
        |from TO_YCAK_USR_QQ_D
        |""".stripMargin)

    // APP users
    val APPUser: DataFrame = spark.sql(
      """
        |select
        |UID,
        |REG_MID,
        |"4" REG_CHNL,
        |APP_ID REF_UID,
        |GDR,
        |BIRTHDAY,
        |MSISDN,
        |LOC_ID,
        |null LOG_MDE,
        |SUBSTRING(reg_tm,0,8) REG_DT,
        |SUBSTRING(reg_tm,9,6) REG_TM,
        |USR_EXP,
        |null SCORE,
        |LEVEL,
        |null USR_TYPE,
        |null IS_CERT,
        |null IS_STDNT
        |from TO_YCAK_USR_APP_D
        |""".stripMargin)

    // Union the WeChat, Alipay, QQ and APP users into a single temp view
    WXUser.union(AliUser).union(QQUser).union(APPUser).createTempView("user")

    spark.sql(
      s"""
        |insert overwrite table TW_USR_BASEINFO_D
        |partition (data_dt='${inputdate}')
        |select
        |t1.UID,
        |t2.REG_MID,
        |t2.REG_CHNL,
        |t2.REF_UID,
        |t2.GDR,
        |t2.BIRTHDAY,
        |t2.MSISDN,
        |t2.LOC_ID,
        |t2.LOG_MDE,
        |t2.REG_DT,
        |t2.REG_TM,
        |t2.USR_EXP,
        |t2.SCORE,
        |t2.LEVEL,
        |t2.USR_TYPE,
        |t2.IS_CERT,
        |t2.IS_STDNT
        |from (
        |select distinct uid
        |from TO_YCAK_USR_LOGIN_D
        |where data_dt = '${inputdate}'
        |) t1 left join user t2 on t1.uid = t2.uid
        |
        |""".stripMargin)
    // Users who logged in on 7 consecutive days

    val pre7day = DateUtils.getCurrentDatePreDate(inputdate,7)
    val df: DataFrame = spark.sql(
      s"""
         |
         |
         |select
         |t1.uid,
         |case when t2.REG_CHNL = 1 then '微信'
         |     when t2.REG_CHNL = 2 then '支付宝'
         |     when t2.REG_CHNL = 3 then 'QQ'
         |     when t2.REG_CHNL = 4 then 'APP'
         |     else '未知' end REG_CHNL,
         |t2.REF_UID,
         |case when t2.GDR = 1 then '男'
         |     when t2.GDR = 2 then '女'
         |     else '不明' end GDR,
         |t2.BIRTHDAY,
         |t2.MSISDN,
         |t2.REG_DT,
         |t2.LEVEL
         |from (
         |select uid ,count(*) c1
         |from TW_USR_BASEINFO_D
         |where data_dt BETWEEN '${pre7day}' and '${inputdate}'
         |group by uid
         |having c1 = 1  -- users who logged in on every day of the window; the table here only holds one day of data, so c1 = 1 -- with 7 days of partitions this should be 7 (a sketch follows this query)
         |) t1 left join TW_USR_BASEINFO_D t2 on t1.uid = t2.uid
         |""".stripMargin)
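As the inline comment explains, the having c1 = 1 condition is only correct because the table holds a single day of data. A hedged sketch of the 7-day variant, counting distinct login days per user (assuming TW_USR_BASEINFO_D keeps one partition per day):

    // hypothetical 7-consecutive-day variant of the inner aggregation
    val sevenDayLoginSql =
      s"""
         |select uid, count(distinct data_dt) c1
         |from TW_USR_BASEINFO_D
         |where data_dt between '${pre7day}' and '${inputdate}'
         |group by uid
         |having c1 = 7
         |""".stripMargin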
    var pro = new Properties()
    pro.put("user",ConfigUtils.MYSQL_USER)
    pro.put("password",ConfigUtils.MYSQL_PASSWORD)
    df.write.mode(SaveMode.Overwrite).jdbc(ConfigUtils.MYSQL_URL,"user_7days_active",pro)

    spark.close()
  }

}
EDS layer (machine) - GenerateTwMacBaseinfoD
object GenerateTwMacBaseinfoD {
  def main(args: Array[String]): Unit = {
    if (args.size == 0) {
      println("请输入日期:")
      System.exit(1)
    }
    var inputdate = args(0)

    var spark: SparkSession = null
    if (ConfigUtils.LOCAL_RUN) {
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("local[*]")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    } else {
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("yarn")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }

    spark.udf.register("getProvince", (addr: String) => {
      var pro = ""
      try {
        if (addr.contains("内蒙古自治区")) {
          pro = "内蒙古"
        } else if (addr.contains("宁夏回族自治区")) {
          pro = "宁夏"
        } else if (addr.contains("西藏自治区")) {
          pro = "西藏"
        } else if (addr.contains("广西壮族自治区")) {
          pro = "广西"
        } else if (addr.contains("新疆维吾尔自治区")) {
          pro = "新疆"
        } else if (addr.contains("北京市")) {
          pro = "北京"
        } else if (addr.contains("上海市")) {
          pro = "上海"
        } else if (addr.contains("重庆市")) {
          pro = "重庆"
        } else if (addr.contains("天津市")) {
          pro = "天津"
        } else if (addr.contains("省")) {
          pro = addr.substring(0, addr.indexOf("省"))
        } else if (addr.isEmpty || addr.equals("")) {
          pro = addr
        }

      } catch {
        case e: Exception => {
          println("Province: " + addr + "\n" + "error: " + e.getMessage)
        }
      }
      pro
    })

    spark.udf.register("getCity", (addr: String) => {
      var city = ""
      try {
        if (addr.contains("内蒙古自治区")) {
          if (addr.contains("阿拉善盟")) {
            city = "阿拉善盟"
          } else {
            city = addr.substring(addr.indexOf("区") + 1, addr.indexOf("市"))
          }
        } else if (addr.contains("宁夏回族自治区")) {
          city = addr.substring(addr.indexOf("区") + 1, addr.indexOf("市"))
        } else if (addr.contains("西藏自治区")) {
          city = addr.substring(addr.indexOf("区") + 1, addr.indexOf("市"))
        } else if (addr.contains("广西壮族自治区")) {
          city = addr.substring(addr.indexOf("区") + 1, addr.indexOf("市"))
        } else if (addr.contains("新疆维吾尔自治区")) {
          if (addr.contains("市")) {
            city = addr.substring(addr.indexOf("区") + 1, addr.indexOf("市"))
          } else if (addr.contains("州")) {
            city = addr.substring(addr.indexOf("区") + 1, addr.indexOf("州"))
          } else {
            val index = addr.indexOf("区")
            city = addr.substring(index + 1, addr.indexOf("区", index + 1))
            // addr.indexOf("区", index + 1) searches from index + 1, i.e. finds the position of the second "区"
          }
        } else if (addr.contains("西藏自治区")) {
          city = addr.substring(addr.indexOf("区") + 1, addr.indexOf("市"))
        } else if (addr.contains("北京市")) {
          city = addr.substring(addr.indexOf("市") + 1, addr.indexOf("区"))
        } else if (addr.contains("上海市")) {
          city = addr.substring(addr.indexOf("市") + 1, addr.indexOf("区"))
        } else if (addr.contains("天津市")) {
          city = addr.substring(addr.indexOf("市") + 1, addr.indexOf("区"))
        } else if (addr.contains("重庆市")) {
          if (addr.contains("区")) {
            city = addr.substring(addr.indexOf("市") + 1, addr.indexOf("区"))
          } else {
            city = addr.substring(addr.indexOf("市") + 1, addr.indexOf("县"))
          }

        } else if (addr.contains("省")) {
          if (addr.contains("市")) {
            city = addr.substring(addr.indexOf("省") + 1, addr.indexOf("市"))
          } else if (addr.contains("州")) {
            city = addr.substring(addr.indexOf("省") + 1, addr.indexOf("州"))
          }
        } else if (addr.isEmpty || addr.equals("")) {
          city = addr
        }

      } catch {
        case e: Exception => {
          println("city: " + addr + "\n" + "error: " + e.getMessage)
        }
      }
      city
    })
    // operating mode
    spark.udf.register("getBUS", (BUS: String) => {
      var tp = ""
      try {
        if (BUS.contains("街机版") || BUS.contains("加强版")) {
          tp = "自营"
        } else if (BUS.contains("联营版") || BUS.contains("咪哒KSHOW(风机版)")) {
          tp = "加盟"
        } else {
          tp = "其它"
        }

      } catch {
        case e: Exception => {
          println("type:" + tp + "\n" + e.getMessage)
        }
      }
      tp
    })

    spark.sql("use songdb")
    spark.sql(
      s"""

         |insert overwrite table TW_MAC_BASEINFO_D
         |partition (data_dt='${inputdate}')
         |select
         |t1.MID MID  ,
         |t3.MAC_NM MAC_NM,
         |t1.UI_VER SONG_WHSE_VER,
         |t1.EXEC_VER EXEC_VER,
         |t1.UI_VER UI_VER,
         |t1.HARD_ID HARD_ID,
         |t2.SALE_TM SALE_TM,
         |t2.REV_TM REV_TM,
         |"无" OPER_NM,
         |getProvince(t2.addr) PRVC,
         |getCity(t2.addr) CTY,
         |t8.AREA AREA,
         |nvl(t5.ADDR,t2.addr) ADDR,
         |t5.STORE_NM STORE_NM,
         |t5.SUB_TAG_NM SCENCE_CATGY,
         |t5.SUB_SCENE_CATGY_NM SUB_SCENCE_CATGY,
         |t5.TAG_NM SCENE,  -- main scene name
         |t5.SUB_SCENE_NM SUB_SCENE,
         |t5.BRND_NM BRND,
         |t5.SUB_BRND_NM SUB_BRND,
         |t3.PKG_NM PRDCT_NM,
         |t4.PRDCT_TYPE PRDCT_TYP,
         |getBUS(t3.PKG_NM) BUS_MODE, -- operating mode
         |t3.INV_RATE INV_RATE,
         |t3.AGE_RATE AGE_RATE,
         |t3.COM_RATE COM_RATE,
         |t3.PAR_RATE PAR_RATE,
         |t3.IS_ACTV IS_ACTV,
         |t3.ACTV_TM ACTV_TM,
         |t3.HAD_MPAY_FUNC PAY_SW,
         |t4.ADMINID PRTN_NM,
         |t1.CUR_LOGIN_TM CUR_LOGIN_TM
         |
         |from TO_YCAK_MAC_D t1
         |left join TO_YCAK_MAC_LOC_D t2 on t1.mid = t2.mid
         |left join TO_YCBK_MAC_ADMIN_MAP_D t3 on t1.mid = t3.mid
         |left join TO_YCBK_MAC_STORE_MAP_D t4 on t3.mid = t4.mid
         |left join TO_YCBK_STORE_D t5 on t4.store_id = t5.id
         |left join TO_YCBK_CITY_D t6 on t5.prvc_id = t6.prvc_id
         |left join TO_YCBK_CITY_D t7 on t5.cty_id = t7.cty_id
         |left join TO_YCBK_AREA_D t8 on t5.area_id = t8.area_id
         |
         |""".stripMargin)


    spark.close()
  }

}

EDS layer (machine) - GenerateTwMacLocD

object GenerateTwMacLocD {
  def main(args: Array[String]): Unit = {
    if(args.size==0){
      println("请输入日期:")
      System.exit(1)
    }
    val inputdate = args(0)
    val yesterday = DateUtils.getCurrentDatePreDate(inputdate,1)

    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN){
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("local[*]")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("yarn")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }
    spark.sparkContext.setLogLevel("WARN")
    spark.sql("use songdb")

    // A machine may report many coordinates, and the reported coordinates may differ.
    // Group by mid and coordinate, and take the most frequently reported coordinate as the machine's location (a DataFrame-API sketch of the same idea follows this query).
    val df: DataFrame = spark.sql(
      s"""
        |select mid,x,y,cnt
        |from(select mid,x,y,cnt,
        |ROW_NUMBER() over(partition by mid order by cnt desc) rn
        |from(
        |select mid,lat x,lng y,COUNT(DISTINCT uid) cnt  -- dedup by uid, because one user may locate the same machine several times
        |from to_ycak_usr_loc_d
        |where data_dt = '${inputdate}' and lat !='' and lng !=''
        |group by mid,lat,lng
        |) t1
        |) t2 where rn =1
        |""".stripMargin)
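The same "most frequently reported coordinate per machine" dedup can also be written with the DataFrame API; a minimal sketch, assuming to_ycak_usr_loc_d has the mid, lat, lng, uid and data_dt columns used above:

    // hedged DataFrame-API equivalent of the SQL above
    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions._

    val w = Window.partitionBy("mid").orderBy(col("cnt").desc)
    val dfAlt = spark.table("to_ycak_usr_loc_d")
      .where(col("data_dt") === inputdate && col("lat") =!= "" && col("lng") =!= "")
      .groupBy("mid", "lat", "lng")
      .agg(countDistinct("uid").as("cnt"))   // dedup by uid, as in the SQL
      .withColumnRenamed("lat", "x")
      .withColumnRenamed("lng", "y")
      .withColumn("rn", row_number().over(w))
      .where(col("rn") === 1)                // keep the most frequent coordinate per mid
      .drop("rn")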


    val rdd: RDD[Row] = df.rdd.mapPartitions(iter => { // limit the number of AMap API calls: one request can reverse-geocode at most 20 coordinates
      val list3 = new ListBuffer[Row]
      // materialize the iterator into a List (an iterator can only be traversed once)
      val list: List[Row] = iter.toList
      // number of HTTP requests to send (a more compact batching sketch using grouped(20) follows this block)
      var times = 0
      if (list.size % 20 == 0) {
        times = list.size / 20
      } else {
        times = list.size / 20 + 1
      }
      for (i <- 0 until times) {
        val list1: List[Row] = list.slice(i * 20, (i + 1) * 20) // list1 holds at most 20 rows
        var yx = "" // "lng,lat|lng,lat|..." string for up to 20 coordinates
        for (elem <- list1) {
          val x: String = elem.getAs[String]("x")
          val y: String = elem.getAs[String]("y")
          yx += y + "," + x + "|"
        }
        val str: HttpResponse[String] = Http("https://restapi.amap.com/v3/geocode/regeo")
          .param("key", "8e55d37b85c21037bea5521a2da03d2e")
          .param("batch", "true")
          .param("location", yx.substring(0, yx.length - 1)).asString
        val jSONObject: JSONObject = JSON.parseObject(str.body)

        if (!StringUtils.isBlank(str.body) && "10000".equals(jSONObject.getString("infocode")))  {
          val jSONArray: JSONArray = jSONObject.getJSONArray("regeocodes")
          for (i <- 0 until jSONArray.size()) {

            val currentJsonObject: JSONObject = jSONArray.getJSONObject(i)
            val MID = list1(i).getAs[String]("mid").toInt
            val X = list1(i).getAs[String]("x")
            val Y = list1(i).getAs[String]("y")
            val CNT = list1(i).getAs[Long]("cnt").toInt
            val ADDER = currentJsonObject.getString("formatted_address")
            val PRVC: String = currentJsonObject.getJSONObject("addressComponent").getString("province")
            val CTY: String = currentJsonObject.getJSONObject("addressComponent").getString("city")
            val CTY_CD: String = currentJsonObject.getJSONObject("addressComponent").getString("citycode")
            val DISTRICT: String = currentJsonObject.getJSONObject("addressComponent").getString("district")
            val AD_CD = currentJsonObject.getJSONObject("addressComponent").getString("adcode")
            val TOWN_SHIP = currentJsonObject.getJSONObject("addressComponent").getString("township")
            val TOWN_CD = currentJsonObject.getJSONObject("addressComponent").getString("towncode")
            val NB_NM = currentJsonObject.getJSONObject("addressComponent").getJSONObject("neighborhood").getString("name")
            val NB_TP = currentJsonObject.getJSONObject("addressComponent").getJSONObject("neighborhood").getString("type")
            val BD_NM = currentJsonObject.getJSONObject("addressComponent").getJSONObject("building").getString("name")
            val BD_TP = currentJsonObject.getJSONObject("addressComponent").getJSONObject("building").getString("type")
            val STREET = currentJsonObject.getJSONObject("addressComponent").getJSONObject("streetNumber").getString("street")
            val STREET_NB = currentJsonObject.getJSONObject("addressComponent").getJSONObject("streetNumber").getString("number")
            val STREET_LOC = currentJsonObject.getJSONObject("addressComponent").getJSONObject("streetNumber").getString("location")
            val STREET_DRCTION = currentJsonObject.getJSONObject("addressComponent").getJSONObject("streetNumber").getString("direction")
            val STREET_DSTANCE = currentJsonObject.getJSONObject("addressComponent").getJSONObject("streetNumber").getString("distance")
            val BUS_INFO = currentJsonObject.getJSONObject("addressComponent").getString("businessAreas")
            list3.append(Row(MID, X, Y, CNT, ADDER, PRVC, CTY, CTY_CD, DISTRICT, AD_CD, TOWN_SHIP, TOWN_CD, NB_NM, NB_TP, BD_NM, BD_TP, STREET, STREET_NB, STREET_LOC, STREET_DRCTION, STREET_DSTANCE, BUS_INFO))
          }
        }
      }
      list3.iterator
    })
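As mentioned above, the manual times / slice arithmetic is just batching; a more compact fragment sketch using Scala's grouped(20), assuming list: List[Row] as in the code above:

    // hedged alternative for the 20-row batching step inside mapPartitions
    for (batch <- list.grouped(20)) {
      // build "lng,lat|lng,lat|..." with no trailing separator to trim
      val yx = batch.map(r => r.getAs[String]("y") + "," + r.getAs[String]("x")).mkString("|")
      // ... same HTTP request and JSON parsing as above, using `yx` directly
    }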
    // Convert the RDD to a DataFrame; two options:
    // 1. rdd.toDF (a sketch of this option appears after the createDataFrame call below)
    // 2. spark.createDataFrame(rdd, schema) -- used here
    val schema: StructType = StructType(Array[StructField](
      StructField("MID", IntegerType),
      StructField("X", StringType),
      StructField("Y", StringType),
      StructField("CNT", IntegerType),
      StructField("ADDER", StringType),
      StructField("PRVC", StringType),
      StructField("CTY", StringType),
      StructField("CTY_CD", StringType),
      StructField("DISTRICT", StringType),
      StructField("AD_CD", StringType),
      StructField("TOWN_SHIP", StringType),
      StructField("TOWN_CD", StringType),
      StructField("NB_NM", StringType),
      StructField("NB_TP", StringType),
      StructField("BD_NM", StringType),
      StructField("BD_TP", StringType),
      StructField("STREET", StringType),
      StructField("STREET_NB", StringType),
      StructField("STREET_LOC", StringType),
      StructField("STREET_DRCTION", StringType),
      StructField("STREET_DSTANCE", StringType),
      StructField("BUS_INFO", StringType)
    ))
    val df2: DataFrame = spark.createDataFrame(rdd, schema)
    df2.createTempView("mac_location_day")
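As the comment before the schema says, option 1 is rdd.toDF. A hedged sketch of that route with a case class (only a few of the 22 columns are shown, and the class would have to be declared at the top level, outside main, so Spark can derive an encoder):

    // hedged sketch of option 1: map each Row into a case class and call toDF
    case class MacLoc(MID: Int, X: String, Y: String, CNT: Int, ADDER: String)
    import spark.implicits._
    val df2Alt = rdd
      .map(r => MacLoc(r.getInt(0), r.getString(1), r.getString(2), r.getInt(3), r.getString(4)))
      .toDF()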
    // select id from table1 except select id from table2 -> rows that exist only in table1 (set difference)

    // Take the difference between yesterday's tw_mac_loc_d and today's reported result (mac_location_day) to get the mids
    // that did not report today, look up their existing location details,
    // then union them with today's result to get the location of every machine.
    val df3: DataFrame = spark.sql(
      s"""
         |select t2.*
         |from(
         |select mid from tw_mac_loc_d where data_dt = '${yesterday}'
         |except
         |select mid from mac_location_day
         |)t1 left join tw_mac_loc_d t2 on t1.mid = t2.mid
         |where t2.data_dt = '${yesterday}'
         |
         |""".stripMargin)
    // df3 comes from tw_mac_loc_d, so it has one extra data_dt column compared with df2
    // drop("data_dt") removes that column from the DataFrame
    df3.drop("data_dt").union(df2).createTempView("result")
    spark.sql(
      s"""
        |insert overwrite table tw_mac_loc_d partition (data_dt = '${inputdate}')
        |select * from result
        |
        |""".stripMargin)


    spark.close()
  }

}

EDS layer (machine) - GenerateTwMacStatD

object GenerateTwMacStatD {
  def main(args: Array[String]): Unit = {
    if(args.size==0){
      println("请输入日期:")
      System.exit(1)
    }
    val inputdate = args(0)

    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN){
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("local[*]")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("yarn")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }
    spark.sql("use songdb")
    // Refunds and purchases are both grouped by mid, package id and pay type
    spark.sql(
      s"""
        |select mid,
        |pkg_id,
        |pay_type,
        |count(distinct uid) CNSM_USR_CNT ,
        |count(ORDR_ID) REV_ORDR_CNT,
        |sum(coin_prc*coin_cnt) tot_rev
        |from tw_cnsm_brief_d
        |where data_dt = '${inputdate}' and abn_typ = 0
        |group by mid,pkg_id,pay_type
        |
        |""".stripMargin).createTempView("cnsm_rev")
    spark.sql(
      s"""
        |select mid,
        |pkg_id,
        |pay_type,
        |count(distinct uid) REF_USR_CNT ,
        |count(ORDR_ID) REF_ORDR_CNT,
        |abs(sum(coin_prc*coin_cnt)) TOT_REF
        |from tw_cnsm_brief_d
        |where data_dt = '${inputdate}' and abn_typ = 2
        |group by mid,pkg_id,pay_type
        |""".stripMargin).createTempView("cnsm_ref")
    // New user count: users whose registration date is today
    spark.sql(
      s"""
        |select reg_mid mid,count(*) NEW_USR_CNT  -- new users
        |from tw_usr_baseinfo_d
        |where data_dt = '${inputdate}' -- and  reg_dt = '${inputdate}'
        |group by reg_mid
        |""".stripMargin).createTempView("new_user")

      spark.sql(
        s"""
          |insert overwrite table TW_MAC_STAT_D partition (data_dt='${inputdate}')
          |select
          |	t1.MID,
          |	t1.MAC_NM,
          |	t1.PRDCT_TYP PRDCT_TYPE,
          |	t1.STORE_NM,
          |	t1.BUS_MODE,
          |	t1.PAY_SW,
          |	t1.SCENCE_CATGY,
          |	t1.SUB_SCENCE_CATGY,
          |	t1.SCENE,
          |	t1.SUB_SCENE,
          |	t1.BRND,
          |	t1.SUB_BRND,
          |	t2.PRVC,
          |	t2.CTY,
          |	t2.DISTRICT  AREA,
          |	t1.PRTN_NM   AGE_ID,
          |	t1.INV_RATE,
          |	t1.AGE_RATE,
          |	t1.COM_RATE,
          |	t1.PAR_RATE,
          |	t3.PKG_ID,
          |	t3.PAY_TYPE,
          |	nvl(t3.CNSM_USR_CNT,0),
          |	nvl(t4.REF_USR_CNT,0),
          |	nvl(t5.NEW_USR_CNT,0),
          |	nvl(t3.REV_ORDR_CNT,0),
          |	nvl(t4.REF_ORDR_CNT,0),
          |	nvl(t3.TOT_REV,0),
          |	nvl(t4.TOT_REF,0)
          |from tw_mac_baseinfo_d t1
          |left join tw_mac_loc_d t2 on t1.mid = t2.mid
          |left join cnsm_rev t3 on t1.mid = t3.mid
          |left join cnsm_ref t4 on t3.mid = t4.mid  and t3.PKG_ID = t4.PKG_ID and t3.PAY_TYPE = t4.PAY_TYPE
          |left join new_user t5 on t1.mid = t5.mid
          |where t1.data_dt ='${inputdate}' and t2.data_dt='${inputdate}'
          |
          |""".stripMargin)


    spark.close()
  }

}

EDS layer (content) - GenerateTwSongBaseinfoD

object GenerateTwSongBaseinfoD {
  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      println("请输入正确的日期")
      System.exit(1) // terminate the program
    }
    val inputdate = args(0)
    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN) {
      spark = SparkSession
        .builder()
        .master("local[*]")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris",ConfigUtils.HIVE_METASTORE_URIS)
        .config("spark.sql.shuffle.partitions",2)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .master("yarn")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS)
        .config("spark.sql.shuffle.partitions",2)
        .enableHiveSupport()
        .getOrCreate()
    }

    // UDF: extract the album name from the ALBUM JSON field
    spark.udf.register("getAlbum",(album:String)=>{
     var name:String = null
      try{
      name= JSON.parseArray(album).getJSONObject(0).getString("name")
      }catch {
        case e:Exception => {
          if (album != null && album.startsWith("《") && album.endsWith("》")){
            name = album
          }else {
            name = "《暂无专辑》"
          }
        }
      }
      name
    })
    // UDF: extract the singer name or singer id from the SINGER_INFO JSON field

    spark.udf.register("getSingerName_ID",(singerinfo:String,singer:String,nameOrID:String)=> {
      var singerNameOrID=""
      try{
        val singerARR: JSONArray = JSON.parseArray(singerinfo)
        if("singer1".equals(singer) && "name".equals(nameOrID) && singerARR.size()>0){
          singerNameOrID=singerARR.getJSONObject(0).getString("name")
        }else if ("singer1".equals(singer) && "id".equals(nameOrID) && singerARR.size()>0){
          singerNameOrID=singerARR.getJSONObject(0).getString("id")
        }else if ("singer2".equals(singer) && "name".equals(nameOrID) && singerARR.size()>1){
          singerNameOrID=singerARR.getJSONObject(1).getString("name")
        }else if ("singer2".equals(singer) && "id".equals(nameOrID) && singerARR.size()>1){
          singerNameOrID=singerARR.getJSONObject(1).getString("id")
        }

      }catch{
        case e:Exception => {
          println("singerinfo = " + singerinfo + ",error = " + e.getMessage)
        }
      }
      singerNameOrID
    })
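A small usage example of the UDF above, with a made-up SINGER_INFO value (the JSON layout is inferred from the parsing code; the sample names are hypothetical):

    // hypothetical sample; expected output: n1 = 张三, id2 = S002
    val sampleInfo = """[{"name":"张三","id":"S001"},{"name":"李四","id":"S002"}]"""
    spark.sql(s"""select getSingerName_ID('$sampleInfo','singer1','name') n1,
                 |getSingerName_ID('$sampleInfo','singer2','id') id2""".stripMargin).show()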

    // UDF: extract the name from AUTH_CO; fall back to "乐心曲库" when it cannot be parsed
    spark.udf.register("getAUTH_NAME",(AUTH_CO:String)=>{
      var AUTH_NAME: String = "乐心曲库" // default value
      try {
        // try to parse AUTH_CO as JSON and take its "name" field
          val jSONObject: JSONObject = JSON.parseObject(AUTH_CO)
      AUTH_NAME = jSONObject.getString("name")
      }catch {
        case e:Exception => {
          AUTH_NAME=AUTH_CO
        }
      }
      AUTH_NAME

    })
    // UDF: convert the PRDCT_TYPE string into an array of Int
    spark.udf.register("stringTOArray",(PRDCT_TYPE:String)=>{
      var list = new ListBuffer[Int]
      try{
        if(!StringUtils.isBlank(PRDCT_TYPE)){
          val str: Array[String] = PRDCT_TYPE.stripSuffix("]").stripPrefix("[").split(",")
         for(t <-str){
           list.append(t.toDouble.toInt)
         }
        }

      }
      catch {
        case e:Exception => {
            println("stringTOArray: "+e.getMessage)
        }
      }
      list
    })
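Likewise, a quick illustration of stringTOArray with a hypothetical PRDCT_TYPE value:

    // e.g. "[3.0, 4.0, 8.0]" (made-up sample) -> List(3, 4, 8)
    spark.sql("""select stringTOArray("[3.0, 4.0, 8.0]") as prdct_type""").show(false)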


    // UDF: convert post_time (a millisecond timestamp string) into "yyyy-MM-dd HH:mm:ss"
    spark.udf.register("date",(post_time:String)=>{
      var time:String = ""
      try{
          val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        time = sdf.format(post_time.toDouble)
      }catch {
        case e:Exception => {
          time = post_time
        }
      }
      time
    })

    spark.sql("use songdb")
    val sql =
      """
        |insert overwrite table TW_SONG_BASEINFO_D
        |select nbr,
        |nvl(name,other_name) name,
        |source,
        |getAlbum(album) album,
        |prdct,lang,video_format,dur,
        |getSingerName_ID(singer_info,"singer1","name") singer1,
        |getSingerName_ID(singer_info,"singer2","name") singer2,
        |getSingerName_ID(singer_info,"singer1","id") singer1id,
        |getSingerName_ID(singer_info,"singer2","id") singer2id,
        |0 mactime,
        |date(post_time) post_time,
        |pinyin_fst,
        |pinyin,
        |sing_type,
        |ori_singer,
        |lyricist,
        |composer,
        |bpm_val,
        |star_level,
        |video_qlty,
        |video_mk,
        |video_ftur,
        |lyric_ftur,
        |IMG_QLTY,
        |subtitles_type,
        |AUDIO_FMT,
        |ORI_SOUND_QLTY,
        |ORI_TRK,
        |ORI_TRK_VOL,
        |ACC_VER,
        |ACC_QLTY,
        |ACC_TRK_VOL,
        |ACC_TRK,
        |WIDTH,
        |HEIGHT,
        |VIDEO_RSVL,
        |SONG_VER,
        |getAUTH_NAME(AUTH_CO) AUTH_CO,
        |STATE,
        |stringTOArray(PRDCT_TYPE) PRDCT_TYPE
        |from to_song_info_d
        |where nbr != " "
        |""".stripMargin
    spark.sql(sql)

    spark.close()
  }

}

DM layer (content) - GenerateTmSongerRsiD

object GenerateTmSongerRsiD
{
  def main(args: Array[String]): Unit = {
    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN) {
      spark = SparkSession
        .builder()
        .master("local[*]")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris",ConfigUtils.HIVE_METASTORE_URIS)
        .config("spark.sql.shuffle.partitions",2)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .master("yarn")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS)
        .config("spark.sql.shuffle.partitions",2)
        .enableHiveSupport()
        .getOrCreate()
    }


    if (args.length == 0) {
      println("请输入日期")
      System.exit(1)
    }
    val inputdate = args(0)

    spark.sql("use songdb")

    spark.sql(
      s"""
        |-- one day of data
        |select singer, singerid,
        |sum(sing_cnt) sing_cnt,
        |sum(supp_cnt) supp_cnt,
        |SUM(RCT_7_SING_CNT) RCT_7_SING_CNT,
        |sum(RCT_7_SUPP_CNT) RCT_7_SUPP_CNT,
        |sum(RCT_7_TOP_SING_CNT) RCT_7_TOP_SING_CNT,
        |sum(RCT_7_TOP_SUPP_CNT) RCT_7_TOP_SUPP_CNT,
        |SUM(RCT_30_SING_CNT) RCT_30_SING_CNT,
        |sum(RCT_30_SUPP_CNT) RCT_30_SUPP_CNT,
        |sum(RCT_30_TOP_SING_CNT) RCT_30_TOP_SING_CNT,
        |sum(RCT_30_TOP_SUPP_CNT) RCT_30_TOP_SUPP_CNT
        |from(
        |select singer1 singer,singer1id singerid,sing_cnt,supp_cnt,RCT_7_SING_CNT,RCT_7_SUPP_CNT,RCT_30_SING_CNT,RCT_30_SUPP_CNT,
        |RCT_7_TOP_SING_CNT,RCT_7_TOP_SUPP_CNT,RCT_30_TOP_SING_CNT,RCT_30_TOP_SUPP_CNT
        |from TW_SONG_FTUR_D
        |where data_dt = '${inputdate}' and singer2 is not null and singer2 != ""
        |union all
        |select singer2 singer,singer2id singerid,sing_cnt,supp_cnt,RCT_7_SING_CNT,RCT_7_SUPP_CNT,RCT_30_SING_CNT,RCT_30_SUPP_CNT,
        |RCT_7_TOP_SING_CNT,RCT_7_TOP_SUPP_CNT,RCT_30_TOP_SING_CNT,RCT_30_TOP_SUPP_CNT
        |from TW_SONG_FTUR_D
        |where data_dt = '${inputdate}' and singer2 is not null and singer2 != ""
        |) t1
        |group by singer,singerid
        |""".stripMargin).createTempView("singerHeat")


    spark.sql(
      s"""
        |insert overwrite table TW_SINGER_RSI_D
        |partition (data_dt ='${inputdate}')
        |select 1 period ,t1.singer_id singer_id,t1.singer_name singer_name,t1.rsi rsi,
        |rank() over (order by t1.rsi desc) rsi_rank
        |from(
        |select
        |singerid singer_id,
        |singer singer_name,
        |pow((log(sing_cnt/1+1)*0.63+log(sing_cnt+1)*0.37)*0.8+(log(supp_cnt/1+1)*0.63+log(supp_cnt+1)*0.37)*0.2,2)*10 RSI
        |from singerHeat
        |) t1
        |union all
        |select 7 period ,t2.singer_id singer_id,t2.singer_name singer_name,t2.rsi rsi,
        |rank() over (order by t2.rsi desc) rsi_rank
        |from(
        |select
        |singerid singer_id,
        |singer singer_name,
        |pow((log(RCT_7_SING_CNT/7+1)*0.63+log(RCT_7_TOP_SING_CNT+1)*0.37)*0.8+(log(RCT_7_SUPP_CNT/7+1)*0.63+log(RCT_7_TOP_SUPP_CNT+1)*0.37)*0.2,2)*10 RSI
        |from singerHeat
        |) t2
        |union all
        |select 30 period ,t3.singer_id singer_id,t3.singer_name singer_name,t3.rsi rsi,
        |rank() over (order by t3.rsi desc) rsi_rank
        |from(
        |select
        |singerid singer_id,
        |singer singer_name,
        |pow((log(RCT_30_SING_CNT/30+1)*0.63+log(RCT_30_TOP_SING_CNT+1)*0.37)*0.8+(log(RCT_30_SUPP_CNT/30+1)*0.63+log(RCT_30_TOP_SUPP_CNT+1)*0.37)*0.2,2)*10 RSI
        |from singerHeat
        |) t3
        |""".stripMargin)

    val pro= new Properties()
    pro.put("user",ConfigUtils.MYSQL_USER)
    pro.put("password",ConfigUtils.MYSQL_PASSWORD)

    val df: DataFrame = spark.sql(
      s"""
         |select * from TW_SINGER_RSI_D where data_dt = '${inputdate}'
         |""".stripMargin)
    df.write.mode(SaveMode.Append).jdbc(ConfigUtils.MYSQL_URL,"tm_singer_rsi",pro)
    spark.close()
  }

}

DM layer (content) - GenerateTmSongRsiD

object GenerateTmSongRsiD {
  def main(args: Array[String]): Unit = {
    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN) {
      spark = SparkSession
        .builder()
        .master("local[*]")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris",ConfigUtils.HIVE_METASTORE_URIS)
        .config("spark.sql.shuffle.partitions",2)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .master("yarn")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS)
        .config("spark.sql.shuffle.partitions",2)
        .enableHiveSupport()
        .getOrCreate()
    }


    if (args.length == 0) {
      println("请输入日期")
      System.exit(1)
    }
    val inputdate = args(0)

    spark.sql("use songdb")
   spark.sql(
      s"""
        |insert overwrite table TW_SONG_RSI_D
        |partition (data_dt = '${inputdate}')
        |select 1 period,t1.nbr,t1.name,t1.rsi,
        |rank() over(order by t1.rsi desc) rsi_rank
        |from (
        |select nbr,name,
        |-- pow(x, 2) squares x, e.g. pow(10, 2) = 100
        |-- log(x) here is the natural logarithm (ln); a plain-Scala transcription of this RSI formula follows this statement
        |pow((log(sing_cnt/1+1)*0.63+log(sing_cnt+1)*0.37)*0.8+(log(supp_cnt/1+1)*0.63+log(supp_cnt+1)*0.37)*0.2,2)*10 RSI
        |from TW_SONG_FTUR_D
        |where data_dt = '${inputdate}'
        |) t1
        |union all
        |select 7 period,t2.nbr,t2.name,t2.rsi,
        |rank() over(order by t2.rsi desc) rsi_rank
        |from (
        |select nbr,name,
        |pow((log(RCT_7_SING_CNT/7+1)*0.63+log(RCT_7_TOP_SING_CNT+1)*0.37)*0.8+(log(RCT_7_SUPP_CNT/7+1)*0.63+log(RCT_7_TOP_SUPP_CNT+1)*0.37)*0.2,2)*10 RSI
        |from TW_SONG_FTUR_D
        |where data_dt = '${inputdate}'
        |) t2
        |union all
        |select 30 period,t3.nbr,t3.name,t3.rsi,
        |rank() over(order by t3.rsi desc) rsi_rank
        |from (
        |select nbr,name,
        |pow((log(RCT_30_SING_CNT/30+1)*0.63+log(RCT_30_TOP_SING_CNT+1)*0.37)*0.8+(log(RCT_30_SUPP_CNT/30+1)*0.63+log(RCT_30_TOP_SUPP_CNT+1)*0.37)*0.2,2)*10 RSI
        |from TW_SONG_FTUR_D
        |where data_dt = '${inputdate}'
        |) t3
        |""".stripMargin)
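To make the RSI expression above easier to read, here is a plain-Scala transcription of the 1-day formula (Spark SQL's one-argument log is the natural logarithm; the function name is illustrative):

    // illustrative transcription of the 1-day RSI computed in the SQL above
    def rsiDay1(singCnt: Long, suppCnt: Long): Double = {
      val singScore = math.log(singCnt / 1.0 + 1) * 0.63 + math.log(singCnt + 1.0) * 0.37
      val suppScore = math.log(suppCnt / 1.0 + 1) * 0.63 + math.log(suppCnt + 1.0) * 0.37
      math.pow(singScore * 0.8 + suppScore * 0.2, 2) * 10
    }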

    val pro= new Properties()
      pro.put("user",ConfigUtils.MYSQL_USER)
      pro.put("password",ConfigUtils.MYSQL_PASSWORD)

    /*df.withColumn("rsi",
      (log(col("sing_cnt")/1+1)*0.63+log(col("sing_cnt")+1)*0.37)*0.8+
        (log(col("supp_cnt")/1+1)*0.63+log(col("supp_cnt")+1)*0.37)*0.2
      )*/
    val df: DataFrame = spark.sql(
      s"""
         |select * from TW_SONG_RSI_D where data_dt = '${inputdate}'
         |""".stripMargin)
    df.write.mode(SaveMode.Append).jdbc(ConfigUtils.MYSQL_URL,"tm_song_rsi",pro)
    spark.close()
  }

}

DM layer (machine) - GenerateTmMacRegionStatD

object GenerateTmMacRegionStatD {
  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      println("请输入正确的日期")
      System.exit(1) // terminate the program
    }
    val inputdate = args(0)
    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN) {
      spark = SparkSession
        .builder()
        .master("local[*]")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris",ConfigUtils.HIVE_METASTORE_URIS)
        .config("spark.sql.shuffle.partitions",2)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .master("yarn")
        .appName(this.getClass.getName)
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS)
        .config("spark.sql.shuffle.partitions",2)
        .enableHiveSupport()
        .getOrCreate()
    }
    spark.sql("use songdb")
    spark.sql(
      s"""
        |insert overwrite table TM_MAC_REGION_STAT_D partition (data_dt = '${inputdate}')
        |SELECT
        |prvc,
        |cty,
        |count(DISTINCT mid) mac_cnt,
        |sum(tot_rev) mac_rev,
        |sum(tot_ref) mac_ref,
        |sum(REV_ORDR_CNT) MAC_REV_ORDR_CNT,
        |sum(REF_ORDR_CNT) MAC_REF_ORDR_CNT,
        |sum(CNSM_USR_CNT) MAC_CNSM_USR_CNT,
        |sum(REF_USR_CNT) MAC_REF_USR_CNT
        |from tw_mac_stat_d
        |where data_dt = '${inputdate}' and cty != '[]'
        |group by prvc,cty
        |
        |""".stripMargin)

    val pro = new Properties()
    pro.setProperty("user",ConfigUtils.MYSQL_USER)
    pro.setProperty("password",ConfigUtils.MYSQL_PASSWORD)

    val df: DataFrame = spark.sql(s"select * from TM_MAC_REGION_STAT_D where data_dt = '${inputdate}'")
    df.write.mode(SaveMode.Overwrite).jdbc(ConfigUtils.MYSQL_URL,"TM_MAC_REGION_STAT_D",pro)
    spark.close()
  }
}

DM layer - GenerateTmUsrMrchntStatD

object GenerateTmUsrMrchntStatD {
  def main(args: Array[String]): Unit = {
    if(args.size==0){
      println("请输入日期:")
      System.exit(1)
    }
    val inputdate = args(0)

    var spark: SparkSession = null
    if(ConfigUtils.LOCAL_RUN){
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("local[*]")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }else {
      spark = SparkSession
        .builder()
        .appName(this.getClass.getName)
        .master("yarn")
        .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) // metastore address
        .config("spark.sql.shuffle.partitions", 2)
        .enableHiveSupport()
        .getOrCreate()
    }
    spark.sql("use songdb")

    spark.sql(
      s"""
         |insert overwrite table TM_USR_MRCHNT_STAT_D  partition (data_dt = '${inputdate}')
         |select
         |AGE_ID,
         |PAY_TYPE,
         |sum(REV_ORDR_CNT)    REV_ORDR_CNT, -- total revenue order count
         |sum(REF_ORDR_CNT)    REF_ORDR_CNT, -- total refund order count
         |cast(sum(TOT_REV) as double)       TOT_REV, -- total revenue
         |cast(sum(TOT_REF) as double)       TOT_REF, -- total refunds
         |cast(sum(TOT_REV * nvl(INV_RATE,0)*0.01) as decimal(10,2))    TOT_INV_REV, -- investor revenue = revenue share * total revenue
         |cast(sum(TOT_REV * nvl(AGE_RATE,0)*0.01) as decimal(10,2))    TOT_AGE_REV, -- agent revenue
         |cast(sum(TOT_REV * nvl(COM_RATE,0)*0.01) as decimal(10,2))    TOT_COM_REV, -- company revenue
         |cast(sum(TOT_REV * nvl(PAR_RATE,0)*0.01) as decimal(10,2))    TOT_PAR_REV  -- partner revenue
         |from tw_mac_stat_d
         |where data_dt = '${inputdate}'
         |group by AGE_ID,PAY_TYPE
         |
         |""".stripMargin)
    val prop = new Properties()
    prop.setProperty("user",ConfigUtils.MYSQL_USER)
    prop.setProperty("password",ConfigUtils.MYSQL_PASSWORD)

    val df: DataFrame = spark.sql(s"select * from TM_USR_MRCHNT_STAT_D where data_dt = '${inputdate}'")
    df.write.mode(SaveMode.Overwrite).jdbc(ConfigUtils.MYSQL_URL,"TM_USR_MRCHNT_STAT_D",prop)
    spark.close()
  }
}

Streaming layer - RealTimeHotSong

object RealTimeHotSong {
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf().setMaster("yarn").setAppName(this.getClass.getName)
    val ssc = new StreamingContext(sparkConf, Seconds(10))
    ssc.sparkContext.setLogLevel("WARN")
    val spark: SparkSession = SparkSession
      .builder()
      .master("yarn")
      .appName(this.getClass.getName)
      .config("hive.metastore.uris", ConfigUtils.HIVE_METASTORE_URIS) //metastore地址
      .config("spark.sql.shuffle.partitions", 1)
      .getOrCreate()

    import spark.implicits._

    val kafkaMap = Map(
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> ConfigUtils.KAFKA_BROKERS,
      ConsumerConfig.GROUP_ID_CONFIG -> "g2"
    )
    val ds: InputDStream[ConsumerRecord[String,String]] = KafkaUtils.createDirectStream(ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String,String](List("mytopic2"), kafkaMap)
    )
    val ds2: DStream[(String, Int)] = ds.map(v => {
      val songname: String = JSON.parseObject(v.value()).getString("songname")
      (songname, 1)
    }).reduceByKeyAndWindow((v1: Int, v2: Int) => v1 + v2, Seconds(60), Seconds(10))

    val pro = new Properties()
    pro.setProperty("user",ConfigUtils.MYSQL_USER)
    pro.setProperty("password",ConfigUtils.MYSQL_PASSWORD)

    //dStream --> rdd --> DataFrame
    ds2.foreachRDD(rdd => {
      rdd.toDF("songname","cnt").createOrReplaceTempView("t_song") // toDF needs the implicit conversions imported above
      spark.sql(
        """
          |select songname,cnt,rn
          |from (
          |select songname,cnt,row_number() over(order by cnt desc) rn
          |from t_song) t1
          |where rn<=30
          |
          |""".stripMargin).write.mode(SaveMode.Overwrite).jdbc(ConfigUtils.MYSQL_URL,"song_rn",pro)

    })
 /*val sparkConf = new SparkConf().setMaster("local[*]").setAppName("RealTimeHotSong")
    val ssc = new StreamingContext(sparkConf,Seconds(10))

    //-------------------------- create the SparkSession and import the implicit conversions -------------------
    val spark: SparkSession = SparkSession
      .builder()
      .master("local[2]")
      .appName("stream")
      .config("spark.sql.shuffle.partitions", 1)
      .getOrCreate()

    // implicit conversions are needed for RDD -> DataFrame
    import spark.implicits._
    //---------------------------------------------------------------------------------

    val map = Map[String,Object](
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG->classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG->classOf[StringDeserializer],
      ConsumerConfig.GROUP_ID_CONFIG->"g1",
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG->ConfigUtils.KAFKA_BROKERS
    )
    val ds: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String,String](List("mytopic2"), map))

    val ds2: DStream[(String, Int)] = ds.map(v => {
      val v2: String = v.value()
      val jSONObject: JSONObject = JSON.parseObject(v2)
      val songName: String = jSONObject.getString("songname")
      (songName, 1)
    }).reduceByKeyAndWindow((v1: Int, v2: Int) => v1 + v2, Seconds(60), Seconds(10))

    val prop = new Properties()
    prop.setProperty("user",ConfigUtils.MYSQL_USER)
    prop.setProperty("password",ConfigUtils.MYSQL_PASSWORD)

    // take the 30 hottest songs
    //dStream --> rdd --> DataFrame
    ds2.foreachRDD(rdd => {
      rdd.toDF("songname","cnt").createOrReplaceTempView("t_song")
      spark.sql(
        """
          |
          |select songname,cnt,rn from(
          | select songname,cnt,row_number() over(order by cnt desc) rn
          | from t_song
          |)t1 where rn <= 30
          |
          |""".stripMargin).write.mode(SaveMode.Overwrite).jdbc(ConfigUtils.MYSQL_URL,"song_rn",prop)
    })*/

    ssc.start()
    ssc.awaitTermination()
  }

}

Streaming layer - RealTimePVUV

object RealTimePVUV {
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf().setMaster("yarn").setAppName(this.getClass.getName)
    val ssc = new StreamingContext(sparkConf, Seconds(10))
    ssc.sparkContext.setLogLevel("WARN")

    val kafkaMap = Map(
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> ConfigUtils.KAFKA_BROKERS,
      ConsumerConfig.GROUP_ID_CONFIG -> "g1"
    )
    val ds: InputDStream[ConsumerRecord[String,String]] = KafkaUtils.createDirectStream(ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String,String](List("mytopic"), kafkaMap)
    )
    ds.map(v => { // v is a ConsumerRecord; v.value() returns the message payload
      val mid: String = JSON.parseObject(v.value()).getString("mid")
      (mid,1)
    }).reduceByKeyAndWindow((v1:Int,v2:Int)=>v1+v2,Seconds(60),Seconds(10))
      .foreachRDD(rdd => {
        rdd.foreachPartition(iter => {
          val jedis = new Jedis("hadoop12",6379)
          jedis.auth("123")
          iter.foreach(v=>{
            jedis.hset("pv",v._1,v._2.toString)
          })
          jedis.close()
        })
      })
    // compute UV
    ds.map(v => { // v is a ConsumerRecord; v.value() returns the message payload
      val mid: String = JSON.parseObject(v.value()).getString("mid")
      val uid: String = JSON.parseObject(v.value()).getString("uid")
      (mid,uid)
    }).transform(rdd => rdd.distinct()).map(v=>(v._2,1)).reduceByKeyAndWindow((v1:Int,v2:Int)=>v1+v2,Seconds(60),Seconds(10))
      .foreachRDD(rdd => {
        rdd.foreachPartition(iter => {
          val jedis = new Jedis("hadoop12",6379)
          jedis.auth("123")
          iter.foreach(v=>{
            jedis.hset("uv",v._1,v._2.toString)
          })
          jedis.close()
        })
      })
    ssc.start()
    ssc.awaitTermination()
  }
}
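For completeness, the windowed counters written above can be read back from the Redis hashes with hgetAll; a minimal sketch (host, port and password follow the values used in the job):

object ReadPvUvSketch {
  // hedged sketch of how a dashboard could read the counters written by RealTimePVUV
  import redis.clients.jedis.Jedis
  import scala.collection.JavaConverters._

  def main(args: Array[String]): Unit = {
    val jedis = new Jedis("hadoop12", 6379)
    jedis.auth("123")
    val pv = jedis.hgetAll("pv").asScala // mid -> play count within the last 60-second window
    val uv = jedis.hgetAll("uv").asScala
    println(pv)
    println(uv)
    jedis.close()
  }
}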

Common layer

object ConfigUtils {
  /**
    * ConfigFactory.load() by default loads application.conf, application.json and application.properties from the classpath.
    */
  lazy val load: Config = ConfigFactory.load()
  val LOCAL_RUN = load.getBoolean("local.run")
  val HIVE_METASTORE_URIS = load.getString("hive.metastore.uris")
  val HIVE_DATABASE = load.getString("hive.database")
  val HDFS_CLIENT_LOG_PATH = load.getString("clientlog.hdfs.path")
  val MYSQL_URL = load.getString("mysql.url")
  val MYSQL_USER = load.getString("mysql.user")
  val MYSQL_PASSWORD = load.getString("mysql.password")
  val TOPIC = load.getString("kafka.userloginInfo.topic")
  val USER_PLAY_SONG_TOPIC = load.getString("kafka.userplaysong.topic")
  val KAFKA_BROKERS = load.getString("kafka.cluster")
  val REDIS_HOST = load.getString("redis.host")
  val REDIS_PORT = load.getInt("redis.port")
  val REDIS_OFFSET_DB = load.getInt("redis.offset.db")
  val REDIS_DB = load.getInt("redis.db")
}
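Based on the keys read above, a classpath application.properties (or application.conf) might look roughly like this; every value below is a placeholder, not taken from the original project:

local.run=true
hive.metastore.uris=thrift://hadoop11:9083
hive.database=songdb
clientlog.hdfs.path=hdfs://hadoop11:9000/logdata
mysql.url=jdbc:mysql://hadoop12:3306/songresult?useSSL=false
mysql.user=root
mysql.password=******
kafka.userloginInfo.topic=mytopic
kafka.userplaysong.topic=mytopic2
kafka.cluster=hadoop11:9092,hadoop12:9092,hadoop13:9092
redis.host=hadoop12
redis.port=6379
redis.offset.db=0
redis.db=1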



object DateUtils {

  /**
    * Format a string into "yyyy-MM-dd HH:mm:ss"
    * @param stringDate
    * @return
    */
  def formatDate(stringDate:String):String = {
    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    var formatDate = ""
    try{
      formatDate = sdf.format(sdf.parse(stringDate))
    }catch{
      case e:Exception=>{
        try{
          val bigDecimal = new BigDecimal(stringDate)
          val date = new Date(bigDecimal.longValue())
          formatDate = sdf.format(date)
        }catch{
          case e:Exception=>{
            formatDate
          }
        }
      }
    }
    formatDate
  }

  /**
    * Get the date a given number of days before the input date
    * @param currentDate yyyy-MM-dd
    * @param i how many days back
    * @return yyyy-MM-dd
    */
  def getCurrentDatePreDate(currentDate:String,i:Int) = {
    val sdf = new SimpleDateFormat("yyyy-MM-dd")
    val date: Date = sdf.parse(currentDate)
    val calendar = Calendar.getInstance()
    calendar.setTime(date)
    calendar.add(Calendar.DATE,-i)
    val perDate = calendar.getTime
    sdf.format(perDate)
  }
}
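A quick usage example (the dates follow the yyyy-MM-dd format that the method actually parses):

// e.g. the "7 days ago" and "yesterday" dates used by the EDS jobs
DateUtils.getCurrentDatePreDate("2022-03-20", 7) // "2022-03-13"
DateUtils.getCurrentDatePreDate("2022-03-20", 1) // "2022-03-19"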


object StringUtils {
  /**
    * Check whether a string is null or empty
    * @param str
    * @return
    */
  def checkString(str:String) = {
    if(str == null || "".equals(str)) "" else str
  }
}

File - PairRDDMultipleTextOutputFormat

class PairRDDMultipleTextOutputFormat extends MultipleTextOutputFormat[String,String]{
  // use the key of each pair as the output file name
  override def generateFileNameForKeyValue(key: String, value: String, name: String): String = key
  // do not write the key into the output records
  override def generateActualKey(key: String, value: String): String = null
}
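This is the output format used by the ODS job's saveAsHadoopFile call; a hedged usage sketch (the path and RDD names are illustrative) to show the resulting file layout:

// hedged usage sketch; mirrors the saveAsHadoopFile call in the ODS job
import org.apache.spark.rdd.RDD
val pairs: RDD[(String, String)] = ??? // (eventName, tab-separated record)
pairs.saveAsHadoopFile("/some/output/path",
  classOf[String], classOf[String], classOf[PairRDDMultipleTextOutputFormat])
// Result: one text file per distinct key under /some/output/path, e.g.
//   MINIK_CLIENT_SONG_PLAY_OPERATE_REQ
//   MINIK_CLIENT_ADVERTISEMENT_RECORD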
