【无标题】


/**
 * Spark batch job that processes one `day_id` partition of the 15-minute
 * EDPI user/domain snapshot.
 *
 * Steps performed by [[main]]:
 *  1. read the parquet files of the requested day;
 *  2. fan every input row out into one record per (target table, IP version,
 *     aggregation level) key and merge records sharing a key with
 *     `EdpiDwsEntity15Min.addEnttiy`;
 *  3. convert the merged records into rows of [[EdpiTableTmp1Schema]] and
 *     register them as the temp view `edpi_table_tmp1`;
 *  4. run the statements defined in `EdpiSql` with `${day_id}` replaced by the
 *     run date (presumably they read the temp view and write the result
 *     tables -- the SQL text is not visible from this file).
 */
object EdpiAnalyseFile {

  @transient val logger = LoggerFactory.getLogger(EdpiAnalyseFile.getClass)

  /** YARN queue used when no queue name is supplied as the second argument. */
  private val DefaultQueueName = "crflow"

  /** Placeholder inside every `EdpiSql` template that is replaced by the run date. */
  private val DayIdPlaceholder = "${day_id}"

  /**
   * Job entry point.
   *
   * @param args `args(0)` is the run date (`yyyyMMdd`, mandatory);
   *             `args(1)` is the YARN queue name (optional, defaults to `crflow`).
   * @throws IllegalArgumentException when the run date argument is missing
   */
  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "usage: EdpiAnalyseFile <yyyyMMdd> [yarnQueue]")

    val date = args(0) // yyyyMMdd
    // The previous `args(1) != null` test could never apply the default: indexing a
    // one-element array throws before the comparison is evaluated.
    val queueName = args.lift(1).flatMap(Option(_)).getOrElse(DefaultQueueName)
    val input = s"hdfs://hdfsunity/data/zb_zhw_gw_rt/hive/warehouse/dws_zb_zhw_gw_rt.db/dws_user_netquality_edpi_user_req_iptype_topn_domain_15minsnapshot_d_inc/day_id=${date}/*/"
//    val input = "input/day/prov"

    logger.warn("运行日期 =>" + date)
    logger.warn("日志目录 =>" + input)

    // NOTE(review): looks like a leftover smoke test of the model; kept so that the
    // driver's stdout is unchanged -- confirm whether it is still needed.
    val bool1 = ZhichaModel.getModel2IsZhicha(1, 4, 500, 0.05, 1024)
    println(bool1)

    val spark = SparkSession.builder()
      .appName("EdpiAnalyseFile")
//      .master("local[*]")
      .config("spark.sql.parquet.binaryAsString", "true")
      .config("spark.yarn.queue", queueName)
      .enableHiveSupport()
      .getOrCreate()

    // try/finally so the session is released even when a stage or statement fails.
    try {
      val beginTime = System.currentTimeMillis()
      val sc = spark.sparkContext
      // Original note: in the table data the city of a municipality may be empty,
      // so provinces in this list are handled specially (province name used as city).
      val city4ListBC = sc.broadcast(EdpiConstant.CITY_LIST)
      val provMappingBC = sc.broadcast(EdpiConstant.PROV_MAPPING)
      val df = spark.read.parquet(input)

      val tmpRes = df.rdd.mapPartitions(iter => {
        // Resolve the broadcast once per partition rather than once per row.
        val city4List = city4ListBC.value
        iter.flatMap(row => {
          val entity = new EdpiDwsEntity15Min(row)
          val provinceName = entity.province_cn
          val city = if (city4List.contains(provinceName)) provinceName else entity.city_cn
          val brasIp = entity.bras_ipv4_address
          val lev3Domain = entity.lev3_domain
          val pppoeAccount = entity.pppoe_account
          val dstIp = entity.dst_ip
          val agg15minuteTime = entity.agg_15minute_time

          // 1 = broadband account, 2 = dedicated-line account.
          // The interpolation is kept on purpose: it turns a null province into the
          // string "null" before the lookup, exactly as before.
          val userType = if (Aaa_Clean.getACtype(s"${provinceName}", pppoeAccount) != 2) 1 else 2
          // Resource id (website_id) is mapped from the second-level domain; the
          // second-level domain itself is stored here. Falls back to the original
          // domain when the transform returns null.
          val lev2Domain = Option(RSDomainUtil.transformUrl(2, lev3Domain)).getOrElse(lev3Domain)

          // One entry per target table:
          // (table flag, user type, domain, account, destination IP, 15-minute slot).
          // Dimensions a table does not use carry the neutral values -1 / "".
          val tableDims = Seq(
            // table 1: tbl_content_edpi_desc_info
            ("edpi_desc_1st", userType, "", "", "", ""),
            // table 2: tbl_website_point_new_info (website / application id, day granularity)
            ("website_point_2nd", -1, lev2Domain, "", "", ""),
            // table 3: tbl_alive_quality_info
            ("alive_quality_3rd", userType, "", pppoeAccount, "", ""),
            // table 4: tbl_website_analyse_info_ip
            ("website_analyse_4th", -1, lev2Domain, pppoeAccount, dstIp, ""),
            // table 7: tbl_website_domain_point_new_info
            ("website_domain_point_7th", -1, lev2Domain, "", "", agg15minuteTime)
          )
          // Aggregation levels: 1 = province, 2 = city, 3 = BRAS; "-1" marks a
          // dimension that is rolled up at that level.
          val geoLevels = Seq((1, "-1", "-1"), (2, city, "-1"), (3, city, brasIp))

          // Emits, per table, IPv4 prov/city/bras followed by IPv6 prov/city/bras:
          // the same 30 keys, in the same order, as the former hand-written list.
          for {
            (tblFlag, keyUserType, domain, account, ip, slot) <- tableDims
            ipVersion <- Seq(4, 6)
            (logType, keyCity, keyBras) <- geoLevels
          } yield (
            (tblFlag, logType, provinceName, ipVersion, keyUserType, keyCity, keyBras, domain, account, ip, slot),
            // A fresh entity per key: whether addEnttiy mutates its receiver is not
            // visible here, so instances must not be shared between keys.
            new EdpiDwsEntity15Min(row)
          )
        })
      }).reduceByKey(
        (entityA, entityB) => entityA.addEnttiy(entityB)
      ).cache()

      val edpiDesc1stdf = tmpRes.mapPartitions(iter => {
        val provMapping = provMappingBC.value
        iter.map {
          case ((tblFlag, logType, provinceName, ipVersion, userType, cityName, brasIp, lev2Domain, pppoeAccount, dstIp, agg15minuteTime), entity) =>
            val provinceId = provMapping.getOrDefault(provinceName, -1)
            val dstIpArea = 0

            if (ipVersion == 4) {
              val totalDelay = entity.domain_user_ipv4_http_total_delay_avg_ms_15min
              val lossPackRate = entity.domain_user_ipv4_http_loss_packet_ratio_15min
              val maxDownloadSpeed = entity.domain_user_ipv4_http_dl_flow_max_bps_15min
              val isQuality = if (ZhichaModel.getModel2IsZhicha(logType, ipVersion, totalDelay, lossPackRate, maxDownloadSpeed)) 1 else 0
              val qualityScore = ZhichaModel.getModel1Score(logType, ipVersion, List((1, totalDelay),  (2, lossPackRate), (3, maxDownloadSpeed)))

              Row(
                date, logType, provinceName, ipVersion, userType, cityName, brasIp, isQuality, lev2Domain, tblFlag, pppoeAccount, dstIp, agg15minuteTime, dstIpArea,
                qualityScore,
                entity.domain_user_ipv4_http_cnt_15min,
                entity.domain_user_ipv4_http_trans_data_total_byte_15min,
                entity.domain_user_ipv4_http_conn_delay_avg_ms_15min,
                entity.domain_user_ipv4_http_1st_packet_delay_avg_ms_15min,
                lossPackRate,
                entity.domain_user_ipv4_http_dl_flow_avg_bps_15min,
                totalDelay, maxDownloadSpeed, provinceId
              )
            } else {
              val totalDelay = entity.domain_user_ipv6_http_total_delay_avg_ms_15min
              val lossPackRate = entity.domain_user_ipv6_http_loss_packet_ratio_15min
              val maxDownloadSpeed = entity.domain_user_ipv6_http_dl_flow_max_bps_15min
              val isQuality = if (ZhichaModel.getModel2IsZhicha(logType, ipVersion, totalDelay, lossPackRate, maxDownloadSpeed)) 1 else 0
              // NOTE(review): the IPv6 score is hard-coded to 0.0 while IPv4 calls
              // getModel1Score -- confirm this asymmetry is intentional.
              val qualityScore = 0.0

              Row(
                date, logType, provinceName, ipVersion, userType, cityName, brasIp, isQuality, lev2Domain, tblFlag, pppoeAccount, dstIp, agg15minuteTime, dstIpArea,
                qualityScore,
                entity.domain_user_ipv6_http_cnt_15min,
                entity.domain_user_ipv6_http_trans_data_total_byte_15min,
                entity.domain_user_ipv6_http_conn_delay_avg_ms_15min,
                entity.domain_user_ipv6_http_1st_packet_delay_avg_ms_15min,
                lossPackRate,
                entity.domain_user_ipv6_http_dl_flow_avg_bps_15min,
                totalDelay, maxDownloadSpeed, provinceId
              )
            }
        }
      })

      spark.createDataFrame(edpiDesc1stdf, EdpiTableTmp1Schema).createOrReplaceTempView("edpi_table_tmp1")

      // table 1
      runSqlGroup(spark, date,
        "edpiDesc1stProvSqlDay" -> EdpiSql.edpiDesc1stProvSql,
        "edpiDesc1stCitySqlDay" -> EdpiSql.edpiDesc1stCitySql,
        "edpiDesc1stBrasSqlDay" -> EdpiSql.edpiDesc1stBrasSql)

      // table 2
      runSqlGroup(spark, date,
        "websitePoint2ndProvSqlDay" -> EdpiSql.websitePoint2ndProvSql,
        "websitePoint2ndCitySqlDay" -> EdpiSql.websitePoint2ndCitySql,
        "websitePoint2ndBrasSqlDay" -> EdpiSql.websitePoint2ndBrasSql)

      // table 3
      runSqlGroup(spark, date,
        "aliveQuality3rdProvSqlDay" -> EdpiSql.aliveQuality3rdProvSql,
        "aliveQuality3rdCitySqlDay" -> EdpiSql.aliveQuality3rdCitySql,
        "aliveQuality3rdBrasSqlDay" -> EdpiSql.aliveQuality3rdBrasSql)

      // table 4
      runSqlGroup(spark, date,
        "websiteAnalyse4thProvSqlDay" -> EdpiSql.websiteAnalyse4thProvSql,
        "websiteAnalyse4thCitySqlDay" -> EdpiSql.websiteAnalyse4thCitySql,
        "websiteAnalyse4thBrasSqlDay" -> EdpiSql.websiteAnalyse4thBrasSql)

      // table 5: tbl_domain_top100_info, derived from table 2
      // (the log label previously said "4th" by mistake)
      runSqlGroup(spark, date,
        "domain100Top5thProvSqlDay" -> EdpiSql.domain100Top5thProvSql)

      // table 6: tbl_domain_top100_info_ip, derived from table 4
      runSqlGroup(spark, date,
        "domainIP100Top6thProvSqlDay" -> EdpiSql.domainIP100Top6thProvSql)

      // table 7: tbl_website_domain_point_new_info
      runSqlGroup(spark, date,
        "websiteDomainPoint7thProvSqlDay" -> EdpiSql.websiteDomainPoint7thProvSql,
        "websiteDomainPoint7thCitySqlDay" -> EdpiSql.websiteDomainPoint7thCitySql,
        "websiteDomainPoint7thBrasSqlDay" -> EdpiSql.websiteDomainPoint7thBrasSql)

      // table 8: tbl_website_analyse_info
      spark.sql(EdpiSql.WebsiteTmpSql).createOrReplaceTempView("website_tmp")
      runSqlGroup(spark, date,
        "websiteAnalyse8thProvSqlDay" -> EdpiSql.websiteAnalyse8thProvSql,
        "websiteAnalyse8thCitySqlDay" -> EdpiSql.websiteAnalyse8thCitySql,
        "websiteAnalyse8thBrasSqlDay" -> EdpiSql.websiteAnalyse8thBrasSql)

      val duration = (System.currentTimeMillis() - beginTime) / 1000 / 60
      logger.warn(this.getClass.getName + "==> 程序运行时间(分): " + duration)
    } finally {
      spark.close()
    }
  }

  /**
   * Renders a group of SQL templates for one day, logs all rendered statements
   * on a single line, then executes them in the given order.
   *
   * @param spark     session used to execute the statements
   * @param dayId     run date substituted for the `${day_id}` placeholder
   * @param templates pairs of (log label, SQL template)
   */
  private def runSqlGroup(spark: SparkSession, dayId: String, templates: (String, String)*): Unit = {
    val rendered = templates.map { case (label, template) =>
      label -> template.replace(DayIdPlaceholder, dayId)
    }
    val logLine = rendered.zipWithIndex.map { case ((label, sql), idx) =>
      // The first entry keeps the historical two-space arrow so that the emitted
      // log line stays byte-compatible with earlier runs.
      val arrow = if (idx == 0) " ==>  " else " ==> "
      "[" + label + arrow + sql + "]"
    }.mkString(" ")
    logger.warn(logLine)
    rendered.foreach { case (_, sql) => spark.sql(sql) }
  }

  /** Schema of the rows registered by [[main]] as temp view `edpi_table_tmp1`. */
  val EdpiTableTmp1Schema: StructType = StructType(
    Array(
      StructField("record_date", StringType, true), // run date
      StructField("log_type", IntegerType, true), // log type (main writes the aggregation level 1/2/3 here)
      StructField("province_cn", StringType, true), // province
      StructField("ip_version", IntegerType, true), // IP version: 4 or 6
      StructField("user_type", IntegerType, true), // 1 = broadband account, 2 = dedicated-line account
      StructField("city_cn", StringType, true), // region (city)
      StructField("bras_ip", StringType, true), // BRAS IP
      StructField("zhicha_type", IntegerType, true), // poor-quality flag
      StructField("lev2_domain", StringType, true), // second-level domain
      StructField("tbl_flag", StringType, true), // target-table discriminator
      StructField("pppoe_account", StringType, true), // account
      StructField("dst_ip", StringType, true), // resolved (destination) IP
      StructField("agg_15minute_time", StringType, true), // 15-minute time slot, HH:MM
      StructField("dst_ip_area", IntegerType, true), // destination IP area; main always writes 0


      StructField("quality_score", DoubleType, true), // poor-quality score
      StructField("url_sum", LongType, true), // request count
      StructField("all_flow", LongType, true), // total transferred volume
      StructField("avg_conn_delay", DoubleType, true), // average connection delay
      StructField("avg_first_delay", DoubleType, true), // average first-packet delay
      StructField("avg_lost_rate", DoubleType, true), // packet loss rate
      StructField("avg_download_speed", DoubleType, true), // average download speed
      StructField("avg_all_dalay", DoubleType, true), // overall delay (column name spelling kept; the SQL may reference it)
      StructField("max_download_speed", DoubleType, true), // peak download speed
      StructField("province_id", IntegerType, true) // province id
    )
  )
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值