CC00041.bdpositions——|Hadoop&实时数仓.V21|——|项目.v21|需求一:数据处理&全量查询.V5|

一、编程实现第四层:DWS层数据处理:统计城市、省份订单总额
### --- 编程实现第四层:DWS层数据处理:统计城市、省份订单总额

~~~     dws : 统计城市、省份的订单总额,
~~~     订单总额和订单总数:orderNo、userId、status、totalMoney、areaId
~~~     ODS——mysql:yanqi_trade_orders    下沉    hbase:yanqi_trade_orders
~~~     ODS——mysql:yanqi_area                 下沉    hbase:yanqi_area
~~~     DIM ——hbase:yanqi_area                 生成    hbase:dim_yanqi_area 地域宽表
~~~     DWS——hbase:yanqi_trade_orders+dim_yanqi_area生成flink:table中临时表
二、编程实现运行程序
### --- 编程实现:TotalCityOrder.scala

package dw.dws

import dim.DimArea
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row
/**
 * 需求1 :  查询城市、省份、订单总额、订单总数----全量查询
 * 获取两部分数据
     * 1、dim_yanqi_area  dim维表数据
     * 2、增量数据   yanqi_trade_orders(HBase)
 * 进行计算
 *      1,2 统一到一起参与计算  sql
 *
 *      //把获取到的数据 转成flinktable中的临时表
 *      sql
 *
 */
/**
 * DWS-layer job: total order amount and order count per city and province
 * (full/snapshot query).
 *
 * Reads two HBase-backed sources, registers them as temporary tables in the
 * Flink table environment, joins them in SQL, and prints the retract stream
 * (only the "add" records are printed).
 */
object TotalCityOrder {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Checkpoint every 5s with exactly-once semantics.
    env.enableCheckpointing(5000)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    // 1. Dimension data: dim_yanqi_area wide table (HBase), as (rowKey, csv).
    val dimAreaStream: DataStream[(String, String)] = env.addSource(new ReadDimArea)
    // 2. Fact data: yanqi_trade_orders (HBase), as (rowKey, csv).
    val tradeOrderStream: DataStream[(String, String)] = env.addSource(new ReadTradeOrder)

    // Parse each area row into a DimArea. CSV layout (assumed from the HBase
    // column order — TODO confirm): aname, cid, city, proid, province.
    // The area id is the HBase row key.
    val areaStream: DataStream[DimArea] = dimAreaStream.map { pair =>
      val areaId: Int = pair._1.toInt
      val fields: Array[String] = pair._2.split(",")
      DimArea(
        areaId,
        fields(0).trim,
        fields(1).trim.toInt,
        fields(2).trim,
        fields(3).trim.toInt,
        fields(4).trim
      )
    }

    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
    tableEnv.createTemporaryView("dim_yanqi_area", areaStream)

    /**
     * Sample order row (rowKey, csv):
     * (1,370203,2020-06-28 18:14:01,2,0,1,2020-10-21 22:54:31,1,23a0b124546,2,2020-06-28 18:14:01,0.12,2,10468.0,0,0,98)
     * Field positions below (7=orderNo, 15=userId, 11=status, 12=totalMoney,
     * 0=areaId) are assumed from that sample — TODO confirm against the
     * HBase column order.
     */
    val orderStream: DataStream[TradeOrder] = tradeOrderStream.map { pair =>
      val orderId: Int = pair._1.toInt
      val fields: Array[String] = pair._2.split(",")
      val orderNo: String = fields(7).trim
      val userId: Int = fields(15).trim.toInt
      val status: Int = fields(11).toInt
      val totalMoney: Double = fields(12).toDouble
      val areaId: Int = fields(0).toInt
      TradeOrder(orderId, orderNo, userId, status, totalMoney, areaId)
    }

    orderStream.print()

    tableEnv.createTemporaryView("yanqi_orders", orderStream)

    // Inner query aggregates per district (aname); outer query rolls the
    // district totals up to city/province.
    val sql: String =
      """
        |select f.city,f.province,sum(f.qusum) as orderMoney, sum(f.qucount) as orderCount from
        |(select r.aname as qu,r.city as city,r.province as province,sum(k.totalMoney) as qusum,count(k.totalMoney) as qucount
        |from yanqi_orders as k
        |inner join dim_yanqi_area as r
        |on k.areaId = r.areaId
        |group by r.aname,r.city,r.province) as f
        |group by f.city,f.province
        |""".stripMargin

    val resultTable: Table = tableEnv.sqlQuery(sql)
    // Retract stream: _1 == true means "insert/update", false means "retract".
    val result: DataStream[(Boolean, Row)] = tableEnv.toRetractStream[Row](resultTable)

    // Print only the additions, dropping the retract records.
    result.filter(_._1).print()

    env.execute()
  }

}
### --- 编程实现:ReadDimArea.scala

package dw.dws

import java.util

import myutils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{Cell, TableName}

import scala.collection.JavaConverters._

/**
 * Bounded Flink source that scans the HBase table `dim_yanqi_area`
 * (column family "f1") once and emits each row as
 * (rowKey, comma-joined cell values).
 */
class ReadDimArea extends RichSourceFunction[(String, String)] {
  // HBase handles: created in open(), released in close().
  private var conn: Connection = null
  private var table: Table = null
  private var scan: Scan = null
  // Set by cancel() to prevent (re)emitting.
  var flag = false

  override def open(parameters: Configuration): Unit = {
    val tableName: TableName = TableName.valueOf("dim_yanqi_area")
    val cf1: String = "f1"
    conn = new ConnHBase().connToHbase
    table = conn.getTable(tableName)
    scan = new Scan()
    scan.addFamily(Bytes.toBytes(cf1))
  }

  override def run(ctx: SourceFunction.SourceContext[(String, String)]): Unit = {
    if (!flag) {
      val rs: ResultScanner = table.getScanner(scan)
      try {
        val iterator: util.Iterator[Result] = rs.iterator()
        while (iterator.hasNext) {
          val result: Result = iterator.next()
          val rowKey: String = Bytes.toString(result.getRow)
          // Join all cell values with commas; unlike the manual
          // StringBuffer trim, mkString is safe for a row with no cells.
          val valueString: String = result.listCells().asScala
            .map(cell => Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength))
            .mkString(",")
          ctx.collect((rowKey, valueString))
        }
      } finally {
        // Fix: the ResultScanner was never closed (scanner lease leak on
        // the region server).
        rs.close()
      }
    }
  }

  override def cancel(): Unit = {
    flag = true
  }

  override def close(): Unit = {
    try {
      if (table != null) {
        table.close()
      }
      if (conn != null) {
        conn.close()
      }
    } catch {
      case e: Exception => println(e.getMessage)
    }
  }
}
### --- 编程实现:ReadTradeOrder.scala

package dw.dws

import java.util

import myutils.ConnHBase
import org.apache.flink.configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{Cell, TableName}

import scala.collection.JavaConverters._

/**
 * Bounded Flink source that scans the HBase table `yanqi_trade_orders`
 * (column family "f1") once and emits each row as
 * (rowKey, comma-joined cell values).
 */
class ReadTradeOrder extends RichSourceFunction[(String, String)] {
  // HBase handles: created in open(), released in close().
  private var conn: Connection = null
  private var table: Table = null
  private var scan: Scan = null
  // Set by cancel() to prevent (re)emitting.
  var flag = false

  override def open(parameters: configuration.Configuration): Unit = {
    val tableName: TableName = TableName.valueOf("yanqi_trade_orders")
    val cf1: String = "f1"
    conn = new ConnHBase().connToHbase
    table = conn.getTable(tableName)
    scan = new Scan()
    scan.addFamily(Bytes.toBytes(cf1))
  }

  override def run(ctx: SourceFunction.SourceContext[(String, String)]): Unit = {
    if (!flag) {
      val rs: ResultScanner = table.getScanner(scan)
      try {
        val iterator: util.Iterator[Result] = rs.iterator()
        while (iterator.hasNext) {
          val result: Result = iterator.next()
          val rowKey: String = Bytes.toString(result.getRow)
          // Join all cell values with commas; unlike the manual
          // StringBuffer trim, mkString is safe for a row with no cells.
          val valueString: String = result.listCells().asScala
            .map(cell => Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength))
            .mkString(",")
          ctx.collect((rowKey, valueString))
        }
      } finally {
        // Fix: the ResultScanner was never closed (scanner lease leak on
        // the region server).
        rs.close()
      }
    }
  }

  override def cancel(): Unit = {
    flag = true
  }

  override def close(): Unit = {
    try {
      if (table != null) {
        table.close()
      }
      if (conn != null) {
        conn.close()
      }
    } catch {
      case e: Exception => println(e.getMessage)
    }
  }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

yanqi_vip

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值