CC00034.bdpositions——|Hadoop&实时数仓.V14|——|项目.v14|DWD层处理|数据仓库层数据处理.V4|

一、编程实现DWD(AreaDetailInfo代码同样可以归属在DIM层中):参考代码
### --- 编程实现dwd层数据:AreaDetailInfo

package dim

import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row

/**
 * Flink job that builds a flattened area dimension (district -> city -> province).
 *
 * It reads `(rowKey, value)` pairs from `HBaseReader`, extracts id / name / parent id
 * for every area, self-joins the resulting stream twice with Flink SQL and hands each
 * joined row to `HBaseWriterSink` as one comma-separated line:
 * `areaId,areaName,cityId,cityName,provinceId,provinceName`.
 */
object AreaDetailInfo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Checkpoint every 5000 ms with exactly-once mode.
    env.enableCheckpointing(5000)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    /**
     * Sample records produced by the source (row key, dash-separated value):
     * (110107,010-39.9056-3-116.223-中国,北京,北京市,石景山区-石景山区-110100-Shijingshan-石景山-100043)
     * (110116,010-40.316-3-116.632-中国,北京,北京市,怀柔区-怀柔区-110100-Huairou-怀柔-101400)
     */
    val data: DataStream[(String, String)] = env.addSource(new HBaseReader)

    // Keep only the fields the join needs: id (row key), name (field 5) and
    // parent id (field 6) of the dash-separated value.
    val dataStream: DataStream[AreaDetail] = data.map(x => {
      val id: Int = x._1.toInt
      val datas: Array[String] = x._2.split("-")
      val name: String = datas(5).trim
      val pid: Int = datas(6).trim.toInt

      AreaDetail(id, name, pid)
    })

    // Flink Table API: expose the stream as a temporary view so it can be self-joined.
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
    tableEnv.createTemporaryView("yanqi_area", dataStream)

    // Wide detail table: a = the area itself, b = its parent (city), c = b's parent (province).
    val sql: String =
      """
        |select a.id as areaid,a.name as aname,a.pid as cid,b.name as city, c.id as proid,c.name as province
        |from yanqi_area as a
        |inner join yanqi_area as b on a.pid = b.id
        |inner join yanqi_area as c on b.pid = c.id
        |""".stripMargin

    val areaTable: Table = tableEnv.sqlQuery(sql)

    // A retract stream carries (flag, row): flag == true is an add message, flag == false
    // retracts a previously emitted row. Only add messages may reach the sink, otherwise a
    // retracted row would be written to HBase as if it were current data.
    val resultStream: DataStream[String] = tableEnv
      .toRetractStream[Row](areaTable)
      .filter(_._1)
      .map(x => {
        val row: Row = x._2
        // Columns 0..5 follow the select list: areaid, aname, cid, city, proid, province.
        (0 to 5).map(i => row.getField(i).toString).mkString(",")
      })
    resultStream.addSink(new HBaseWriterSink)

    env.execute()
  }
}
### --- HBaseWriterSink:

package dim

import myutils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Connection, Put, Table}

/**
 * Flink sink that writes one area-dimension record per input line into the HBase table
 * `dim_yanqi_area`.
 *
 * Each input value is a comma-separated line with six fields:
 * `areaId,areaName,cityId,cityName,provinceId,provinceName`. The area id becomes the
 * row key; the other five fields are stored in column family `f1`.
 */
class HBaseWriterSink extends RichSinkFunction[String]{
  import java.nio.charset.StandardCharsets

  // Assigned in open(); they stay null until then, which close() relies on.
  var connection : Connection = _
  var hbTable : Table = _

  /** Opens the HBase connection and resolves the target table. */
  override def open(parameters: Configuration): Unit = {
    connection = new ConnHBase().connToHbase
    hbTable = connection.getTable(TableName.valueOf("dim_yanqi_area"))
  }

  /** Releases the table and the connection; the connection is closed even if the table close fails. */
  override def close(): Unit = {
    try {
      if (hbTable != null) {
        hbTable.close()
      }
    } finally {
      if (connection != null) {
        connection.close()
      }
    }
  }

  /** Encodes text as UTF-8 so stored bytes do not depend on the JVM default charset. */
  private def utf8(text: String): Array[Byte] = text.getBytes(StandardCharsets.UTF_8)

  /**
   * Parses one comma-separated record and writes it to the given table.
   *
   * @param hbTable table to write to
   * @param value   record in the form `areaId,aname,cid,city,proid,province`
   * @throws IllegalArgumentException if the record has fewer than six fields
   */
  def insertDimArea(hbTable: Table, value: String): Unit = {
    // Limit -1 keeps trailing empty fields, so an empty last column is not silently dropped.
    val infos: Array[String] = value.split(",", -1)
    require(infos.length >= 6, s"expected 6 comma-separated fields but got ${infos.length}: $value")

    val areaId: String = infos(0).trim
    val aname: String = infos(1).trim
    val cid: String = infos(2).trim
    val city: String = infos(3).trim
    val proid: String = infos(4).trim
    val province: String = infos(5).trim

    val family: Array[Byte] = utf8("f1")
    val put = new Put(utf8(areaId))
    put.addColumn(family, utf8("aname"), utf8(aname))
    put.addColumn(family, utf8("cid"), utf8(cid))
    put.addColumn(family, utf8("city"), utf8(city))
    put.addColumn(family, utf8("proId"), utf8(proid))
    put.addColumn(family, utf8("province"), utf8(province))

    hbTable.put(put)
  }

  /** Called by Flink for every record; delegates to [[insertDimArea]]. */
  override def invoke(value: String, context: SinkFunction.Context[_]): Unit = {
    insertDimArea(hbTable,value)
  }

}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

yanqi_vip

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值