思路分析
- 自定义数据源,读取 HBase 表 dim_lagou_area 和 lagou_trade_orders 中的数据
- 根据流数据构建flink table
- 执行关联的sql
SELECT
areas.city AS city,
areas.province AS province,
COUNT(orders.orderid) AS totalNumber,
SUM(orders.totalMoney) AS totalMoney FROM
lagou_trade_orders orders
INNER JOIN dim_lagou_area areas ON orders.areaId = areas.areaId
GROUP BY areas.city,areas.province
- 将结果table转换成流数据并打印出来
编码实现
1.建立样例类,自定义获取数据源继承RichSourceFunction,重写
open,run,close方法
package dws
/** Immutable record for one row of the `lagou_trade_orders` HBase table.
  *
  * Used as the element type of the Flink order stream before it is registered
  * as the `lagou_trade_orders` table and joined with `dim_lagou_area`.
  *
  * NOTE(review): field names presumably mirror columns in the HBase table's
  * column family — confirm against the source reader that populates this.
  *
  * @param orderId    numeric order identifier (joined/aggregated as `orderid` in the SQL)
  * @param orderNo    human-readable order number
  * @param userId     id of the user who placed the order
  * @param status     order status code (semantics defined by the upstream system)
  * @param totalMoney order amount, summed per (city, province) in the SQL
  * @param areaId     foreign key into `dim_lagou_area` (join key `areaId`)
  */
final case class TradeOrders(
  orderId: Int,
  orderNo: String,
  userId: Int,
  status: Int,
  totalMoney: Double,
  areaId: Int
)
package dws
import java.util
import dim.DimArea
import myUtils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source.{
RichSourceFunction, SourceFunction}
import org.apache.hadoop.hbase.{
Cell, TableName}
import org.apache.hadoop.hbase.client.{
Connection, Result, ResultScanner, Scan, Table}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.flink.api.scala._
import scala.collection.JavaConverters._
class ReadDimArea extends RichSourceFunction[(String,String)] {
private var conn:Connection=_
private var table:Table=_
private var scan:Scan=_
private var flag=false
//open
// Lifecycle hook: invoked once per source subtask before run().
// Opens the HBase connection and prepares a scan over the
// `dim_lagou_area` dimension table, restricted to column family "f1".
// NOTE(review): `connToHabse` misspells "HBase" — the name belongs to the
// external ConnHBase helper, so it must be renamed there, not here.
override def open(parameters: Configuration): Unit = {
conn = new ConnHBase().connToHabse
table = conn.getTable(TableName.valueOf("dim_lagou_area"))
scan = new Scan()
// Only family "f1" is read; Bytes.toBytes("f1") would be the more
// idiomatic HBase call than String.getBytes (platform-charset dependent).
scan.addFamily("f1".getBytes())
}
override def close(): Unit = {<