9、业务库数据分析
前面我们已经通过sparkStreaming的程序,解析kafka当中业务库的数据,将数据全部都落地到了Hbase当中的四张表("order_info"、"renter_info"、"driver_info"、"opt_alliance_business")里面去了。
针对以上订单表,司机表,用户表,司管方表等,我们可以针对以上存储在Hbase当中的数据进行统计分析。
通过sparkSQL读取Hbase当中的数据,实现数据的统计分析,然后将统计分析的结果,通过sparkSQL自定义输出源,保存到Hbase里面去。
这里涉及到通过自定义sparkSQL的Hbase数据源来实现读取Hbase的数据,以及自定义sparkSQL的数据保存,将统计的结果保存到Hbase里面去
9.1、sparkSQL自定义数据源
1、创建hbase数据源表
为了实现我们的sparkSQL自定义数据源,获取Hbase当中的数据,我们可以开发测试用例,通过自定义数据源实现获取Hbase当中的数据,然后将查询的数据保存到Hbase里面去
node01执行以下命令创建Hbase表
cd /kkb/install/hbase-1.2.0-cdh5.14.2/
bin/hbase shell
create 'spark_hbase_sql','cf'
put 'spark_hbase_sql','0001','cf:name','zhangsan'
put 'spark_hbase_sql','0001','cf:score','80'
put 'spark_hbase_sql','0002','cf:name','lisi'
put 'spark_hbase_sql','0002','cf:score','60'
2、创建Hbase的数据保存表
node01执行以下命令创建Hbase表,用于将分析之后的结果数据保存到Hbase当中来
cd /kkb/install/hbase-1.2.0-cdh5.14.2/
bin/hbase shell
create 'spark_hbase_write','cf'
3、自定义SparkSQL的数据源,读取Hbase数据,以及将分析结果保存回Hbase
关于sparkSQL自定义数据源,参见以下文章
https://blog.csdn.net/zjerryj/article/details/84922369
https://www.cnblogs.com/niutao/p/10801259.html
在我们的travel_spark这个项目模块下,创建package com.travel.programApp
,然后在这个package下面创建scala的object文件HbaseSourceAndSink.scala
import java.util
import java.util.Optional
import com.travel.utils.HbaseTools
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.sources.v2.reader._
import org.apache.spark.sql.sources.v2.writer.{DataSourceWriter, DataWriter, DataWriterFactory, WriterCommitMessage}
import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, ReadSupport, WriteSupport}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}
object HBaseSourceAndSink {
  /**
   * Demo entry point: reads rows from the HBase table `spark_hbase_sql`
   * through the custom DataSourceV2 reader, filters them with Spark SQL,
   * and writes the result back to `spark_hbase_write` through the custom sink.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[2]")
      // fix: Spark requires an application name; getOrCreate() fails without one
      .appName("HBaseSourceAndSink")
      .getOrCreate()
    val df = spark.read
      .format("com.travel.programApp.HBaseSource")
      .option("hbase.table.name", "spark_hbase_sql")
      .option("schema", "`name` STRING,`score` STRING")
      .option("cf.cc", "cf:name,cf:score")
      .load()
    df.explain(true)
    df.createOrReplaceTempView("sparkHBaseSQL")
    df.printSchema()
    // score is stored as STRING; the comparison relies on Spark's implicit numeric cast
    val frame: DataFrame = spark.sql("select * from sparkHBaseSQL where score > 60")
    frame.write.format("com.travel.programApp.HBaseSource")
      .mode(SaveMode.Overwrite)
      .option("hbase.table.name", "spark_hbase_write")
      .save()
    // fix: release the SparkContext before the JVM exits
    spark.stop()
  }
}
// DataSourceV2 entry class: Spark instantiates it reflectively via .format("...HBaseSource").
class HBaseSource extends DataSourceV2 with ReadSupport with WriteSupport {
  /** Builds the batch reader from the user-supplied options (all three are required). */
  override def createReader(options: DataSourceOptions): DataSourceReader = {
    val tableName = options.get("hbase.table.name").get()
    val ddlSchema = options.get("schema").get()
    val columnMapping = options.get("cf.cc").get()
    new HBaseDataSourceReader(tableName, ddlSchema, columnMapping)
  }

  /** Builds the batch writer; the target table is fixed inside the writer itself. */
  override def createWriter(jobId: String, schema: StructType, mode: SaveMode, options: DataSourceOptions): Optional[DataSourceWriter] = {
    Optional.of(new HBaseDataSourceWrite)
  }
}
// Driver-side writer: only hands out per-partition writer factories.
// commit/abort have nothing global to do because each task writes directly to HBase.
class HBaseDataSourceWrite extends DataSourceWriter {
  override def createWriterFactory(): DataWriterFactory[Row] = new HBaseDataWriterFactory

  override def commit(messages: Array[WriterCommitMessage]): Unit = ()

  override def abort(messages: Array[WriterCommitMessage]): Unit = ()
}
// Serializable factory shipped to executors; one DataWriter per partition attempt.
class HBaseDataWriterFactory extends DataWriterFactory[Row] {
  override def createDataWriter(partitionId: Int, attemptNumber: Int): DataWriter[Row] =
    new HBaseDataWriter
}
class HBaseDataWriter extends DataWriter[Row] {
  // One HBase connection/table per task; released in commit()/abort().
  private val conn: Connection = HbaseTools.getHbaseConn
  private val table: Table = conn.getTable(TableName.valueOf("spark_hbase_write"))

  /**
   * Writes one row to HBase. Expected row layout: (name: String, score: String).
   * fix: the original used the constant rowkey "0001", so every record
   * overwrote the previous one and only a single row ever survived.
   * The name column is used as the rowkey so each record keeps its own row.
   * Also switched String.getBytes() (platform charset) to Bytes.toBytes (UTF-8).
   */
  override def write(record: Row): Unit = {
    val name: String = record.getString(0)
    val score: String = record.getString(1)
    val put = new Put(Bytes.toBytes(name))
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("name"), Bytes.toBytes(name))
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("score"), Bytes.toBytes(score))
    table.put(put)
  }

  /** Releases HBase resources; no commit message payload is needed by the driver. */
  override def commit(): WriterCommitMessage = {
    table.close()
    conn.close()
    null
  }

  /** fix: the original abort() leaked the table and connection on task failure. */
  override def abort(): Unit = {
    table.close()
    conn.close()
  }
}
class HBaseDataSourceReader(tableName: String, schema: String, cfcc: String) extends DataSourceReader {

  // Spark-side schema, parsed once from the DDL string the caller supplied.
  private val structType: StructType = StructType.fromDDL(schema)

  /** The schema Spark uses for the rows this source produces. */
  override def readSchema(): StructType = structType

  /** A single factory, so the whole HBase table is read as one partition. */
  override def createDataReaderFactories(): util.List[DataReaderFactory[Row]] = {
    import collection.JavaConverters._
    val factory = new HBaseReaderFactory(tableName, cfcc).asInstanceOf[DataReaderFactory[Row]]
    List(factory).asJava
  }
}
// Serializable factory sent to the executor that actually scans HBase.
class HBaseReaderFactory(tableName: String, cfcc: String) extends DataReaderFactory[Row] {
  override def createDataReader(): DataReader[Row] = new HBaseReader(tableName, cfcc)
}
/**
 * Executor-side reader: scans the whole HBase table and maps every Result
 * to a (name, score) pair from the fixed column family "cf".
 * NOTE(review): the cfcc parameter ("cf:name,cf:score") is not parsed yet —
 * the columns are still hard-coded below; confirm before generalizing.
 */
class HBaseReader(tableName: String, cfcc: String) extends DataReader[Row] {
  private var hbaseConnection: Connection = null
  private var table: Table = null
  private var resultScanner: ResultScanner = null

  // Lazily-consumed iterator over the scan; built once when the reader is created.
  val data: Iterator[Seq[AnyRef]] = getIterator

  def getIterator: Iterator[Seq[AnyRef]] = {
    import scala.collection.JavaConverters._
    hbaseConnection = HbaseTools.getHbaseConn
    table = hbaseConnection.getTable(TableName.valueOf(tableName))
    resultScanner = table.getScanner(new Scan())
    resultScanner.iterator().asScala.map { eachResult =>
      // Bytes.toString returns null when the cell is missing; Spark stores it as a null STRING
      val name: String = Bytes.toString(eachResult.getValue("cf".getBytes(), "name".getBytes()))
      val score: String = Bytes.toString(eachResult.getValue("cf".getBytes(), "score".getBytes()))
      Seq(name, score)
    }
  }

  override def next(): Boolean = data.hasNext

  override def get(): Row = Row.fromSeq(data.next())

  /** fix: the original only closed the connection, leaking the scanner and table. */
  override def close(): Unit = {
    if (resultScanner != null) resultScanner.close()
    if (table != null) table.close()
    if (hbaseConnection != null) hbaseConnection.close()
  }
}
9.2、自定义SparkSQL数据源进行数据分析
前面我们通过自定义sparkSQL数据源,可以获取Hbase当中的数据,以及将分析的结果保存到Hbase当中去,那么接下来我们就可以通过自定义SparkSQL数据源,实现分析Hbase当中的数据。
在travel_spark这个工程模块下的src/main/scala路径下创建package com.travel.programApp
然后创建com.travel.programApp.SparkSQLHBaseSource.scala
这个object文件,实现读取Hbase的数据
import com.travel.transaction._
import com.travel.utils.GlobalConfigUtils
import org.apache.spark.sql.{DataFrame, SparkSession}
object SparkSQLHBaseSource {

  def main(args: Array[String]): Unit = {
    val sparkSession: SparkSession = SparkSession.builder()
      .master("local[1]")
      .appName("sparkSQLHBase")
      .getOrCreate()
    sparkSession.sparkContext.setLogLevel("WARN")

    // Loads one HBase table through the custom DataSourceV2 source.
    // Both the option names and their values come from the config file.
    def loadTable(schemaKey: String, tableKey: String, hbaseSchemaKey: String): DataFrame =
      sparkSession.read
        .format("com.travel.programApp.hbaseSource.HBaseSource")
        .options(Map(
          GlobalConfigUtils.getProp("sparksql_table_schema") -> GlobalConfigUtils.getProp(schemaKey),
          GlobalConfigUtils.getProp("hbase_table_name") -> GlobalConfigUtils.getProp(tableKey),
          GlobalConfigUtils.getProp("hbase_table_schema") -> GlobalConfigUtils.getProp(hbaseSchemaKey)
        )).load()

    val order: DataFrame = loadTable("order.sparksql_table_schema", "syn.table.order_info", "order.hbase_table_schema")
    val driver: DataFrame = loadTable("drivers.spark_sql_table_schema", "syn.table.driver_info", "driver.hbase_table_schema")
    val renter: DataFrame = loadTable("registe.sparksql_table_schema", "syn.table.renter_info", "registe.hbase_table_schema")

    // Register temp views so the analysis SQL below can reference them by name.
    order.createOrReplaceTempView("order")
    driver.createOrReplaceTempView("driver")
    renter.createOrReplaceTempView("renter")

    // Cache: each table is queried by several analysis jobs.
    sparkSession.sqlContext.cacheTable("order")
    sparkSession.sqlContext.cacheTable("driver")
    sparkSession.sqlContext.cacheTable("renter")

    // OrderTransation.init(sparkSession)
    // RenterTransation.init(sparkSession)
    // DriverTransation.init(sparkSession)
    // HotOrderTransation.init(sparkSession)
    // HotAreaOrder.init(sparkSession)
  }
}
9.3、自定义SparkSQL数据源,将数据保存到Hbase
import org.apache.spark.sql.{DataFrame, SaveMode}
object SparkSQLHBaseSink {
  /**
   * Persists a DataFrame into HBase through the custom sink.
   *
   * @param dataFrame the analysis result to save
   * @param tableName target HBase table name
   * @param rowkey    column(s) used to build the rowkey
   * @param fields    columns to write into the column family
   */
  def saveToHBase(dataFrame: DataFrame, tableName: String, rowkey: String, fields: String): Unit = {
    val writer = dataFrame.write
      .format("com.travel.programApp.hbaseSink.HBaseSink")
      .mode(SaveMode.Overwrite)
      .option("hbase.table.name", tableName)
      .option("hbase.rowkey", rowkey)
      .option("hbase.fields", fields)
    writer.save()
  }
}
9.4、订单监控页面大屏开发
1、统计每个城市每日以及每月车辆的总数量
1.1、每个城市当日车辆总数量
sql语句定义
select
substr(tb.begin_address_code , 1 ,4) as begin_address_code ,
count(distinct vehicle_license) as dayVehicleCount
from
(select
begin_address_code ,
vehicle_license
from
order
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15') tb
group by
substr(tb.begin_address_code , 1 ,4)
1.2、每个城市当月车辆总数量
sql语句定义
select
substr(tb.begin_address_code , 1 ,4) as begin_address_code ,
count(distinct vehicle_license) as monthVehicleCount
from
(select
begin_address_code ,
vehicle_license
from
order
where
date_format(create_time , 'yyyy-MM') = '2019-07') tb
group by
substr(tb.begin_address_code , 1 ,4)
1.3、汇总每个城市当日以及当月车辆总数量两个指标
select
monthVehicle.begin_address_code ,
NVL(monthVehicle.monthVehicleCount ,0) as monthVehicleCount,
NVL(dayVehicle.dayVehicleCount , 0) as dayVehicleCount
from
monthVehicle left join dayVehicle
on
monthVehicle.begin_address_code = dayVehicle.begin_address_code
2、统计每个城市当日,当周,当月订单数量
对城市进行分组,然后将每个城市的订单进行分组求和
2.1、每个城市当日订单总数量
sql语句定义
select
substr(tb.begin_address_code , 1 , 4) as begin_address_code ,
count(1) as dayOrderCount
from
(select begin_address_code from order
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15') tb
group by
substr(tb.begin_address_code , 1 , 4)
2.2、每个城市本周订单总数量
sql语句定义
select
substr(tb.begin_address_code , 1 , 4) as begin_address_code ,
count(1) as weekOrderCount
from
(select begin_address_code from order
where
weekofyear(create_time) = '29') tb
group by
substr(tb.begin_address_code , 1 , 4)
2.3、每个城市本月订单总数量
sql语句定义
select
substr(tb.begin_address_code , 1 , 4) as begin_address_code ,
count(1) as monthOrderCount
from
(select begin_address_code from order
where
date_format(create_time , 'yyyy-MM') = '2019-07') tb
group by
substr(tb.begin_address_code , 1 , 4)
2.4、汇总当日,当周,当月每个城市的订单总数量
sql语句定义
select
tb1.begin_address_code ,
tb1.monthOrderCount ,
tb1.weekOrderCount ,
dayOrder.dayOrderCount
from
(select
monthOrder.begin_address_code ,
monthOrder.monthOrderCount ,
weekOrder.weekOrderCount
from
monthOrder left join weekOrder
on
monthOrder.begin_address_code = weekOrder.begin_address_code) tb1 left join dayOrder
on
tb1.begin_address_code = dayOrder.begin_address_code
3、汇总车辆以及订单指标
sql语句定义
select
cast(
concat(
if(v_begin_address_code is null ,o_begin_address_code , v_begin_address_code)
, '00')
as string) as rk,
cast(if(monthVehicleCount is null , 0 ,monthVehicleCount) as string) as monthVehicleCount ,
cast(if(dayVehicleCount is null , 0 , dayVehicleCount) as string) as dayVehicleCount ,
cast(
concat(
if(o_begin_address_code is null ,v_begin_address_code , o_begin_address_code)
, '00')
as string) as o_begin_address_code,
cast(if(monthOrderCount is null , 0 , monthOrderCount) as string) as monthOrderCount ,
cast(if(weekOrderCount is null, 0 , weekOrderCount) as string) as weekOrderCount ,
cast(if(dayOrderCount is null ,0, dayOrderCount) as string) as dayOrderCount
from
(select
vehcileCount.begin_address_code as v_begin_address_code,
vehcileCount.monthVehicleCount ,
vehcileCount.dayVehicleCount ,
_totalOrder.begin_address_code as o_begin_address_code ,
_totalOrder.monthOrderCount ,
_totalOrder.weekOrderCount ,
_totalOrder.dayOrderCount
from
vehcileCount full outer join _totalOrder
on
vehcileCount.begin_address_code = _totalOrder.begin_address_code
) tb
4、统计平台的总订单数,当月总订单数,出租车行驶总里程数,日均订单
4.1、平台总订单
sql语句定义
select
1 myid ,
count(1) as totalOrderNum
from order
4.2、当月总订单
sql语句定义
select
1 myid ,
count(1) as monthOrderNum
from
order
where
date_format(create_time , 'yyyy-MM') = '2019-07'
4.3、出租车行驶总里程数
sql语句定义
select
1 as myid ,
sum(charge_mileage) as charge_mileage
from
order
4.4、日均订单数量
sql语句定义
select
1 as myid ,
round((tb.totalOrder /tb.num) , 2) as avgOrderNum
from
(select
count(1) as totalOrder ,
(select
DATEDIFF(
max(date_format(create_time , 'yyyy-MM-dd')) ,
min(date_format(create_time , 'yyyy-MM-dd'))
) as dayNum
from
order ) num
from order) tb
4.5、汇总以上所有指标
sql语句定义
select
cast(t2.myid as string) as rk,
cast(t2.totalOrderNum as string) as totalOrderNum,
cast(t2.monthOrderNum as string) as monthOrderNum,
cast(t2.avgOrderNum as string) as avgOrderNum ,
cast(_totalCharge_mileage.charge_mileage as string) as charge_mileage
from
(select
t1.myid ,
t1.totalOrderNum ,
t1.monthOrderNum ,
avgOrder.avgOrderNum
from
(select
_total_order.myid ,
_total_order.totalOrderNum ,
month_totalOrder.monthOrderNum
from
_total_order left join month_totalOrder
on
_total_order.myid = month_totalOrder.myid) t1 left join avgOrder
on
t1.myid = avgOrder.myid) t2 left join _totalCharge_mileage
on
t2.myid = _totalCharge_mileage.myid
5、统计平台总订单数,平台注册用户数,平台订单总金额
5.1、平台总订单数
sql语句定义
select
1 myid ,
count(1) as totalOrderNum
from
order
5.2、平台注册用户数
sql语句定义
select 1 as myid , count(1) as registerTotalCount from renter
5.3、平台总收入数
sql语句定义
select 1 as myid , sum(pay_all) pay_all from order
5.4、汇总平台总订单数,平台注册用户数,平台订单总金额
sql定义
select
tb1.myid as myid,
cast(tb1.totalCount as string) as totalCount ,
cast(tb1.registerTotalCount as string) as registerTotalCount,
cast(pay_all.pay_all as string) as pay_all
from
(select
totalOrders.myid ,
totalOrders.totalOrderNum as totalCount ,
rtc.registerTotalCount
from
totalOrders , rtc
where
totalOrders.myid = rtc.myid) tb1 , pay_all
where
tb1.myid = pay_all.myid
9.5、用户统计功能模块
统计用户留存率以及活跃用户
1、统计各个城市当日新增和当日活跃用户数
1.1、当日新增用户数
sql定义
select
substr(last_login_city , 1 , 4) as last_login_city ,
count(1) as _day_new_user
from
(select last_login_city from renter
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15'
and
last_login_city != '') tb
group by substr(last_login_city , 1 , 4)
1.2、当日活跃用户数
sql定义
select
substr(last_login_city,1,4) as last_login_city ,
count(distinct id) DAU
from
(select
last_login_city ,
id
from renter
where date_format(last_logon_time , 'yyyy-MM-dd') = '2019-07-15'
and
last_login_city != '') tb
group by
substr(last_login_city,1,4)
1.3、关联日新增和日活跃指标
sql定义
select
cast(
concat(
if(_dayRegister.last_login_city is null , _dayActive.last_login_city , _dayRegister.last_login_city)
, '00'
)
as string
) as rk ,
cast(
concat(
if(_dayRegister.last_login_city is null , _dayActive.last_login_city , _dayRegister.last_login_city)
, '00'
)
as string
) as _city_code ,
cast(if(_dayRegister._day_new_user is null , 0 , _dayRegister._day_new_user) as string) as _day_new_user ,
cast(if(_dayActive.DAU is null , 0 , _dayActive.DAU) as string) as DAU
from
_dayRegister full outer join _dayActive
on
_dayRegister.last_login_city = _dayActive.last_login_city
2、统计平台当日,本周,当月新注册用户数
2.1、当日新增用户数
sql定义
select
1 as myid ,
count(1) as dayNewUserCount
from
(select id from renter
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15') tb
2.2、本周新增用户数
sql定义
select 1 as myid , count(1) as weekNewUserCount
from
(select id from renter
where
weekofyear(create_time) = '29') tb
2.3、当月新增用户数
sql定义
select 1 as myid , count(1) as monthNewUserCount
from
(select id from renter
where
date_format(create_time , 'yyyy-MM') = '2019-07') tb
2.4、汇总当日,当周,当月新增用户数
sql定义
select
cast(tb.myid as string) as myid,
cast(tb.monthNewUserCount as string) as monthNewUserCount,
cast(tb.weekNewUserCount as string) as weekNewUserCount,
cast(dnuc.dayNewUserCount as string) as dayNewUserCount
from
(select
mnuc.myid ,
mnuc.monthNewUserCount ,
wnuc.weekNewUserCount
from
mnuc , wnuc
where
mnuc.myid = wnuc.myid) tb , dnuc
where
tb.myid = dnuc.myid
3、用户留存率统计
3.1、统计次日留存率
sql定义
select
1 as myid ,
concat(cast(count(tb2.last_logon_time)*100/count(tb1.create_time) as string) , '%') as dayStateRate
from
(select
id ,
date_format(create_time , 'yyyy-MM-dd') create_time
from
renter
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-14') tb1
left outer join
(select
id ,
date_format(last_logon_time , 'yyyy-MM-dd') last_logon_time
from
renter
where
date_format(last_logon_time , 'yyyy-MM-dd') = '2019-07-15') tb2
on
tb1.id = tb2.id
3.2、计算本周留存率
sql定义
select
1 as myid ,
concat(cast(count(tb2.last_logon_time)*100/count(tb1.create_time) as string) , '%') as weekStateRate
from
(select
id ,
date_format(create_time , 'yyyy-MM-dd') create_time
from
renter
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15') tb1
left outer join
(select
id ,
date_format(last_logon_time , 'yyyy-MM-dd') last_logon_time
from
renter
where
date_format(last_logon_time , 'yyyy-MM-dd') = '2019-07-07') tb2
on
tb1.id = tb2.id
3.3、计算本月留存率
sql定义
select
1 as myid ,
concat(cast(count(tb2.last_logon_time)*100/count(tb1.create_time) as string) , '%') as monthStateRate
from
(select
id ,
date_format(create_time , 'yyyy-MM-dd') create_time
from
renter
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-30') tb1
left outer join
(select
id ,
date_format(last_logon_time , 'yyyy-MM-dd') last_logon_time
from
renter
where
date_format(last_logon_time , 'yyyy-MM-dd') = '2019-07-01') tb2
on
tb1.id = tb2.id
3.4、汇总次日,本周,本月留存率
select
cast(NVL(tb1.myid,1) as string) as myid ,
cast(NVL(tb1.monthStateRate , '0.0%') as string) as monthStateRate,
cast(NVL(tb1.weekStateRate, '0.0%') as string) as weekStateRate,
cast(NVL(dayStateRate.dayStateRate, '0.0%') as string) as dayStateRate
from
(select
NVL(monthStateRate.myid , 1) as myid,
NVL(monthStateRate.monthStateRate , '0.0%') as monthStateRate,
NVL(weekStateRate.weekStateRate , '0.0%') as weekStateRate
from
monthStateRate full outer join weekStateRate
on
monthStateRate.myid = weekStateRate.myid) tb1 full outer join dayStateRate
on
tb1.myid = dayStateRate.myid
4、活跃用户统计
统计日活,周活,月活跃用户数量
4.1、日活跃用户统计
sql定义
select
1 tmpID ,
count(distinct id) DAU
from renter
where date_format(last_logon_time , 'yyyy-MM-dd') = '2019-07-15'
4.2、周活跃用户统计
sql定义
select
1 tmpID ,
count(distinct id) WAU
from renter
where weekofyear(date_format(last_logon_time , 'yyyy-MM-dd')) = '29'
4.3、月活跃用户统计
sql定义
select
1 tmpID ,
count(distinct id) MAU
from renter
where
date_format(last_logon_time , 'yyyy-MM') = '2019-07'
9.6、司机统计功能模块
1、平台有效订单统计
统计当日,本周,本月,本季度,本年平台的有效订单
1.1、当日平台有效订单数
sql定义
select
city_name ,
count(1) as _effective_num_day
from order
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15'
and
cancel = 0
group by
city_name
1.2、平台本周有效订单数
sql定义
select
city_name ,
count(1) as _effective_num_week
from order
where
weekofyear(create_time) = '29'
and
cancel = 0
group by
city_name
1.3、平台本月有效订单数
sql定义
select
city_name ,
count(1) as _effective_num_month
from order
where
date_format(create_time , 'yyyy-MM') = '2019-07'
and
cancel = 0
group by
city_name
1.4、平台本季度有效订单数
sql定义
select
city_name ,
count(1) as _effective_num_quarter
from order
where
quarter(create_time) = '3'
and
date_format(create_time , 'yyyy') = '2019'
and
cancel = 0
group by
city_name
1.5、平台本年有效订单数
sql定义
select
city_name ,
count(1) as _effective_num_year
from order
where
date_format(create_time , 'yyyy') = '2019'
and
cancel = 0
group by
city_name
2、平台全部订单统计
统计当日,本周,本月,本季度,本年的平台全部订单数
2.1、当日平台全部订单
sql定义
select
city_name ,
count(1) as _total_num_day
from order
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15'
group by
city_name
2.2、本周平台全部订单
sql定义
select
city_name ,
count(1) as _total_num_week
from order
where
weekofyear(create_time) = '29'
group by
city_name
2.3、本月平台全部订单
sql定义
select
city_name ,
count(1) as _total_num_month
from order
where
date_format(create_time , 'yyyy-MM') = '2019-07'
group by
city_name
2.4、本季度平台全部订单
sql定义
select
city_name ,
count(1) as _total_num_quarter
from order
where
quarter(create_time) = '3'
and
date_format(create_time , 'yyyy') = '2019'
group by
city_name
2.5、本年平台全部订单
sql定义
select
city_name ,
count(1) as _total_num_year
from order
where
date_format(create_time , 'yyyy') = '2019'
group by
city_name
3、平台订单完成率统计
统计当日,本周,本月,本季度,本年度订单的完成率
3.1、当日订单完成率
sql定义
select
city_name ,
case when _effective_num_day=0 then '0.0%' else CONCAT(cast(round(_effective_num_day*100/_total_num_day, 2) as string),'%') end as _day_comple_rate
from
(select
_effective_order_day.city_name ,
_effective_order_day._effective_num_day ,
_total_order_day._total_num_day
from
_effective_order_day left join _total_order_day
on
_effective_order_day.city_name = _total_order_day.city_name
) tb
3.2、本周订单完成率
sql定义
select
city_name ,
case when _effective_num_week=0 then '0.0%' else CONCAT(cast(round(_effective_num_week*100/_total_num_week, 2) as string),'%') end as _week_comple_rate
from
(select
_effective_order_week.city_name ,
_effective_order_week._effective_num_week ,
_total_order_week._total_num_week
from
_effective_order_week left join _total_order_week
on
_effective_order_week.city_name = _total_order_week.city_name
) tb
3.3、本月订单完成率
sql定义
select
city_name ,
case when _effective_num_month=0 then '0.0%' else CONCAT(cast(round(_effective_num_month*100/_total_num_month, 2) as string),'%') end as _month_comple_rate
from
(select
_effective_order_month.city_name ,
_effective_order_month._effective_num_month ,
_total_order_month._total_num_month
from
_effective_order_month left join _total_order_month
on
_effective_order_month.city_name = _total_order_month.city_name
) tb
3.4、本季度订单完成率
sql定义
select
city_name ,
case when _effective_num_quarter=0 then '0.0%' else CONCAT(cast(round(_effective_num_quarter*100/_total_num_quarter, 2) as string),'%') end as _quarter_comple_rate
from
(select
_effective_order_quarter.city_name ,
_effective_order_quarter._effective_num_quarter ,
_total_order_quarter._total_num_quarter
from
_effective_order_quarter left join _total_order_quarter
on
_effective_order_quarter.city_name = _total_order_quarter.city_name
) tb
3.5、本年度订单完成率
sql定义
select
city_name ,
case when _effective_num_year=0 then '0.0%' else CONCAT(cast(round(_effective_num_year*100/_total_num_year, 2) as string),'%') end as _year_comple_rate
from
(select
_effective_order_year.city_name ,
_effective_order_year._effective_num_year ,
_total_order_year._total_num_year
from
_effective_order_year left join _total_order_year
on
_effective_order_year.city_name = _total_order_year.city_name
) tb
4、汇总平台有效订单,平台全部订单以及平台订单完成率指标
sql定义
select
cast(1 as string) as rk ,
cast(year_quarter_month_week_tb.city_name as string) as city_name ,
cast(year_quarter_month_week_tb._year_comple_rate as string) as _year_comple_rate ,
cast(if(year_quarter_month_week_tb._quarter_comple_rate is null , '0.0%' , year_quarter_month_week_tb._quarter_comple_rate) as string) as _quarter_comple_rate,
cast(if(year_quarter_month_week_tb._month_comple_rate is null , '0.0%' , year_quarter_month_week_tb._month_comple_rate) as string) as _month_comple_rate,
cast(if(year_quarter_month_week_tb._week_comple_rate is null , '0.0%' , year_quarter_month_week_tb._week_comple_rate) as string) as _week_comple_rate,
cast(if(_plat_order_day_status._day_comple_rate is null , '0.0%' , _plat_order_day_status._day_comple_rate) as string) as _day_comple_rate
from
(select
year_quarter_month_tb.city_name ,
year_quarter_month_tb._year_comple_rate ,
year_quarter_month_tb._quarter_comple_rate ,
year_quarter_month_tb._month_comple_rate ,
_plat_order_week_status._week_comple_rate
from
(select
year_quarter_tb.city_name ,
year_quarter_tb._year_comple_rate ,
year_quarter_tb._quarter_comple_rate ,
_plat_order_month_status._month_comple_rate
from
(select
_plat_order_year_status.city_name ,
_plat_order_year_status._year_comple_rate ,
_plat_order_quarter_status._quarter_comple_rate
from
_plat_order_year_status left join _plat_order_quarter_status
on
_plat_order_year_status.city_name = _plat_order_quarter_status.city_name
) year_quarter_tb left join _plat_order_month_status
on
year_quarter_tb.city_name = _plat_order_month_status.city_name
) year_quarter_month_tb left join _plat_order_week_status
on
year_quarter_month_tb.city_name = _plat_order_week_status.city_name
) year_quarter_month_week_tb left join _plat_order_day_status
on
year_quarter_month_week_tb.city_name = _plat_order_day_status.city_name
5、统计司机有效订单数
5.1、当日司机有效订单数
sql定义
select
driver_id ,
driver_name,
count(1) as _effective_num_day
from order
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15'
and
cancel = 0
group by
driver_id ,
driver_name
5.2、当周司机有效订单数
sql定义
select
driver_id ,
driver_name ,
count(1) as _effective_num_week
from order
where
weekofyear(create_time) = '29'
and
cancel = 0
group by
driver_id ,
driver_name
5.3、当月司机有效订单数
sql定义
select
driver_id ,
driver_name ,
count(1) as _effective_num_month
from order
where
date_format(create_time , 'yyyy-MM') = '2019-07'
and
cancel = 0
group by
driver_id ,
driver_name
5.4、当季度司机有效订单数
当季度司机有效订单数
select
driver_id ,
driver_name ,
count(1) as _effective_num_quarter
from order
where
quarter(create_time) = '3'
and
date_format(create_time , 'yyyy') = date_format(NOW() , 'yyyy')
and
cancel = 0
group by
driver_id ,
driver_name
5.5、当年司机有效订单数
sql定义
select
driver_id ,
driver_name ,
count(1) as _effective_num_year
from order
where
date_format(create_time , 'yyyy') = '2019'
and
cancel = 0
group by
driver_id ,
driver_name
6、统计司机订单总数
统计司机当日,当周,当月,当季度,当年订单总数
6.1、当日订单总数统计
sql定义
select
driver_id ,
driver_name ,
count(1) as _total_num_day
from order
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15'
group by
driver_id , driver_name
6.2、当周订单总数统计
sql定义
select
driver_id ,
driver_name ,
count(1) as _total_num_week
from order
where
weekofyear(create_time) = '29'
group by
driver_id , driver_name
6.3、当月订单总数
sql定义
select
driver_id ,
driver_name ,
count(1) as _total_num_month
from order
where
date_format(create_time , 'yyyy-MM') = '2019-07'
group by
driver_id , driver_name
6.4、当季度订单总数
sql定义
select
driver_id ,
driver_name ,
count(1) as _total_num_quarter
from order
where
quarter(create_time) = '3'
and
date_format(create_time , 'yyyy') = date_format(NOW() , 'yyyy')
group by
driver_id , driver_name
6.5、当年订单总数
sql定义
select
driver_id ,
driver_name ,
count(1) as _total_num_year
from order
where
date_format(create_time , 'yyyy') = '2019'
group by
driver_id , driver_name
7、基于司机有效订单数以及订单总数统计司机订单完成率
基于司机有效订单数,以及司机总订单数,统计每个司机的当日,本周,本月,本季度,本年订单完成率
7.1、司机当日订单完成率
select
driver_id ,
driver_name ,
case when _effective_num_day=0 then '0.0%' else CONCAT(cast(round(_effective_num_day*100/_total_num_day, 2) as string),'%') end as _day_comple_rate
from
(select
_effective_driver_order_day.driver_id ,
_effective_driver_order_day.driver_name ,
_effective_driver_order_day._effective_num_day ,
_total_driver_order_day._total_num_day
from
_effective_driver_order_day left join _total_driver_order_day
on
_effective_driver_order_day.driver_id = _total_driver_order_day.driver_id
) tb
7.2、司机当周订单完成率
select
driver_id ,
driver_name ,
case when _effective_num_week=0 then '0.0%' else CONCAT(cast(round(_effective_num_week*100/_total_num_week, 2) as string),'%') end as _week_comple_rate
from
(select
_effective_driver_order_week.driver_id ,
_effective_driver_order_week.driver_name ,
_effective_driver_order_week._effective_num_week ,
_total_driver_order_week._total_num_week
from
_effective_driver_order_week left join _total_driver_order_week
on
_effective_driver_order_week.driver_id = _total_driver_order_week.driver_id
) tb
7.3、司机当月订单完成率
select
driver_id ,
driver_name ,
case when _effective_num_month=0 then '0.0%' else CONCAT(cast(round(_effective_num_month*100/_total_num_month, 2) as string),'%') end as _month_comple_rate
from
(select
_effective_driver_order_month.driver_id ,
_effective_driver_order_month.driver_name ,
_effective_driver_order_month._effective_num_month ,
_total_driver_order_month._total_num_month
from
_effective_driver_order_month left join _total_driver_order_month
on
_effective_driver_order_month.driver_id = _total_driver_order_month.driver_id
) tb
7.4、司机当季度订单完成率
select
driver_id ,
driver_name ,
case when _effective_num_quarter=0 then '0.0%' else CONCAT(cast(round(_effective_num_quarter*100/_total_num_quarter, 2) as string),'%') end as _quarter_comple_rate
from
(select
_effective_driver_order_quarter.driver_id ,
_effective_driver_order_quarter.driver_name ,
_effective_driver_order_quarter._effective_num_quarter ,
_total_driver_order_quarter._total_num_quarter
from
_effective_driver_order_quarter left join _total_driver_order_quarter
on
_effective_driver_order_quarter.driver_id = _total_driver_order_quarter.driver_id
) tb
7.5、司机当年订单完成率
select
driver_id ,
driver_name ,
case when _effective_num_year=0 then '0.0%' else CONCAT(cast(round(_effective_num_year*100/_total_num_year, 2) as string),'%') end as _year_comple_rate
from
(select
_effective_driver_order_year.driver_id ,
_effective_driver_order_year.driver_name ,
_effective_driver_order_year._effective_num_year ,
_total_driver_order_year._total_num_year
from
_effective_driver_order_year left join _total_driver_order_year
on
_effective_driver_order_year.driver_id = _total_driver_order_year.driver_id
) tb
7.6、汇总司机当日,当周,当月当季度,当年完成率指标
select
cast(year_quarter_month_week_tb.driver_id as string) as rk ,
cast(year_quarter_month_week_tb.driver_id as string) as driver_id,
cast(year_quarter_month_week_tb.driver_name as string) as driver_name,
cast(year_quarter_month_week_tb._year_comple_rate as string) as _year_comple_rate,
cast(if(year_quarter_month_week_tb._quarter_comple_rate is null , '0.0%' , year_quarter_month_week_tb._quarter_comple_rate) as string) as _quarter_comple_rate,
cast(if(year_quarter_month_week_tb._month_comple_rate is null , '0.0%' , year_quarter_month_week_tb._month_comple_rate) as string) as _month_comple_rate,
cast(if(year_quarter_month_week_tb._week_comple_rate is null , '0.0%' , year_quarter_month_week_tb._week_comple_rate) as string) as _week_comple_rate,
cast(if(_driver_order_day_status._day_comple_rate is null , '0.0%' , _driver_order_day_status._day_comple_rate) as string) as _day_comple_rate
from
(select
year_quarter_month_tb.driver_id ,
year_quarter_month_tb.driver_name ,
year_quarter_month_tb._year_comple_rate ,
year_quarter_month_tb._quarter_comple_rate ,
year_quarter_month_tb._month_comple_rate ,
_driver_order_week_status._week_comple_rate
from
(select
year_quarter_tb.driver_id ,
year_quarter_tb.driver_name ,
year_quarter_tb._year_comple_rate ,
year_quarter_tb._quarter_comple_rate ,
_driver_order_month_status._month_comple_rate
from
(select
_driver_order_year_status.driver_id ,
_driver_order_year_status.driver_name ,
_driver_order_year_status._year_comple_rate ,
_driver_order_quarter_status._quarter_comple_rate
from
_driver_order_year_status left join _driver_order_quarter_status
on
_driver_order_year_status.driver_id = _driver_order_quarter_status.driver_id
) year_quarter_tb left join _driver_order_month_status
on
year_quarter_tb.driver_id = _driver_order_month_status.driver_id
) year_quarter_month_tb left join _driver_order_week_status
on
year_quarter_month_tb.driver_id = _driver_order_week_status.driver_id
) year_quarter_month_week_tb left join _driver_order_day_status
on
year_quarter_month_week_tb.driver_id = _driver_order_day_status.driver_id
8、各城市新增注册司机数
统计各个城市当日,当周,当月,当季度,当年新增注册司机数
8.1、当日新增注册司机数
select
register_city ,
count(distinct id) _register_day_num
from
driver
where
date_format(create_time , 'yyyy-MM-dd') = '2019-07-15'
group by
register_city
8.2、当周新增注册司机数
select
register_city ,
count(distinct id) _register_week_num
from
driver
where
weekofyear(create_time) = '29'
group by
register_city
8.3、当月新增注册司机数
select
register_city ,
count(distinct id) _register_month_num
from
driver
where
date_format(create_time , 'yyyy-MM') = '2019-07'
group by
register_city
8.4、当季度新增注册司机数
select
register_city ,
count(distinct id) _register_quarter_num
from
driver
where
quarter(create_time) = '3'
and
date_format(create_time , 'yyyy') = date_format(NOW() , 'yyyy')
group by
register_city
8.5、当年新增注册司机数
select
register_city ,
count(distinct id) _register_year_num
from
driver
where
date_format(create_time , 'yyyy') = '2019'
group by
register_city
9.7、热门订单以及热门地区统计
1、统计每个六边形区域订单个数
通过uber的H3算法,我们知道,为了完整的覆盖所有的地区,我们可以将地区划分成一个个的六边形,而且六边形的半径范围我们也可以自定义,我们需要统计每个六边形的订单数量
在我们的travel_spark工程模块下的src/main/scala路径下创建package com.travel.transaction 然后创建scala object文件 HotOrderTransation.scala
import java.util
import com.travel.programApp.SparkSQLHBaseSink
import com.travel.tools.GetCenterPointFromListOfCoordinates
import com.uber.h3core.H3Core
import com.uber.h3core.util.GeoCoord
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
import redis.clients.jedis.GeoCoordinate
import scala.collection.JavaConverters
/**
* Created by laowang
*/
object HotOrderTransation {
  // Single shared H3 instance; H3Core.newInstance() is expensive to build.
  val h3 = H3Core.newInstance()

  /**
   * For every H3 hexagon (resolution 7) computes the number of orders placed
   * since the given day, resolves the hexagon's center point, and saves the
   * result to the "hotOrder" HBase table via SparkSQLHBaseSink.
   *
   * @param sparkSession active session; the `order` table must already be queryable.
   */
  def init(sparkSession: SparkSession): Unit = {
    sparkSession.udf.register("locationToH3", locationToH3 _)
    // Tag every order with its H3 cell code.
    // FIX: locationToH3 takes (lat, lon, res); the original call passed
    // (open_lng, open_lat, 7), swapping the coordinates. The argument order
    // now matches HotAreaOrderSQL.orderHotTable_day.
    lazy val orderHotTmp = (time: Int) =>
      s"""
         |select
         |open_lng ,
         |open_lat ,
         |create_time ,
         |begin_address_code ,
         |locationToH3(open_lat , open_lng , 7) as h3Code
         |from
         |order
         |where ${time} <= cast(date_format(create_time , 'yyyyMMdd') as int)
      """.stripMargin
    sparkSession.sql(orderHotTmp(20190715)).createOrReplaceTempView("orderHotTmp")
    // For each h3Code keep the single row with the highest `rank` (last order
    // by create_time); that row's `rank` equals the order count of the cell.
    // FIX: the original projected `tb2.num as count`, but `num` is always 1
    // for the surviving row (see `where tb2.num = 1`); the count is
    // `tb2.rank`, consistent with HotAreaOrderSQL._today_hot_area.
    lazy val getHotArea =
      """
        |select tb2.h3Code ,
        |tb2.rank as count ,
        |tb2.create_time ,
        |tb2.begin_address_code
        |from
        |(select * ,
        |row_number() over(partition by h3Code order by rank desc) num
        |from
        |(select * ,
        |row_number() over(partition by h3Code order by create_time) rank
        |from
        |orderHotTmp) tb) tb2
        |where tb2.num = 1
      """.stripMargin
    val rdd: RDD[Row] = sparkSession.sql(getHotArea).rdd
    val resultHot: RDD[(String, String, String, Int)] = rdd.map { line =>
      val h3Code = line.getAs[Long]("h3Code")
      val count = line.getAs[Int]("count")
      val begin_address_code = line.getAs[String]("begin_address_code")
      // Hexagon boundary vertices -> center point used as the hot-spot coordinate.
      val geoCood: List[GeoCoord] = h3To6(h3Code)
      val list = new util.ArrayList[GeoCoordinate]()
      for (in <- geoCood) {
        // redis GeoCoordinate takes (longitude, latitude).
        list.add(new GeoCoordinate(in.lng, in.lat))
      }
      val toList = JavaConverters.asScalaIteratorConverter(list.iterator()).asScala.toList
      val centerPoint: GeoCoordinate = GetCenterPointFromListOfCoordinates.getCenterPoint(toList)
      // HBase row key = the H3 cell code.
      val rk = h3Code.toString
      (rk, begin_address_code, centerPoint.getLongitude + "," + centerPoint.getLatitude, count)
    }
    import sparkSession.sqlContext.implicits._
    val hotOrder = resultHot.toDF("rk", "begin_address_code", "centerPoint", "count")
    SparkSQLHBaseSink.saveToHBase(hotOrder, "hotOrder", "rk", "rk,begin_address_code,centerPoint,count")
  }

  // UDF: (latitude, longitude) -> H3 cell index at the given resolution.
  private def locationToH3(lat: Double, lon: Double, res: Int): Long =
    h3.geoToH3(lat, lon, res)

  // H3 cell index -> the boundary vertices of the hexagon.
  private def h3To6(geoCode: Long): List[GeoCoord] = {
    val boundary: util.List[GeoCoord] = h3.h3ToGeoBoundary(geoCode)
    JavaConverters.asScalaIteratorConverter(boundary.iterator()).asScala.toList
  }
}
2、统计每个热门区域的订单数量
求取每个街道的订单数量总和,以及每个街道的订单增长情况
第一步:sql文件定义
在travel_spark项目模块下,创建package com.travel.sql ,然后在该package下面创建scala的object文件 HotAreaOrderSQL.scala,用于存放我们的sql语句
package com.travel.sql
/**
* Created by laowang
*/
object HotAreaOrderSQL {
// Today's hot-zone orders -- H3 resolution 7 (hexagon radius roughly 1200 m,
// per the original notes). Tags each order of the given day with its H3 cell code.
lazy val orderHotTable_day =(time:String)=> s"""
|select
|open_lat ,
|open_lng ,
|create_time ,
|begin_address_code ,
|locationToH3(open_lat , open_lng , 7) as h3Code
| from order
| where date_format(create_time , 'yyyy-MM-dd') = '${time}'
""".stripMargin// in production use date_format(NOW() , 'yyyy-MM-dd') instead of a fixed date
// Collapses orderHotTable_day to one row per h3Code: the inner row_number orders
// rows by create_time (rank); the outer one keeps only the row with the highest
// rank (num = 1), so that row's rank equals the order count of the cell.
// NOTE(review): the inner subquery alias `table` is a reserved word in some SQL
// dialects -- confirm the target Spark version parses it.
lazy val _today_hot_area = """
|select
|table2.begin_address_code ,
|h3ToCoordinate(table2.h3Code) coordinate,
|table2.h3Code ,
|1200 radius ,
|table2.rank as count
| from
|(select * ,
|row_number() over(partition by h3Code order by rank desc) num
|from
|(select * ,
|row_number() over(partition by h3Code order by create_time desc) rank
|from orderHotTable_day
|) table
|)table2
|where table2.num = 1
""".stripMargin
// Yesterday's hot-zone orders (e.g. 2019-07-14) -- same H3 tagging as orderHotTable_day.
lazy val yestdayOrderStatusTMP =(time:String)=> s"""
|select
|open_lat ,
|open_lng ,
|create_time ,
|begin_address_code ,
|locationToH3(open_lat , open_lng , 7) as h3Code
| from order
| where
| date_format(create_time , 'yyyy-MM-dd') = '${time}'
""".stripMargin// in production: DATE_SUB(date_format(NOW() , 'yyyy-MM-dd'), 1)
// Same per-cell collapse as _today_hot_area, applied to yesterday's orders.
lazy val yestdayOrderStatus = """
|select
|table2.begin_address_code ,
|h3ToCoordinate(table2.h3Code) coordinate ,
|table2.h3Code ,
|1200 radius ,
|table2.rank as count
| from
|(select * ,
|row_number() over(partition by h3Code order by rank desc) num
|from
|(select * ,
|row_number() over(partition by h3Code order by create_time desc) rank
|from yestdayOrderStatusTMP
|) table
|)table2
|where table2.num = 1
""".stripMargin
// Full outer join of today's vs yesterday's per-cell rows on h3Code.
// The `count` column becomes today's count minus yesterday's (null-safe on
// both sides), i.e. the per-cell growth in orders; address/coordinate/radius
// fall back to yesterday's values when a cell only appears in yesterday's set.
lazy val newHotOrderTmp =
"""
|select
|if(t_begin_address_code is null,y_begin_address_code , t_begin_address_code) as city_code,
|if(t_coordinate is null,y_coordinate , t_coordinate) as coordinate,
|if(t_radius is null , y_radius , t_radius) as radius ,
|if(t_count is null,0-if(y_count is null , 0 , y_count) , t_count-if(y_count is null,0,y_count)) as count
|from
|(
|select
|today_hot_area.begin_address_code as t_begin_address_code,
|today_hot_area.coordinate as t_coordinate ,
|today_hot_area.radius as t_radius ,
|today_hot_area.count as t_count ,
|yestdayOrderStatus.begin_address_code as y_begin_address_code ,
|yestdayOrderStatus.coordinate as y_coordinate ,
|yestdayOrderStatus.radius as y_radius ,
|yestdayOrderStatus.count as y_count
|from
|today_hot_area full outer join yestdayOrderStatus
|on
|today_hot_area.h3Code = yestdayOrderStatus.h3Code
|) tb
""".stripMargin
// Final per-city row: today's order count joined with the new-order count.
// Every column is cast to string for the HBase sink; rk (the city code) is
// the row key.
lazy val _hot_order ="""
|select
|cast(_today_hot_df.city_code as string) as rk ,
|cast(_today_hot_df.city_code as string) as _city_code ,
|cast(_today_hot_df.city_num as string) as _day_order_num ,
|cast(_new_hot_df.city_num as string) as _new_orders_num
|from
|_today_hot_df left join _new_hot_df
|on
|_today_hot_df.city_code = _new_hot_df.city_code
""".stripMargin
}
第二步:定义方法调用入口
在travel_spark项目的src/main/scala路径下,创建package com.travel.transaction,然后在该package下面创建
com.travel.transaction.HotAreaOrder.scala的object文件
import com.travel.programApp.SparkSQLHBaseSink
import com.travel.sql.HotAreaOrderSQL
import com.uber.h3core.H3Core
import com.uber.h3core.util.GeoCoord
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
/**
* Created by angel
*/
object HotAreaOrder {
// Single shared H3 instance (expensive to create).
val h3 = H3Core.newInstance
// Entry point: runs the hot-area order statistics below.
def init(sparkSession:SparkSession): Unit ={
_thrid_hotArea_order(sparkSession)
}
//======================== today's hot-zone orders and today's NEW hot-zone orders =================
def _thrid_hotArea_order(sparkSession:SparkSession): Unit ={
sparkSession.udf.register("locationToH3" , locationToH3 _)
sparkSession.udf.register("h3ToCoordinate" , h3ToCoordinate _)
// Today's hot-zone order count -- H3 resolution 7, hexagon radius roughly 1200 m.
// Step 1: tag each of today's orders with its H3 cell code.
sparkSession.sql(HotAreaOrderSQL.orderHotTable_day("2019-07-15")).createOrReplaceTempView("orderHotTable_day")
// Step 2: today's hot-zone orders, one row per hexagon, as a DataFrame.
val today_hot_area = sparkSession.sql(HotAreaOrderSQL._today_hot_area)
.toDF("begin_address_code" , "coordinate","h3Code" , "radius" , "count")
// Temp view reused below when computing today's NEW hot zones.
today_hot_area.createOrReplaceTempView("today_hot_area")
// Group today's hexagons by street to get each street's hot zones.
val today_hot_area_rdd: RDD[Row] = today_hot_area.rdd
// (street code , iterable of (street code , hexagon coordinate , radius , order count))
val data: RDD[(String, Iterable[(String, String, Int, Int)])] = today_hot_area_rdd.map {
line =>
// street code where today's orders started
val begin_address_code = line.getAs[String]("begin_address_code")
// hexagon coordinate
val coordinate = line.getAs[String]("coordinate")
// hexagon radius
val radius = line.getAs[Int]("radius")
// order count of the hexagon
val count = line.getAs[Int]("count")
(begin_address_code, coordinate, radius, count)
}.groupBy(line => line._1)
// Per street: total order count plus the detail list (begin_address_code, coordinate, radius, count).
val parseData: RDD[(String, Int, List[(String, String, Int, Int)])] = data.map {
line =>
var count = 0
for (dat <- line._2) {
count = count + dat._4
}
// (street code , total count , details)
(line._1, count, line._2.toList)
}
import sparkSession.sqlContext.implicits._
//city_code | city_hot_num | [[coordinate , radius , count] ,[coordinate , radius , count] .... ]
val _today_hot_df = parseData.toDF("city_code" , "city_num" , "today_hot_arr")
// Today's NEW hot-zone orders:
// first rebuild yesterday's hot-zone situation ...
sparkSession.sql(HotAreaOrderSQL.yestdayOrderStatusTMP("2019-07-14")).createOrReplaceTempView("yestdayOrderStatusTMP")
sparkSession.sql(HotAreaOrderSQL.yestdayOrderStatus).createOrReplaceTempView("yestdayOrderStatus")
// ... then diff today's orders against yesterday's per hexagon to get the daily growth.
val yest_hot_new_status = sparkSession.sql(HotAreaOrderSQL.newHotOrderTmp)
.toDF("city_code" , "coordinate" , "radius" , "count")
// Per-street view of the growth figures.
val yest_hot_new_status_rdd = yest_hot_new_status.rdd
// Group by street code again, this time over the growth rows.
val _new_hot = yest_hot_new_status_rdd.map {
line =>
// street code
val city_code = line.getAs[String]("city_code")
// hexagon coordinate
val coordinate = line.getAs[String]("coordinate")
// hexagon radius
val radius = line.getAs[Int]("radius")
// order growth of the hexagon (today minus yesterday; may be negative)
val count = line.getAs[Int]("count")
(city_code, coordinate, radius, count)
}.groupBy(line => line._1)
// Sum the growth per street, keeping the detail list.
val _new_hot_parse = _new_hot.map {
line =>
var count = 0
for (dat <- line._2) {
count = count + dat._4
}
(line._1, count, line._2.toList)
}
// NOTE(review): duplicate implicits import -- already in scope from above.
import sparkSession.sqlContext.implicits._
// DataFrame of new hot-zone orders per street.
val _new_hot_df = _new_hot_parse.toDF("city_code" , "city_num" , "yest_hot_arr")
// temp view: new hot-zone orders
_new_hot_df.createOrReplaceTempView("_new_hot_df")
// temp view: today's hot-zone orders
_today_hot_df.createOrReplaceTempView("_today_hot_df")
// Join the two views and persist to HBase (row key = city code).
val _hot_order = sparkSession.sql(HotAreaOrderSQL._hot_order)
_hot_order.show()
SparkSQLHBaseSink.saveToHBase(_hot_order,"_hot_order","rk","rk,_city_code,_day_order_num,_new_orders_num")
// DataFrameToHbase.save(_hot_order , "_hot_order" , "rk" , 1 , false)
}
// UDF: (latitude , longitude , resolution) -> H3 cell code
def locationToH3(lat:Double ,lon:Double , res:Int ): Long ={
h3.geoToH3(lat, lon, res)
}
// UDF: H3 cell code (as a decimal string) -> "lat,lng" of the cell center
def h3ToCoordinate(line:String):String = {
val geo: GeoCoord = h3.h3ToGeo(line.toLong)
geo.lat+","+geo.lng
}
}