-- Switch to the paascloud warehouse database; everything below runs in it.
use paascloud
-- Pasted `show tables` output (scratch note, not executable SQL):
-- database |tableName |isTemporary|
-- ---------|-------------------------|-----------|
-- paascloud|ads_order_cube |false |
-- paascloud|dwd_order_info |false |
-- paascloud|dwd_order_info_mysql |false |
-- paascloud|t_order |false |
-- paascloud|t_order_mysql |false |
-- paascloud|t_org_flat |false |
-- paascloud|tbwork_order |false |
-- paascloud|tbwork_order_track_record|false |
-- paascloud|tbworkorder_config |false |
-- paascloud|tbworkorder_config_detail|false |
-- paascloud|wfs_order_list_index |false |
-- List the tables in the current database (the table pasted above is its output).
show tables
-- Rebuild the DWD-layer order-info table that mirrors the MySQL source.
drop table if exists dwd_order_info_mysql
create table dwd_order_info_mysql(
orderId string,
institutionId string,
-- NOTE(review): 'platfromFiledCode' spelling ("platfrom"/"Filed") is kept as-is;
-- renaming the column would break every query that already references it.
platfromFiledCode string,
serviceFlowAlias string,
orderSource string,
orderSourceName string,
orderStatus string,
orderStatusName string,
comefrom string,
operate_type string,
urgent string,
importance string,
supervise_num string,
rework_num string,
timeout string,
-- Flattened org hierarchy: level 4 (top) down to level 1, code + name pairs.
corcode_f4 string,name_f4 string,
corcode_f3 string,name_f3 string,
corcode_f2 string,name_f2 string,
corcode_f1 string,name_f1 string
)
partitioned by (dt string)
stored as parquet
location '/warehouse/paascloud/dwd/dwd_order_info_mysql'
-- Inspect the schema just created.
desc dwd_order_info_mysql
-- ==============================================================
-- Ad-hoc exploration of the raw work-order tables and their row counts.
select * from tbwork_order
select * from tbworkorder_config
select count(*) from tbworkorder_config
select * from tbwork_order_track_record
select count(*) from tbwork_order_track_record
select * from tbworkorder_config_detail
select COUNT(*) from tbworkorder_config_detail
-- Prototype join of work orders to their config and track records for one
-- business day (extra projection columns left commented while exploring).
select
a.id as orderId
--a.institution_id as institutionId,
--b.platfrom_filed_code as platfromFiledCode,
--a.order_source as orderSource,
--a.status as orderStatus
from tbwork_order a
left join tbworkorder_config b on a.order_large_type=b.id
left join tbwork_order_track_record c on a.id=c.work_order_id
where a.dt='2020-05-31'
-- Explore the order index tables and spot-check a single order id.
desc wfs_order_track_index
select count(*) from wfs_order_track_index
select * from wfs_order_list_index where orderid='464259524432457728'
select * from wfs_order_track_index where workOrderId='464259524432457728'
-- Distinct order ids present in the track index.
select workOrderId from wfs_order_track_index group by workOrderId
-- DWD-layer basic order fact: one row per work order, enriched with
-- flattened org-hierarchy columns (corcode_f*/name_f*).
drop table if exists dwd_order_basic
create table dwd_order_basic(
orderId string,
institutionId string,
platfromFiledCode string,
serviceFlowAlias string,
orderSource string,
orderSourceName string,
orderStatus string,
orderStatusName string,
comefrom string,
isurgent string,
isimportance string,
issupervise string,
isrework string,
actualhour string,
second_hour string,
third_hour string,
man_hour string,--standard man-hours defined for this order's service type
istimeout string,
operatelifecycle string,--lifecycle of operate-progress events
--audit_star string,--audit review star rating
--revisit_star string,--follow-up visit star rating
--custom_star string,--customer review star rating
--isexception string,--whether this is an exception order
--t_p string,--order submitter
--d_p string,--order handler
corcode_f4 string,name_f4 string,
corcode_f3 string,name_f3 string,
corcode_f2 string,name_f2 string,
corcode_f1 string,name_f1 string
)
partitioned by (dt string) --business creation date of the order
stored as parquet
location '/warehouse/paascloud/dwd/dwd_order_basic'
-- Load dwd_order_basic: dynamically partition the day's imported rows into
-- their creation-date partitions. Note: do not mix upper/lower case in the
-- partition column name.
set hive.exec.dynamic.partition.mode=nonstrict
insert overwrite table dwd_order_basic partition(dt)
select
max(orderId),
max(institutionId),
max(platfromFiledCode),
max(serviceFlowAlias),
max(orderSource),
max(orderSourceName),
max(orderStatus),
max(orderStatusName),
'es', -- comefrom is hard-coded; presumably these index tables originate from ES — confirm
max(if(urgent=1 or urgent=2,1,0)) as isurgent,
max(importance) as isimportance,
max(if(superviseNum>0,1,0)) as issupervise,
max(if(reworkNum>0,1,0)) as isrework,
max(a.actualHour) as actualhour,
max(b.man_hour) as second_hour,
max(c.man_hour) as third_hour,
-- Standard man-hours come from the third-level category when present,
-- otherwise from the second-level category.
max(case
when orderThirdlyType is not null then c.man_hour
when orderThirdlyType is null and orderSecondType is not null then b.man_hour
end) as man_hour,
-- Timeout flag: actual hours exceed the applicable standard man-hours.
max(case
when orderThirdlyType is not null then if(a.actualHour>c.man_hour,1,0)
when orderThirdlyType is null and orderSecondType is not null then if(a.actualHour>b.man_hour,1,0)
end) as istimeout
,
--concat_ws('|',collect_list(if(date_format(d.createTime,'yyyy-MM-dd')='2020-05-31',d.operateType,'N'))) as operatelifecycle
concat_ws('|',collect_list(concat(date_format(d.createTime,'yyyy-MM-dd'),'#',d.operateType))) as operatelifecycle --full lifecycle: every 'date#operateType' event joined with '|'
,
max(corcode_f4),max(name_f4),
max(corcode_f3),max(name_f3),
max(corcode_f2),max(name_f2),
max(corcode_f1),max(name_f1)
,
max(date_format(a.orderCreateTime,'yyyy-MM-dd')) as dt
from wfs_order_list_index a
left join tbworkorder_config_detail b on a.orderSecondType=b.id
left join tbworkorder_config_detail c on a.orderThirdlyType=c.id --look up the standard man-hours defined for the order's service category
left join wfs_order_track_index d on d.workOrderId=a.orderid --collect the lifecycle of operate-type events per order
left join t_org_flat org on a.institutionId=org.corcode_f1 --attach org structure (area or project info)
where a.dt='2020-05-31'
group by orderid
-- Refresh Spark's cached metadata for the source table, then sanity-check the load.
refresh table wfs_order_list_index
desc tbworkorder_config_detail
select * from dwd_order_basic where orderid='464259524432457728'
select * from dwd_order_basic
select count(*) from dwd_order_basic
-- Compare against the source row count for the same business day.
select count(*) from wfs_order_list_index where dt='2020-05-31'
-- =========================================================================================================
#!/bin/bash
# Daily DWD load: rebuild dwd_order_basic for one business date via spark-sql.
# Usage: <script> [yyyy-MM-dd]  — defaults to yesterday when no argument is given.
db=paascloud
# Engine binaries kept in variables so they are easy to change.
# NOTE(review): $hive is currently unused; kept to allow switching engines manually.
hive=/root/module/hive1/bin/hive
sparksql=/root/module/spark/bin/spark-sql

# Business date: first CLI argument when provided, otherwise yesterday.
if [ -n "$1" ]; then
    do_date=$1
else
    # $(...) instead of backticks: nestable and easier to read.
    do_date=$(date -d "-1 day" +%F)
fi
echo "===日期为 $do_date==="

# The SQL below interpolates $db and $do_date; dynamic partitioning routes each
# row into its creation-date partition.
sql="
use $db;
set hive.exec.dynamic.partition.mode=nonstrict;
set spark.sql.shuffle.partitions=10;
insert overwrite table dwd_order_basic partition(dt)
select
max(orderId),
max(institutionId),
max(platfromFiledCode),
max(serviceFlowAlias),
max(orderSource),
max(orderSourceName),
max(orderStatus),
max(orderStatusName),
'es',
max(if(urgent=1 or urgent=2,1,0)) as isurgent,
max(importance) as isimportance,
max(if(superviseNum>0,1,0)) as issupervise,
max(if(reworkNum>0,1,0)) as isrework,
max(a.actualHour) as actualhour,
max(b.man_hour) as second_hour,
max(c.man_hour) as third_hour,
max(case
when orderThirdlyType is not null then c.man_hour
when orderThirdlyType is null and orderSecondType is not null then b.man_hour
end) as man_hour,
max(case
when orderThirdlyType is not null then if(a.actualHour>c.man_hour,1,0)
when orderThirdlyType is null and orderSecondType is not null then if(a.actualHour>b.man_hour,1,0)
end) as istimeout
,
--concat_ws('|',collect_list(if(date_format(d.createTime,'yyyy-MM-dd')='$do_date',d.operateType,'N'))) as operatelifecycle
concat_ws('|',collect_list(concat(date_format(d.createTime,'yyyy-MM-dd'),'#',d.operateType))) as operatelifecycle --所有的生命周期
,
max(corcode_f4),max(name_f4),
max(corcode_f3),max(name_f3),
max(corcode_f2),max(name_f2),
max(corcode_f1),max(name_f1)
,
max(date_format(a.orderCreateTime,'yyyy-MM-dd')) as dt
from wfs_order_list_index a
left join tbworkorder_config_detail b on a.orderSecondType=b.id
left join tbworkorder_config_detail c on a.orderThirdlyType=c.id --根据工单服务类别,提取工单定义的时间
left join wfs_order_track_index d on d.workOrderId=a.orderid --提取工单操作类型过程的生命周期
left join t_org_flat org on a.institutionId=org.corcode_f1 --集成组织结构(区域或者项目信息)
where a.dt='$do_date'
group by orderid
;
"
# Execute the load.
$sparksql -e "$sql"
-- ============================================================================================================
--All work-order objects operated on during the day
--
--count(1)|
----------|
-- 4944|
--select count(*) from wfs_order_track_index where date_format(createTime,'yyyy-MM-dd')='2020-05-31'
--
--count(1)|
----------|
-- 1156|
--
--select count(*) from (select workOrderId from wfs_order_track_index where date_format(createTime,'yyyy-MM-dd')='2020-05-31' group by workOrderId)t
--
--
--select workOrderId from wfs_order_track_index where date_format(createTime,'yyyy-MM-dd')='2020-05-31' group by workOrderId --t_track
--select
--t_track.workOrderId,basic.*
--from (
--select workOrderId from wfs_order_track_index where date_format(createTime,'yyyy-MM-dd')='2020-05-31' group by workOrderId
--)t_track
--left join dwd_order_basic basic on t_track.workorderid =basic .orderid
--
-- Re-check dwd_order_basic after the scripted load.
use paascloud
desc dwd_order_basic
select * from dwd_order_basic
select count(*) from dwd_order_basic
-- ================================================================================ Work-order basic info joined with org info
-- ===========================================================================
-- Dimensions involved in the work-order result report:
-- ADS-layer result cube: one row per grouping-set combination of
-- (areaId, institutionId, platfromFiledCode, orderSource, orderStatus);
-- dim encodes which dimensions are present in each row.
drop table if exists ads_order_result_cube
create table ads_order_result_cube(
dt string,
areaId string,
institutionId string,
platfromFiledCode string,
orderSource string,
orderStatus string,
cancel_cnts string,--cancelled-order count
urgent_cnts string,
importance_cnts string,
supervise_cnts string,
rework_cnts string,
timeout_cnts string,
gq_cnts string,--suspended-order count (ORDER_GQ events)
transfer_cnts string,--transferred-order count
tjxt_cnts string,--collaborator-added count (ORDER_TJXT events)
back_cnts string,--sent-back count (SEND_BACK events)
unpass_cnts string,--audit-rejected count
cnts string,
dim string
)
row format delimited fields terminated by '\t'
location '/warehouse/paascloud/ads/ads_order_result_cube'
-- Aggregate dwd_order_basic into the result cube for 2020-05-31.
-- GROUP BY ... WITH CUBE produces one row per combination of the five
-- dimensions; the insert is positional, so the select aliases below were
-- renamed to match the target DDL column names (behavior unchanged).
insert overwrite table ads_order_result_cube
select
'2020-05-31',
corcode_f2 as areaId,
institutionId,
platfromFiledCode,
orderSource,
orderStatus,
sum(if(orderStatus='ORDER_YQX' or orderStatus='ORDER_YCGB',1,0)) as cancel_cnts,
sum(if(isurgent='1',1,0)) as urgent_cnts,
sum(if(isimportance='Y',1,0)) as importance_cnts,
sum(if(issupervise='1',1,0)) as supervise_cnts,
sum(if(isrework='1',1,0)) as rework_cnts,
sum(if(istimeout='1',1,0)) as timeout_cnts,
-- operatelifecycle is a '|'-joined list of 'date#operateType' tokens built by
-- the dwd load; each sum counts orders whose lifecycle contains the given
-- event on this date.
sum(if(array_contains(split(operatelifecycle,'\\|'),concat('2020-05-31','#','ORDER_GQ')),1,0)) as gq_cnts,
sum(if(array_contains(split(operatelifecycle,'\\|'),concat('2020-05-31','#','TRANSFER_ORDER')),1,0)) as transfer_cnts,
sum(if(array_contains(split(operatelifecycle,'\\|'),concat('2020-05-31','#','ORDER_TJXT')),1,0)) as tjxt_cnts,
sum(if(array_contains(split(operatelifecycle,'\\|'),concat('2020-05-31','#','SEND_BACK')),1,0)) as back_cnts,
-- NOTE(review): this counts every AUDIT_ORDER event, not only rejected audits,
-- yet the target column is unpass_cnts — confirm the event code with the source system.
sum(if(array_contains(split(operatelifecycle,'\\|'),concat('2020-05-31','#','AUDIT_ORDER')),1,0)) as unpass_cnts,
count(*) as cnts,
-- Render grouping_id() as a fixed-width 5-bit mask with every bit flipped
-- (0<->1 via the 'x' placeholder), e.g. dim='11111' for the fully-grouped rows.
-- NOTE(review): grouping_id bit order/meaning differs across Hive/Spark versions — verify.
regexp_replace(regexp_replace(regexp_replace(lpad(bin(cast(grouping_id() as bigint)),5,'0'),"0","x"),"1","0"),"x","1") as dim
FROM
dwd_order_basic where dt='2020-05-31'
group by
corcode_f2,
institutionId,
platfromFiledCode,orderSource,orderStatus
with cube
use paascloud
-- Spot-check the cube output.
select COUNT(*) from ads_order_result_cube
select * from ads_order_result_cube
select * from ads_order_result_cube where dim='00000' -- presumably the grand-total grouping — confirm dim bit semantics
select * from ads_order_result_cube where dim='11111' order by institutionId,platfromFiledCode,orderSource,orderStatus -- fully-detailed rows
-- Identifier casing normalized ('indeX'->'index', 'orderstatus'->'orderStatus');
-- Hive/Spark identifiers are case-insensitive, so behavior is unchanged.
select count(*) from wfs_order_list_index where orderStatus='ORDER_YCGB' and dt='2020-05-31'
-- Verify split/array_contains behavior on a sample lifecycle string.
select array_contains(split('2020-05-31#TAKE_ORDER|2020-05-31#AUDIT_ORDER','\\|'),'2020-05-31#TAKE_ORDER') from dwd_order_basic