基于Hive数仓实现需求开发
需求指标统计
需求
- 统计今日总消息量
- 统计今日每小时消息量、发送和接收用户数
- 统计今日各地区发送消息数据量
- 统计今日发送消息和接收消息的用户数
- 统计今日发送消息最多的Top10用户
- 统计今日接收消息最多的Top10用户
- 统计发送人的手机型号分布情况
- 统计发送人的设备操作系统分布情况
指标1:统计今日消息总量
create table if not exists tb rs total msg cnt
comment “今日消息总量”
as
select
dayinfo,
count( *) as total msg_cnt
from db msg.tb msg_etl
group by dayinfo;
指标2: 统计每小时消息量、发送和接收用户数
create table if not exists tb_rs hour_msg_cnt
comment “每小时消息量趋势”
as
select
dayinfo,
hourinfo,
count(*) as total _msg_cnt,
count(distinct sender_account) as sender_usr_cnt,
count(distinct receiver_account) as receiver usr cnt
from db _msg.tb msg etl
group by dayinfo, hourinfo;
指标3: 统计今日各地区发送消息总量
create table if not exists tb_rs_loc_cnt
comment"今日各地区发送消息总量"
as
select
dayinfo,
sender_gps,
cast(sender lng_as_double) as longitude,
cast(sender lat_as_double) as latitude,
count(*) as total msg cnt
from db msg.tb msg_etl
group by dayinfo,sender gps,sender Ing,sender lat;
指标4: 统计今日发送和接收用户人数
create table if not exists tb_rs_usr_cnt
comment"今日发送消息人数、接受消息人数"
as
select
dayinfo,
count(distinct sender_account) as sender usr cnt,
count(distinct receiver_account)as receiver usr cnt
from db_msg.tb msg_etl
group by dayinfo;
指标5: 统计发送消息条数最多的Top10用户
create table if not exists tb_rs_susr top10
comment"发送消息条数最多的Top10用户"
as
select
dayinfo.
sender_name as username,
count( *) as sender msg_cnt
from db msg.tb_msg_etl
group by dayinfo,sender_name
order by sender msg cnt desc
limit 10;
指标6: 统计接收消息条数最多的Top10用户
create table if not exists tb_rs_susr_top10
comment"发送消息条数最多的Top10用户"
as
select
dayinfo,
receiver_name as username,
count(*)as receiver_msg_cnt
from db_msg.tb_msg_etl
group by dayinfo,receiver_name
order by receiver_msg_cnt desc
limit 10;
指标7: 统计发送人的手机型号分布情况
create table if not exists tb_rs_sender_phone
comment"发送人的手机型号分布"
as
select
dayinfo,
sender_phonetype,
count(distinct sender_account) as cnt
from tb_msg_etl
group by dayinfo,sender_phonetype;
指标8: 统计发送人的操作系统分布
create table if not exists tb_rs_sender_os
comment"发送人的OS分布"
as
select
dayinfo,
sender_os,
count(distinct sender_ account) as cnt
from tb_msg _etl
group by dayinfo,sender_os;