实训项目代码

show databases;
create table data1(xxsj string,
                   fjrnc string,
                   fjrzh bigint,
                   fjrxb string,
                   fjrIP string,
                   fjrxt string,
                   fjrsjxh string,
                   fjrwlzs string,
                   fjrjps string,
                   sjrnc string,
                   sjrIP string,
                   sjrzh bigint,
                   sjrxt string,
                   sjrsjxh string,
                   sjrwlzs string,
                   sjrjps string,
                   sjrxb string,
                   xxlx string,
                   sfjl string,
                   xx string);
load data local inpath '/root/chat_data-10W.csv' into table data1;
load data inpath '/shixun/data/chat_data-10W.csv' into table data1;
select *
from data1 limit 10;
create table etl as select *,
substr(xxsj,0,10) as dayinfo, substr(xxsj,12,2) as hourinfo, --获取天和小时
split(fjrjps,",")[0] as sender_lng, split(fjrjps,",")[1] as sender_lat --提取经度纬度
from data1
where length(fjrjps) > 0 ;
create table jrxxl
comment "今日消息总量"
as
select
dayinfo,
count(*) as total_msg_cnt
from etl
group by dayinfo;
create table  fsrs_jsrs
comment "今日发送消息人数、接受消息人数"
as
select
dayinfo,
count(distinct fjrzh) as sender_usr_cnt,
count(distinct sjrzh) as receiver_usr_cnt
from etl
group by dayinfo;
create table tb_rs_hour_msg_cnt
comment "每小时消息量、发送和接收用户数"
as
select
dayinfo,
hourinfo,
count(*) as total_msg_cnt,
count(distinct fjrzh) as sender_usr_cnt,
count(distinct sjrzh) as receiver_usr_cnt
from etl
group by dayinfo, hourinfo;
create table tb_rs_susr_top10
comment "今日发送消息最多的Top10用户 "
as
select
dayinfo,
fjrnc as username,
count(*) as sender_msg_cnt
from etl
group by dayinfo, fjrnc
order by sender_msg_cnt desc
limit 10;
create table tb_rs_sender_os
comment "发送人的设备操作系统分布情况"
as
select
dayinfo,
fjrxt,
count(distinct fjrzh) as cnt
from etl
group by dayinfo, fjrxt;
create table if not exists tb_rs_sender_phone
comment "发送人的手机型号分布"
as
select
dayinfo,
fjrsjxh,
count(distinct fjrzh) as cnt
from etl
group by dayinfo,fjrsjxh;
create table if not exists tb_rs_sender_wllx
comment "发送人的网络类型分布情况"
as
select
dayinfo,
fjrwlzs,
count(distinct fjrzh) as cnt
from etl
group by dayinfo, fjrwlzs;
create table tb_rs_loc_cnt
comment "今日各地区发送消息总量"
as
select
dayinfo,
fjrjps,
cast(sender_lng as double) as longitude,
cast(sender_lat as double) as latitude,
count(*) as total_msg_cnt
from etl
group by dayinfo,fjrjps,sender_lng,sender_lat;
 

  • 4
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值