show databases;
create table data1(xxsj string,
fjrnc string,
fjrzh bigint,
fjrxb string,
fjrIP string,
fjrxt string,
fjrsjxh string,
fjrwlzs string,
fjrjps string,
sjrnc string,
sjrIP string,
sjrzh bigint,
sjrxt string,
sjrsjxh string,
sjrwlzs string,
sjrjps string,
sjrxb string,
xxlx string,
sfjl string,
xx string);
load data local inpath '/root/chat_data-10W.csv' into table data1;
load data inpath '/shixun/data/chat_data-10W.csv' into table data1;
select *
from data1 limit 10;
create table etl as select *,
substr(xxsj,0,10) as dayinfo, substr(xxsj,12,2) as hourinfo, --获取天和小时
split(fjrjps,",")[0] as sender_lng, split(fjrjps,",")[1] as sender_lat --提取经度纬度
from data1
where length(fjrjps) > 0 ;
create table jrxxl
comment "今日消息总量"
as
select
dayinfo,
count(*) as total_msg_cnt
from etl
group by dayinfo;
create table fsrs_jsrs
comment "今日发送消息人数、接受消息人数"
as
select
dayinfo,
count(distinct fjrzh) as sender_usr_cnt,
count(distinct sjrzh) as receiver_usr_cnt
from etl
group by dayinfo;
create table tb_rs_hour_msg_cnt
comment "每小时消息量、发送和接收用户数"
as
select
dayinfo,
hourinfo,
count(*) as total_msg_cnt,
count(distinct fjrzh) as sender_usr_cnt,
count(distinct sjrzh) as receiver_usr_cnt
from etl
group by dayinfo, hourinfo;
create table tb_rs_susr_top10
comment "今日发送消息最多的Top10用户 "
as
select
dayinfo,
fjrnc as username,
count(*) as sender_msg_cnt
from etl
group by dayinfo, fjrnc
order by sender_msg_cnt desc
limit 10;
create table tb_rs_sender_os
comment "发送人的设备操作系统分布情况"
as
select
dayinfo,
fjrxt,
count(distinct fjrzh) as cnt
from etl
group by dayinfo, fjrxt;
create table if not exists tb_rs_sender_phone
comment "发送人的手机型号分布"
as
select
dayinfo,
fjrsjxh,
count(distinct fjrzh) as cnt
from etl
group by dayinfo,fjrsjxh;
create table if not exists tb_rs_sender_wllx
comment "发送人的网络类型分布情况"
as
select
dayinfo,
fjrwlzs,
count(distinct fjrzh) as cnt
from etl
group by dayinfo, fjrwlzs;
create table tb_rs_loc_cnt
comment "今日各地区发送消息总量"
as
select
dayinfo,
fjrjps,
cast(sender_lng as double) as longitude,
cast(sender_lat as double) as latitude,
count(*) as total_msg_cnt
from etl
group by dayinfo,fjrjps,sender_lng,sender_lat;
实训项目代码
最新推荐文章于 2024-07-24 17:30:44 发布