use dws_events;
create database if not exists dws_events;
-- 在dws层记录一下,每一位用户有多少个朋友 user_friend_count;
drop table if exists user_friend_count;
create table if not exists user_friend_count stored as orc as
select userid,count(*) friendcount from dwd_events.user_friend group by userid;
select * from user_friend_count;
-- 在dws层记录下,每件事情不同状态的人数
drop table if exists event_attendee_count;
create table if not exists event_attendee_count stored as orc as
select eventid,state,count(friendid) attendCount from dwd_events.event_attendee group by eventid,state;
select * from event_attendee_count;
select * from dwd_events.event_attendee;
-- eventid friendid invited maybe no yes
-- 1111 xiaofan 1 0 1 0
-- eventid friendid invited maybe no yes
-- 1111 xiaofan 1 0 0 0
-- 1111 xiaofan 0 0 1 0
-- 在event_attendee表中,找出事件中,每个用户在四种状态中的值(例如,gree儿子结婚这件事情,邀请 xiaofan ,xiaofan同意参加)
-- 在event_attendee表中,找出事件中,每个用户在四种状态中的值(例如,今天晚上happy K歌,邀请 liuyong ,louyong不参加)
create table if not exists event_user_state stored as orc as
select eventid,friendid,
case when state='invited' then 1 else 0 end as invited,
case when state='maybe' then 1 else 0 end as maybe_attendee,
case when state='no' then 1 else 0 end as no_attendee,
case when state='yes' then 1 else 0 end as yes_attendee
from dwd_events.event_attendee;
select * from event_user_state where maybe_attendee=1 and invited=1;
create table if not exists user_event_state stored as orc as
select friendid as attend_user_id,eventid,
max(invited) as invited,
max(maybe_attendee) as maybe_attend,
max(no_attendee) as no_attend,
max(yes_attendee) as yes_attend
from event_user_state
group by friendid, eventid;
select * from user_event_state where no_attend=1 and yes_attend=1;
-- 朋友在事件中的参与状态
-- 如果 我 有三个朋友 小王,小李,李赵
-- 小王 小明结婚 小王参加
-- 小李 小明结婚 小李参加
-- 小赵 小明结婚 小赵参加
create table if not exists friend_attendevent_state stored as orc as
select uf.userid,
uf.friendid,
ues.eventid,
ues.invited,
ues.maybe_attend,
ues.no_attend,
ues.yes_attend
from dwd_events.user_friend uf
left join user_event_state ues on uf.friendid=ues.attend_user_id;
select * from friend_attendevent_state where eventid is not null;
-- 查看 用户的朋友中对于某一事件,被邀请的人数是多少,参加人数是多少,不参加的人数是多少等等
-- userid eventid invited_friends_count maybe_friends_count no_friends_count yes_friends_count
-- gree 朋友孩子过生日 20 3 1 16
create table if not exists friend_attend_summary stored as orc as
select userid,eventid,
sum(invited) invited_friends_count,
sum(maybe_attend) maybe_friends_count,
sum(no_attend) no_friends_count,
sum(yes_attend) yes_friends_count
from friend_attendevent_state where eventid is not null
group by userid, eventid;;
select state from dwd_events.event_attendee group by state;
--
select t.city,t.count,33 - row_number() over () from
(select city, count(*) count from dwd_events.events group by city order by count desc limit 32) t;
create table event_cities stored as orc as
select t.city,row_number() over (order by count desc) as level from
(select city, count(*) count from dwd_events.events group by city order by count desc limit 32) t;
select * from event_cities;
select * from dwd_events.events;
create table event_countries stored as orc as
select t.country,row_number() over (order by t.count desc) as level from
(select country,count(*) count from dwd_events.events group by country order by count desc limit 8) t;
select * from event_countries;
select count(*) from dwd_events.events limit 32;
-- train 训练数据集
select * from dwd_events.train;
-- 用与参与事件的数量
create table user_event_count stored as orc as
select userid,count(eventid) event_count from dwd_events.train group by userid;
// 特征工程 建表 a b c d f e 联系人
// eventid userid intr 特征1,特征2,特征3,特征4,特征5,特征6,特征7,
set hive.auto.convert.join=false
离线Hive数据仓库学习 dws_events
于 2022-01-17 22:15:36 首次发布