离线Hive数据仓库学习 dws_events

use dws_events;

create database if not exists dws_events;

-- 在dws层记录一下,每一位用户有多少个朋友  user_friend_count;
drop table if exists user_friend_count;
create table if not exists user_friend_count stored as orc as
    select userid,count(*) friendcount from dwd_events.user_friend group by userid;
select * from user_friend_count;

-- 在dws层记录下,每件事情不同状态的人数
drop table if exists event_attendee_count;
create table if not exists event_attendee_count stored as orc as
    select eventid,state,count(friendid) attendCount from dwd_events.event_attendee group by eventid,state;
select * from event_attendee_count;

select * from dwd_events.event_attendee;

--  eventid   friendid invited   maybe   no   yes
--    1111     xiaofan   1          0    1    0

--   eventid   friendid invited   maybe   no   yes
--    1111     xiaofan   1          0     0    0
--    1111     xiaofan   0          0      1    0
-- 在event_attendee表中,找出事件中,每个用户在四种状态中的值(例如,gree儿子结婚这件事情,邀请 xiaofan ,xiaofan同意参加)
-- 在event_attendee表中,找出事件中,每个用户在四种状态中的值(例如,今天晚上happy K歌,邀请 liuyong ,louyong不参加)
create table if not exists event_user_state stored as orc as
select eventid,friendid,
       case when state='invited' then 1 else 0 end as invited,
       case when state='maybe' then 1 else 0 end as maybe_attendee,
       case when state='no' then 1 else 0 end as no_attendee,
       case when state='yes' then 1 else 0 end as yes_attendee
       from dwd_events.event_attendee;

select * from event_user_state where maybe_attendee=1 and invited=1;

create table if not exists user_event_state stored as orc as
select friendid as attend_user_id,eventid,
       max(invited) as invited,
       max(maybe_attendee) as maybe_attend,
       max(no_attendee) as no_attend,
       max(yes_attendee) as yes_attend
       from event_user_state
group by friendid, eventid;


select * from user_event_state where no_attend=1 and yes_attend=1;


-- 朋友在事件中的参与状态
-- 如果 我 有三个朋友 小王,小李,李赵
--  小王   小明结婚    小王参加
--  小李   小明结婚    小李参加
--  小赵   小明结婚    小赵参加

create table if not exists friend_attendevent_state stored as orc as
select uf.userid,
       uf.friendid,
       ues.eventid,
       ues.invited,
       ues.maybe_attend,
       ues.no_attend,
       ues.yes_attend
from dwd_events.user_friend uf
    left join user_event_state ues on uf.friendid=ues.attend_user_id;

select * from friend_attendevent_state where eventid is not null;

-- 查看 用户的朋友中对于某一事件,被邀请的人数是多少,参加人数是多少,不参加的人数是多少等等
-- userid  eventid     invited_friends_count  maybe_friends_count no_friends_count yes_friends_count
-- gree    朋友孩子过生日     20                   3                   1               16
create table if not exists friend_attend_summary stored as orc as
select userid,eventid,
       sum(invited) invited_friends_count,
       sum(maybe_attend) maybe_friends_count,
       sum(no_attend) no_friends_count,
       sum(yes_attend) yes_friends_count
       from friend_attendevent_state where eventid is not null
group by userid, eventid;;


select state from dwd_events.event_attendee group by state;

--
select t.city,t.count,33 - row_number() over () from
(select city, count(*) count from dwd_events.events group by city order by count desc limit 32) t;

create table event_cities stored as orc as
select t.city,row_number() over (order by count desc) as level from
(select city, count(*) count from dwd_events.events group by city order by count desc limit 32) t;
select * from event_cities;

select * from dwd_events.events;

create table event_countries stored as orc as
select t.country,row_number() over (order by t.count desc) as level from
(select country,count(*) count from dwd_events.events group by country order by count desc limit 8) t;
select * from event_countries;


select count(*) from dwd_events.events limit 32;
-- train  训练数据集
select * from dwd_events.train;
-- 用与参与事件的数量
create table user_event_count stored as orc as
select userid,count(eventid) event_count from dwd_events.train group by userid;

// 特征工程  建表 a b c d f e 联系人
// eventid  userid  intr  特征1,特征2,特征3,特征4,特征5,特征6,特征7,

set hive.auto.convert.join=false

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器学习模型机器
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值