Offline Hive Data Warehouse Study: dwd_events

Row-count check on the train table already ingested into HBase:

hbase(main):005:0> count 'events_db:train'

Data pipeline: Flume -> Kafka -> HBase -> Hive

Start HiveServer2 and the Hive metastore in the background:

[root@gree139 hive110]# nohup ./bin/hive --service hiveserver2 &
[root@gree139 hive110]# nohup ./bin/hive --service metastore &
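Once HiveServer2 is up, clients can connect over JDBC. A typical Beeline session for this setup (the default port 10000 and the root user are assumptions about this environment):

[root@gree139 hive110]# ./bin/beeline -u jdbc:hive2://gree139:10000 -n root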

-- Disable automatic conversion of joins into map-side (map join) joins; joins then run as common reduce-side joins

set hive.auto.convert.join=false;
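If the intent of the original note was to switch off map-side aggregation rather than map joins, that is controlled by a separate property; shown here only as an illustration, it is not required by the scripts below:

set hive.map.aggr=false;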

Dimensional modeling workflow: select the business process --> declare the grain --> identify the dimensions --> identify the facts

ODS   data staging / source-aligned layer   -- raw data as ingested
DWD   detail data layer                     -- desensitized / cleansed detail records
DWS   data service layer                    -- light summarization / aggregation
DM    data mart                             -- wide tables: for one business process, information from different dimensions is brought together at the declared grain
DWT   subject layer                         -- wide tables per subject area
ADS   application data layer                -- final, precise figures served to different consumers

In this section the hb_* HBase-mapped external tables play the source (ODS) role, the ORC copies form the DWD layer (database dwd_events), and the aggregation at the end belongs to the DWS layer (database dws_events).
-- create the DWD database
create database dwd_events;

use dwd_events;

-- users: map the HBase table events_db:users into Hive, then save a copy as an ORC-format Hive table
create external table hb_users(
    userid string,
    birthyear int,
    gender string,
    locale string,
    joinedAt string,
    location string,
    timezone string
)stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties
('hbase.columns.mapping'=':key,profile:birthyear,profile:gender,region:locale,registration:joinedAt,region:location,region:timezone')
tblproperties ('hbase.table.name'='events_db:users');

select * from hb_users;

create table users stored as orc as
    select * from hb_users;

select * from users;
drop table if exists hb_users;
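The DWD layer is described above as cleansing/desensitization, while this CTAS keeps the data as-is. A minimal cleansing sketch against the ORC copy, assuming missing values were ingested as empty strings and that the CSV header row may also have been ingested; the users_clean name, the 'userid' header literal, and the empty-string convention are assumptions, not part of the original scripts:

-- hypothetical DWD-style cleanse of users; names and literals are assumptions
create table users_clean stored as orc as
select userid,
       birthyear,
       if(gender   = '', null, gender)   as gender,
       if(locale   = '', null, locale)   as locale,
       joinedAt,
       if(location = '', null, location) as location,
       if(timezone = '', null, timezone) as timezone
from users
where userid != 'userid';   -- assumed header-row literal, mirroring the train filter below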

-- events: map the HBase table events_db:events into Hive and save a copy in ORC format
create external  table hb_events(
    eventid string,
    user_id string,
    city string,
    state string,
    zip string,
    country string,
    lat float,
    lng float,
    common_words string,
    start_time string
) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ('hbase.columns.mapping'=":key,creator:user_id,location:city,location:state,location:zip,location:country,location:lat,location:lng,remark:common_words,schedule:start_time")
tblproperties ('hbase.table.name'='events_db:events');

select * from hb_events;

create table events stored as orc as
    select  * from hb_events;

select * from events;

drop table if exists hb_events;

show tables;

-- event_attendee: map the HBase table events_db:event_attendee into Hive and save a copy in ORC format
create external table hb_event_attendee(
    row_key string,
    eventid string,
    friendid string,
    state string
) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ('hbase.columns.mapping'=':key,euat:eventid,euat:friendid,euat:state')
tblproperties ('hbase.table.name'='events_db:event_attendee');

create table event_attendee stored as orc as
    select * from hb_event_attendee;

select * from event_attendee;
drop table if exists hb_event_attendee;

-- user_friend: map the HBase table events_db:user_friend into Hive and save a copy in ORC format
create external table hb_user_friend(
    row_key string,
    userid string,
    friendid string
) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ('hbase.columns.mapping'=':key,uf:userid,uf:friendid')
tblproperties ('hbase.table.name'='events_db:user_friend');

drop table if exists user_friend;
create table user_friend stored as orc as
    select * from hb_user_friend;

drop table if exists hb_user_friend;

-- train: map the HBase table events_db:train into Hive and save a copy in ORC format, dropping the ingested CSV header row
create external table hb_train(
    rowkey string,
    userid string,
    eventid string,
    invited string,
    `timestamp` string,
    interested string
) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ('hbase.columns.mapping'=':key,eu:user,eu:event,eu:invited,eu:timestamp,eu:interested')
tblproperties ('hbase.table.name'='events_db:train');

-- the CSV header row was ingested along with the data; its userid field is the literal 'user', so filter it out
select * from hb_train where userid!='user';

use dwd_events;

drop table  if exists train;
create table train stored as orc as
    select * from hb_train where userid!='user';

select count(*) from train;

-- sanity check: the header row should no longer be present
select * from train where userid='user';

drop table if exists hb_train;

/**
Upload the locale and time-zone lookup files to HDFS first:

[root@gree139 kb15tmp]# hdfs dfs -mkdir -p /events/ods/locale
[root@gree139 kb15tmp]# hdfs dfs -mkdir -p /events/ods/timezone
[root@gree139 kb15tmp]# hdfs dfs -put ./locale.txt /events/ods/locale
[root@gree139 kb15tmp]# hdfs dfs -put ./time_zone.csv /events/ods/timezone
*/

-- locale
create external  table ods_local(
    local_id int,
    locale string
) row format delimited fields terminated by "\t"
location '/events/ods/locale';
select * from ods_local;

create table local stored as orc as
    select * from ods_local;

select * from local;
drop table if exists ods_local;

-- time_zone
create external table ods_time_zone(
    time_zone_id int,
    time_zone string
)row format delimited fields terminated by ','
location '/events/ods/timezone';

select * from ods_time_zone;
create table time_zone stored as orc as
    select * from ods_time_zone;

drop table if exists ods_time_zone;
select * from time_zone;
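These two lookup tables are presumably meant to enrich the user dimension. A hedged sketch of such a join, assuming users.locale stores the same locale string that appears in local.locale (time_zone would be joined analogously):

select u.userid, u.locale, l.local_id
from users u
left join local l on u.locale = l.locale
limit 10;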

use dws_events;
-- user_attend_event_count
-- Based on event_attendee, count for each user: the number of event invitations received, events attended (yes), events declined (no), and events possibly attended (maybe)
-- userid  invited_count  maybe_attend_count  no_attend_count  yes_attend_count
-- 111       12             0                   0                  12
-- 222       10             0                   10                  0
create table user_attend_event_count stored as orc as
    select attend_user_id as userid,
           sum(invited) as invited_count,
           sum(maybe_attend) as maybe_attend_count,
           sum(no_attend) as no_attend_count,
           sum(yes_attend) as yes_attend_count
           from user_event_state
 group by attend_user_id;
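Neither the dws_events database nor the user_event_state table queried above is created in this section; they presumably come from the companion DWS scripts. A hedged reconstruction that would make the query runnable, assuming event_attendee.state holds the literal values 'invited'/'maybe'/'no'/'yes' and friendid identifies the invited user:

create database if not exists dws_events;
use dws_events;

-- hypothetical rebuild of user_event_state from the DWD-layer event_attendee table
create table if not exists user_event_state stored as orc as
select friendid as attend_user_id,
       eventid,
       if(state = 'invited', 1, 0) as invited,
       if(state = 'maybe',   1, 0) as maybe_attend,
       if(state = 'no',      1, 0) as no_attend,
       if(state = 'yes',     1, 0) as yes_attend
from dwd_events.event_attendee;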







