Hbase数据映射到Hive

Hbase数据映射到Hive

//新建库 并使用库
 create database events;
 use events;
 
 
 //设置变量名为db   指向库名 events
 set hivevar:db=events;

 
 //设置允许所有的分区列都是动态分区列00000000000000000000
 set hive.exec.dynamic.partition.mode=nonstrict;
 
 
  //设置允许动态分区功能
 SET hive.optimize.sort.dynamic.partition=true;
 
 
 //关闭map阶段的表关联
 SET hive.auto.convert.join=false;
 
 //hive 路径下查询各个文件夹存储情况  或者可以直接用 df -h 来查看磁盘使用情况  防止磁盘过满而引起建表不成功
 [root@hadoop222 hive]# du -h -x --max-depth=1
1.6M    ./examples
104K    ./docs
24K     ./conf
2.0M    ./scripts
20M     ./hcatalog
7.8M    ./auxlib
113M    ./lib
160K    ./bin
32K     ./data
0       ./warehouse
145M    .

//查询yarn节点的工作状态
[root@hadoop222 hive]# yarn node -list all
21/06/07 12:03:17 INFO client.RMProxy: Connecting to ResourceManager at hadoop222/192.168.146.222:8032
21/06/07 12:03:17 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Total Nodes:1
         Node-Id             Node-State Node-Http-Address       Number-of-Running-Containers
 hadoop222:36360                RUNNING    hadoop222:8042                                  0


 
 //关联hbase里面的users表到hive (外部表)
 
 create external table ${db}.hb_users(user_id String, birth_year INT,gender String,locale String,location String,time_zone String,joined_at String)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties ('hbase.columns.mapping'=':key,profile:birthyear,profile:gender,region:locale,region:location,region:timezone,registration:joinedAt')
tblproperties ('hbase.table.name'='events_db:users');


// 创建内部表 users 将hb_users外部数据 以orc的格式存储到hive内部表users中
//  提高了读取效率
create table ${db}.users
stored as orc as
select * from ${db}.hb_users;


// 删除外部表 , 因为生成内部表后就不需要外部表了

drop table if exists ${db}.hb_users;

其他的表都是一样的重复操作

//继续创建 events表的映射
create external table ${db}.hb_events(
event_id String,
user_id String,
event_time String,
city String,
state String,
zip String,
country String,
lat Float,
lng Float,
common_words String
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties 
('hbase.columns.mapping'='
:key,
creator:user_id,
schedule:start_time,
location:city,
location:state,
location:zip,
location:country,
location:lat,
location:lng,
remark:common_words
')
tblproperties ('hbase.table.name'='events_db:events');


//根据events外部表生成内部表
create table ${db}.events
stored as orc as
select * from ${db}.hb_events;


// 删除外部表 , 因为生成内部表后就不需要外部表了
drop table if exists ${db}.hb_events;



//user_friend 外部表

create external table ${db}.hb_user_friend(
row_key String,
user_id String,
friend_id String
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties 
('hbase.columns.mapping'='
:key,
uf:userid,
uf:friendid
')
tblproperties ('hbase.table.name'='events_db:user_friend');

//user_friend 内部表
create table ${db}.user_friend
stored as orc as
select * from ${db}.hb_user_friend;

 // 删除外部表 , 因为生成内部表后就不需要外部表了
drop table if exists ${db}.hb_user_friend;

//event_attendees 外部表
create external table ${db}.hb_event_attendee(
row_key String,
event_id String,
user_id String,
attend_type String
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties 
('hbase.columns.mapping'='
:key,
euat:eventid,
euat:friendid,
euat:state
')
tblproperties ('hbase.table.name'='events_db:event_attendee');


//event_attendees 内部表

create table ${db}.event_attendee
stored as orc as
select * from ${db}.hb_event_attendee;

 // 删除外部表 , 因为生成内部表后就不需要外部表了
drop table if exists ${db}.hb_event_attendee;



//train 外部表
create external table ${db}.hb_train(
row_key String,
user_id String,
event_id String,
invited String,
timestamp String,
interested String
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties 
('hbase.columns.mapping'='
:key,
eu:user,
eu:event,
eu:invited,
eu:timestamp,
eu:interested
')
tblproperties ('hbase.table.name'='events_db:train');



//train 内部表

create table ${db}.train
stored as orc as
select * from ${db}.hb_train;

 // 删除外部表 , 因为生成内部表后就不需要外部表了
drop table if exists ${db}.hb_train;
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值