Hive整合HBase表
创建Hive表时同时创建HBase表
此种方式创建的是Hive内部表(managed table):删除Hive表时,对应的HBase表也会被一并删除,数据随之清空。另外,HBase表的属性(比如TTL、压缩方式等)无法在Hive建表语句中指定,需要在建表之后到HBase shell中通过alter单独修改。
示例如下:
-- Hive managed table backed by the HBase table 'linkcdc:cdc_addr' through
-- HBaseStorageHandler. row_key maps to the HBase row key (:key); every other
-- column maps to a qualifier of the same name in column family 'addr'.
--
-- The hbase.columns.mapping value is deliberately written on ONE line with no
-- whitespace: Hive's HBase integration documentation states that whitespace
-- between mapping entries is not allowed, because it is taken as part of the
-- column name. Splitting the string literal across lines puts a newline in
-- front of every entry.
--
-- NOTE(review): 'uesd_times' looks like a typo of 'used_times'; it is kept
-- as-is because renaming it changes the table schema and the HBase qualifier.
CREATE TABLE cdc_addr_hb (
    row_key         STRING,
    row_id          STRING,
    created         STRING,
    created_by      STRING,
    last_upd        STRING,
    last_upd_by     STRING,
    corp_id         STRING,
    org_id          STRING,
    postn_id        STRING,
    src_row_id      STRING,
    src_created     STRING,
    src_created_by  STRING,
    src_last_upd    STRING,
    src_last_upd_by STRING,
    data_source     STRING,
    data_type       STRING,
    oso_row_id      STRING,
    consumer_id     STRING,
    par_consumer_id STRING,
    addr_type       STRING,
    country         STRING,
    province        STRING,
    city            STRING,
    district        STRING,
    addr            STRING,
    uesd_times      STRING,
    valid_flag      STRING,
    default_flag    STRING,
    last_used_time  STRING,
    channel         STRING,
    part_dt         STRING
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,addr:row_id,addr:created,addr:created_by,addr:last_upd,addr:last_upd_by,addr:corp_id,addr:org_id,addr:postn_id,addr:src_row_id,addr:src_created,addr:src_created_by,addr:src_last_upd,addr:src_last_upd_by,addr:data_source,addr:data_type,addr:oso_row_id,addr:consumer_id,addr:par_consumer_id,addr:addr_type,addr:country,addr:province,addr:city,addr:district,addr:addr,addr:uesd_times,addr:valid_flag,addr:default_flag,addr:last_used_time,addr:channel,addr:part_dt"
)
TBLPROPERTIES ("hbase.table.name" = "linkcdc:cdc_addr");
# HBase shell: on column family 'addr' of the existing table, set
# TTL to 604800 (HBase TTL is expressed in seconds, i.e. 7 days) and
# COMPRESSION to SNAPPY.
alter 'linkcdc:cdc_addr', { NAME => 'addr', TTL => '604800', COMPRESSION => 'SNAPPY' }
基于HBase表创建Hive外部表
此种方式需要先在HBase shell中创建HBase表,然后基于该HBase表创建Hive外部表。删除Hive表时,HBase表不会被删除,数据也不会被清空。此种方式无法直接对Hive表执行truncate;如果需要清空数据,需要通过其他方式处理,比如使用insert overwrite,或者先将表改成内部表、清空后再改回外部表。
示例如下:
# HBase shell: create table 'cdc_addr' in namespace 'linkcdc' with a single
# column family 'addr', TTL 604800 (HBase TTL is expressed in seconds,
# i.e. 7 days) and SNAPPY compression.
create 'linkcdc:cdc_addr', { NAME => 'addr', TTL => '604800', COMPRESSION => 'SNAPPY' }
-- Hive external table over the HBase table 'linkcdc:cdc_addr' through
-- HBaseStorageHandler. row_key maps to the HBase row key (:key); every other
-- column maps to a qualifier of the same name in column family 'addr'.
--
-- The hbase.columns.mapping value is deliberately written on ONE line with no
-- whitespace: Hive's HBase integration documentation states that whitespace
-- between mapping entries is not allowed, because it is taken as part of the
-- column name. Splitting the string literal across lines puts a newline in
-- front of every entry.
--
-- NOTE(review): 'uesd_times' looks like a typo of 'used_times'; it is kept
-- as-is because renaming it changes the table schema and the HBase qualifier.
CREATE EXTERNAL TABLE cdc_addr_hb (
    row_key         STRING,
    row_id          STRING,
    created         STRING,
    created_by      STRING,
    last_upd        STRING,
    last_upd_by     STRING,
    corp_id         STRING,
    org_id          STRING,
    postn_id        STRING,
    src_row_id      STRING,
    src_created     STRING,
    src_created_by  STRING,
    src_last_upd    STRING,
    src_last_upd_by STRING,
    data_source     STRING,
    data_type       STRING,
    oso_row_id      STRING,
    consumer_id     STRING,
    par_consumer_id STRING,
    addr_type       STRING,
    country         STRING,
    province        STRING,
    city            STRING,
    district        STRING,
    addr            STRING,
    uesd_times      STRING,
    valid_flag      STRING,
    default_flag    STRING,
    last_used_time  STRING,
    channel         STRING,
    part_dt         STRING
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,addr:row_id,addr:created,addr:created_by,addr:last_upd,addr:last_upd_by,addr:corp_id,addr:org_id,addr:postn_id,addr:src_row_id,addr:src_created,addr:src_created_by,addr:src_last_upd,addr:src_last_upd_by,addr:data_source,addr:data_type,addr:oso_row_id,addr:consumer_id,addr:par_consumer_id,addr:addr_type,addr:country,addr:province,addr:city,addr:district,addr:addr,addr:uesd_times,addr:valid_flag,addr:default_flag,addr:last_used_time,addr:channel,addr:part_dt"
)
TBLPROPERTIES ("hbase.table.name" = "linkcdc:cdc_addr");