https://blog.csdn.net/qq_33689414/article/details/80328665 hive关联hbase的配置文件
hive和hbase同步
https://cwiki.apache.org/confluence/display/Hive
https://cwiki.apache.org/confluence/display/Hive/HBaseIntegration
1、在hive的配置文件增加属性:hive-site.xml
hbase.zookeeper.quorum
master:2181,node1:2181,node2:2181
hbase.zookeeper.quorum
master,node1,node2
1.1.2 修改hive-env.sh文件
添加一个环境变量
export HIVE_CLASSPATH=$HIVE_CLASSPATH:/hadoop/hbase/lib/*
2、在hive中创建临时表
一、
-- Hive-managed table backed by HBase table 'xyz'.
-- NOTE: the original used '/' and '//' inline comments, which are not valid
-- HiveQL comment syntax and break the statement; HiveQL comments use '--'.
CREATE TABLE hbasetbl(key int, value string)
-- Storage handler that bridges Hive and HBase (ships with hive-hbase-handler jar).
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
-- Column mapping: Hive columns map positionally — key -> HBase row key, value -> cf1:val.
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val")
-- Target HBase table for reads and for MapReduce output (INSERT).
TBLPROPERTIES ("hbase.table.name" = "xyz", "hbase.mapred.output.outputtable" = "xyz");
二、
put 'xyz','1111','cf1:name','zhangsan'
put 'xyz','1111','cf1:val','lisi'
三、
set hive.exec.mode.local.auto=true; -- 设置为本地模式 (HiveQL comments use '--', not '//')
insert into hbasetbl values(222,'wangwu');
查看hdfs
hbase> flush 'xyz'  # 执行刷新后,hdfs才会同步 (HBase shell comments use '#', not '//')
3.
一、
-- External Hive table over the pre-existing HBase table 't_order'.
-- FIX: original read 'CREATEEXTERNAL' (missing space) and would not parse.
-- Being EXTERNAL, dropping it in Hive removes only the metadata, never the HBase data.
CREATE EXTERNAL TABLE tmp_order
(key string, id string, user_id string)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
-- key -> HBase row key; id -> order:order_id; user_id -> order:user_id
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,order:order_id,order:user_id")
TBLPROPERTIES ("hbase.table.name" = "t_order");
外部表必须先在hbase建立表(在hive删除这个表会有什么影响?)
二、
create 't_order','order'
在执行
-- Map the existing HBase table 't_order' into Hive as an external table.
-- The HBase table (column family 'order') must already exist — see the
-- `create 't_order','order'` shell command above.
CREATE EXTERNAL TABLE tmp_order (
    key     string,   -- HBase row key (:key)
    id      string,   -- order:order_id
    user_id string    -- order:user_id
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,order:order_id,order:user_id"
)
TBLPROPERTIES (
    "hbase.table.name" = "t_order"
);
put 't_order','1','order:order_id','1'
put 't_order','1','order:user_id','1'
select * from tmp_order;
hive
insert into tmp_order values(2,2,2);
一、两个角度
1.用户
2.会话
二、mysql数据表
stats_view_depth
-- Result table for the page-view-depth KPI.
-- Each pv* column counts visitors (or sessions) whose page-view count falls
-- in that bucket: pv1..pv4 are exact counts, pv5_10 / pv10_30 / pv30_60 are
-- ranges, pv60_plus is 60 and above. 'created' is the stat date.
-- NOTE(review): column types (bigint/string) and the tab-delimited row format
-- suggest this is the Hive-side staging of a MySQL report table — confirm
-- against the export job.
CREATE TABLE `stats_view_depth` (
`platform_dimension_id` bigint ,
`data_dimension_id` bigint ,
`kpi_dimension_id` bigint ,
`pv1` bigint ,
`pv2` bigint ,
`pv3` bigint ,
`pv4` bigint ,
`pv5_10` bigint ,
`pv10_30` bigint ,
`pv30_60` bigint ,
`pv60_plus` bigint ,
`created` string
) row format delimited fields terminated by '\t';
三、数据
1.用户ID
2.时间
3.URL
4.PV
四、HIVE和HBASE对应关系
set hive.exec.mode.local.auto=true;
1.在hive中创建hbase的event_log对应表
-- External Hive table over the HBase event-log table 'eventlog'.
-- FIX: the original also declared `ROW FORMAT SERDE 'org.apache.hadoop.hive.hbase.HBaseSerDe'`
-- before STORED BY; the HBaseStorageHandler already supplies HBaseSerDe, and
-- combining ROW FORMAT with STORED BY is rejected by Hive's DDL grammar.
CREATE EXTERNAL TABLE event_logs(
key string, pl string, en string, s_time bigint, p_url string, u_ud string, u_sd string
) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
-- key -> row key; remaining columns come from the 'log' column family.
with serdeproperties('hbase.columns.mapping'=':key,log:pl,log:en,log:s_time,log:p_url,log:u_ud,log:u_sd')
tblproperties('hbase.table.name'='eventlog');
分组条件
UUID
PL
count(url):1,2,3,4.... (hive_case_when)
3.hive创建临时表:把hql分析之后的中间结果存放到当前的临时表。
CREATE TABLE `stats_view_depth_tmp`(`pl` string, `date` string, `col` string, `ct` bigint);
然后先看第七步的操作SQL,再回头看udf
-- 4. 编写UDF(platformdimension & datedimension)
package com.yjsj.transformer.hive; import com.yjsj.common.DateEnum; import com.yjsj.transf…(Java 片段不完整,已截断)