hive动态分区设置
set hive.exec.dynamic.partition=true;(可通过这个语句查看:set hive.exec.dynamic.partition;)
set hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions=100000;(如果自动分区数大于这个参数,将会报错)
SET hive.exec.max.dynamic.partitions.pernode=100000;
hive 设置队列
SET mapreduce.job.queuename=root.up;
SET mapreduce.job.priority=HIGH;
set tez.queue.name=cmbi;
hive 设置引擎
set hive.execution.engine=mr;
set hive.execution.engine=spark;
set hive.execution.engine=tez;
with
窗口分析函数
分析窗口函数应用场景:
(1)用于分区排序
(2)动态Group By
(3)Top N
(4)累计计算
(5)层次查询
SUM,AVG,MIN,MAX+over
NTILE,ROW_NUMBER,RANK,DENSE_RANK,CUME_DIST,PERCENT_RANK
LAG,LEAD,FIRST_VALUE,LAST_VALUE
GROUPING SETS,GROUPING__ID,CUBE,ROLLUP
查询分区字段信息
select T5.CD_ID ,T3.CD_ID
from TBLS T1,DBS T2,SDS T3,PARTITIONS T4,SDS T5
WHERE T2.NAME = ‘ods_real_madrid’
AND T1.TBL_NAME = ‘t_user_valid_check’
AND T1.DB_ID = T2.DB_ID
AND T1.SD_ID = T3.SD_ID
AND T1.TBL_ID =T4.TBL_ID
AND T4.SD_ID = T5.SD_ID
and T1.OWNER = ‘hadoop’
;
修改字段备注
ALTER TABLE dw.bds_base_order_all CHANGE test_flag test_flag string COMMENT ‘测试标识 1 正常 2 测试’;
ALTER TABLE dw.bds_base_order_all CHANGE is_succeed is_succeed string COMMENT ‘资产放款状态,1 放款成功 2 放款失败(签约失败、支付失败、资产赎回)’;
ALTER TABLE dw.bds_base_order_all CHANGE is_bank_succeed is_bank_succeed string COMMENT ‘资金放款状态,1 放款成功 2 退单(黑名单、管户权)’;
表新增字段
alter table dw.bds_cl_borrow_m add columns
(wsm_service_fee_rate
string COMMENT ‘微神马服务费率’,
capital_advance_fee_rate
string COMMENT ‘垫资费率’,
user_white_id
BIGINT COMMENT ‘用户白名单ID’,
goods_order_no
string COMMENT ‘商品订单号’,
interest_rate_method
string COMMENT ‘计息方式’
);
##load data
LOAD DATA LOCAL INPATH ‘/path/to/local/files’ OVERWRITE INTO TABLE test
##创建表
CREATE TABLE IF NOT EXISTS test.test2(
id string,name string
)
COMMENT ‘xxx’
ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘,’ NULL DEFINED AS ‘’;
修改表备注
ALTER TABLE table_name SET TBLPROPERTIES (‘comment’ = “xxx”);
修改字段备注
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
alter table XXX CHANGE COLUMN colX colY String comment ‘XX’;
修改字段位置
ALTER TABLE XXX CHANGE COLUMN col1 col1 STRING AFTER col2;
执行计划
explain sql
查询分区
show partitions xxx
查看函数用法
desc function extended concat
日期函数
date_format(“2017-11-17”,‘yyyy-MM-dd’)
from_unixtime(“unixtime”[,“yyyy-MM-dd”])
datediff(date1, date2)
current_date()
字符串截取
substr(xxx, 1, 2)
##分析函数和窗口函数
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+WindowingAndAnalytics#LanguageManualWindowingAndAnalytics-WINDOWclause
LEAD,LAG,FIRST_VALUE,LAST_VALUE
分析函数
RANK,ROW_NUMBER,DENSE_RANK,CUME_DIST,PERCENT_RANK,NTILE
over
COUNT,SUM,MIN,MAX,AVG
hive左关联
left join
左边小表 右边大表 可以输出多少张数据? 左面的表可以全部出来