-- 1. Create the ODS table, stored in the ORC (compressed columnar) format.
--    Partition hierarchy: type -> years -> months -> days.
--    `time` is backtick-quoted because it is a reserved keyword in newer
--    Hive versions; the column name itself is unchanged.
CREATE TABLE IF NOT EXISTS ods.test(
id STRING COMMENT 'id',
`time` INT COMMENT 'data time of server time',
datas STRING COMMENT 'data'
)
comment ' with partition type first then years then months then days'
PARTITIONED BY (type STRING,years STRING,months STRING,days STRING)
STORED AS ORC
;
-- Recover partitions: registers partition directories that exist on HDFS
-- but are missing from the metastore.
MSCK REPAIR TABLE ods.test
;
show partitions ods.test;
-- NOTE(review): this drops the table repaired just above -- presumably these
-- are standalone maintenance commands, not meant to run as one script; confirm.
-- IF EXISTS added so the statement does not error when the table is absent.
drop table if exists ods.test;
-- 2. Write data into ods.test using dynamic partitioning.
--    nonstrict mode lets all four partition columns be resolved dynamically
--    from the SELECT output (no static partition value required).
set hive.exec.dynamic.partition.mode=nonstrict;
-- Small-file mitigation: cap split size and merge small output files
-- (sizes in bytes).
set mapred.max.split.size=1024000000;
set hive.merge.mapredfiles= true;
set hive.merge.smallfiles.avgsize=256000000;
-- SELECT order must match the target: data columns (id, time, datas) first,
-- then the dynamic partition columns (type, years, months, days).
-- server_t feeds the `time` column by position.
INSERT INTO TABLE ods.test PARTITION(type,years,months,days)
SELECT id,server_t,datas,type,years,months,days
from ods.test_current
WHERE years='${years}' AND months='${months}' AND days='${days}';
-- 3. Create a staging table and overwrite it with the distinct value list.
--    A single reducer forces the result into one output file.
set mapred.reduce.tasks = 1;
-- `function` is a reserved keyword in Hive 1.2+ -- backtick-quoted here and
-- below; the column name itself is unchanged.
create table if not exists temp.test_function(
`function` string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
;
-- NOTE(review): ods.test as declared in step 1 has no `function` column --
-- verify this reads the intended table/schema.
-- The concat(years,months,days) IN (...) form matches three specific days
-- (two historical: *0 / *1, plus the current one).
insert overwrite table temp.test_function
SELECT DISTINCT `function`
from ods.test
where concat(years,months,days) in (concat('${years0}','${months0}','${days0}'),concat('${years1}','${months1}','${days1}'),concat('${years}','${months}','${days}'))
;
-- 4. Save query results into an HDFS directory.
-- Because the CDH cluster enforces Kerberos authentication, which restricts
-- this operation, the create-temp-table-and-overwrite approach (step 3) is
-- used in practice instead.
set mapred.reduce.tasks = 1;
-- Fix: the SELECT referenced alias b.`function` but the FROM clause defined
-- no alias -- `b` is now declared on the table.
-- NOTE(review): the table name carries a date suffix while the WHERE clause
-- filters on partition columns years/months/days -- one of the two is likely
-- redundant or wrong; confirm against step 2, which reads plain ods.test.
insert overwrite directory '/user/wang.nengjie/jars/Test_Analysis/input_file'
SELECT DISTINCT b.`function`
from ods.test_${years}_${months}_${days} b
WHERE years='${years}' AND months='${months}' AND days='${days}' ;