1. 收集表的统计信息
-- Compute statistics for table XXX (replace XXX with the real table name).
ANALYZE TABLE XXX
    COMPUTE STATISTICS;
2. 合并小文件
-- Only RCFile and ORCFile Formats are supported right now
非分区表
-- Merge the small files of a non-partitioned table in place.
ALTER TABLE XXX
    CONCATENATE;
分区表
-- Merge the small files of a single partition; dt value is an example.
ALTER TABLE XXX
    PARTITION (dt = '2022-06-06')
    CONCATENATE;
3. Hive 字段重命名后,查询该字段的值为 NULL
解决方案:让 Parquet 按字段顺序(而不是按字段名)读取值
-- Workaround for NULL values after a column rename: read columns by
-- position instead of by name.
ALTER TABLE XXX
    SET TBLPROPERTIES ('parquet.column.index.access' = 'true');
4. 查看hive 版本
# Print the installed Hive version from the command line.
hive --version
# Example output: Hive 2.3.7-amzn-3
5. 通过元数据库(Metastore)查看库、表、字段信息
-- List every column of one Hive table by querying the metastore database.
-- Replace 'db_name' / 'table_name' with the database and table to inspect.
-- Result: one row per column, in declaration order (INTEGER_IDX).
SELECT
    DBS.DB_ID
    ,DBS.NAME
    ,TBLS.*
    ,COLUMNS_V2.CD_ID
    ,COLUMNS_V2.COLUMN_NAME
    ,COLUMNS_V2.TYPE_NAME
    ,COLUMNS_V2.COMMENT
    ,COLUMNS_V2.INTEGER_IDX
FROM TBLS
-- The WHERE clause filters on DBS.NAME, so rows without a matching DBS row
-- are always discarded; INNER JOIN states that explicitly.
INNER JOIN DBS
    ON TBLS.DB_ID = DBS.DB_ID
-- SDS (storage descriptor) links the table to its column descriptor (CD_ID).
LEFT JOIN SDS
    ON TBLS.SD_ID = SDS.SD_ID
LEFT JOIN COLUMNS_V2
    ON SDS.CD_ID = COLUMNS_V2.CD_ID
WHERE DBS.NAME = 'db_name'
    AND TBLS.TBL_NAME = 'table_name'
    -- Optional: narrow the result to a single column.
    -- AND COLUMNS_V2.COLUMN_NAME = 'col_name'
ORDER BY
    COLUMNS_V2.INTEGER_IDX
    ,TBLS.TBL_ID;
-- List the partitions registered for one table in the metastore.
-- 3638 is an example TBLS.TBL_ID; substitute the id of the table being
-- inspected. TBL_ID is numeric, so compare against a numeric literal.
SELECT *
FROM PARTITIONS
WHERE TBL_ID = 3638;
-- Partition key info: dump the metastore PARTITION_KEYS table.
SELECT *
FROM PARTITION_KEYS;