hive二级分区

几年前的工作了,放在草稿箱2,3年了,整理一下吧。

内容主要是扩展了hive的分区方式,原生的hive支持多字段等值分区,对于很有用的range、hash、list等数据库传统的分区方式不支持,因此这个做了二级的多类分区的扩展。

这里只是antlr的定义,显然你还需要在查询、存储层做更多的工作来支持这种分区工作,量还是不小的,我们的工作不具参考性,这里只列一下sql解析的修改,给大家做参考。


语法定义

tablePartition
@init { msgs.push("table partition specification"); }
@after { msgs.pop(); }
: KW_PARTITIONED KW_BY partParamList=partitionParamList
tableSubPartition?
partitionTemplate?
-> ^(TOK_PARTITIONED_BY $partParamList
tableSubPartition?
partitionTemplate?)
;

tableSubPartition
@init { msgs.push("table subPartition specification"); }
@after { msgs.pop(); }
:KW_SUBPARTITIONED KW_BY partParamList=partitionParamList
subPartitionTemplate?
-> ^(TOK_SUBPARTITIONED_BY $partParamList
subPartitionTemplate?)
;

partitionParamList
@init { msgs.push(" partitionParamList specification"); }
@after { msgs.pop(); }
:(LPAREN (columnNameTypeList
|columnNameList
| functionList
) RPAREN)
| (columnNameTypeList
|columnNameList
| functionList)
;

partitionTemplate
@init { msgs.push("table PartitionTemplate specification"); }
@after { msgs.pop(); }
:partitionExper (COMMA partitionExper)*
-> ^(TOK_PARTITION_EXPER partitionExper+)
;

subPartitionTemplate
@init { msgs.push(" subPartitionTemplate specification"); }
@after { msgs.pop(); }
: (subPartitionExper (COMMA subPartitionExper)*)
-> ^(TOK_SUBPARTITION_EXPER subPartitionExper+)
;

partitionExper
@init { msgs.push("partitionExper specification"); }
@after { msgs.pop(); }
:KW_PARTITION partition_name=partitionName partitionValuesExper tableSubPartition?
-> ^(TOK_PARTITION $partition_name partitionValuesExper tableSubPartition?)
;

partitionValuesExper
@init { msgs.push(" partitionValuesExper specification"); }
@after { msgs.pop(); }
:KW_VALUES KW_LESS KW_THEN LPAREN value=stringOrNumOrFunc RPAREN
-> ^(TOK_VALUES_LESS $value )
|KW_VALUES KW_GREATER KW_THEN LPAREN value=stringOrNumOrFunc RPAREN
-> ^(TOK_VALUES_GREATER $value )
| KW_VALUES LPAREN valueList=stringOrNumOrFuncList RPAREN
-> ^(TOK_VALUES $valueList )
;

subPartitionExper
@init { msgs.push("subPartitionExper specification"); }
@after { msgs.pop(); }
/*
:KW_SUBPARTITION partitionName (
(KW_VALUES ((KW_LESS|KW_GREATER) KW_THEN) LPAREN (stringOrNumOrFunc) RPAREN)
| KW_VALUES LPAREN (stringOrNumOrFuncList) RPAREN)
;
*/
:KW_SUBPARTITION partition_name=partitionName partitionValuesExper
-> ^(TOK_SUBPARTITION $partition_name partitionValuesExper)
;

stringOrNumOrFunc
@init { msgs.push("table stringOrNumOrFunc specification"); }
@after { msgs.pop(); }
:StringLiteral
|Number
|function
;

stringOrNumOrFuncList
@init { msgs.push("table stringOrNumOrFuncList specification"); }
@after { msgs.pop(); }
:(stringOrNumOrFunc (COMMA stringOrNumOrFunc)*)
-> ^(TOK_STR_OR_NUM_OR_FUNC stringOrNumOrFunc+)
;

partitionName
@init { msgs.push("table partitionName specification"); }
@after { msgs.pop(); }
:
Identifier
;

datawarehouseStatement
@init { msgs.push("table datawarehouseStatement specification"); }
@after { msgs.pop(); }
:
KW_ALTER KW_DW KW_DIRECT LPAREN dwNo=Number COMMA sql=StringLiteral RPAREN
-> ^(TOK_ALTER_DW $dwNo $sql)
;

//added by zjw to customize table partition
functionList
@init { msgs.push("table partition function specification"); }
@after { msgs.pop(); }
:
function (COMMA function)* -> ^(TOK_TABLEPARTCOLS (function)+)
;


JDO定义:

<field name="partitionKeys" table="PARTITION_KEYS" >
<collection element-type="MFieldSchema"/>
<join>
<primary-key name="PARTITION_KEY_PK">
<column name="TBL_ID"/>
<column name="PKEY_NAME"/>
</primary-key>
<column name="TBL_ID"/>
</join>
<element>
<embedded>
<field name="name">
<column name="PKEY_NAME" length="128" jdbc-type="VARCHAR"/>
</field>
<field name="type">
<column name="PKEY_TYPE" length="767" jdbc-type="VARCHAR" allows-null="false"/>
</field>
<field name="comment" >
<column name="PKEY_COMMENT" length="4000" jdbc-type="VARCHAR" allows-null="true"/>
</field>
<!-- added by zjw for subpartitions -->
<field name="part_num">
<column name="P_NUM" jdbc-type="integer"/>
</field>
<field name="part_level">
<column name="P_LEVEL" jdbc-type="integer"/>
</field>
<field name="part_type">
<column name="P_TYPE" length="256" jdbc-type="VARCHAR" allows-null="false"/>
</field>
<field name="part_type_param" >
<column name="P_TYPE_PARAM" length="767" jdbc-type="VARCHAR" allows-null="true"/>
</field>
<!-- added by zjw for subpartitions -->
<field name="subPartitionKeys" table="SUB_PARTITION_KEYS" >
<collection element-type="MFieldSchema"/>
<join>
<primary-key name="PARTITION_KEY_PK">
<column name="TBL_ID"/>
<column name="PKEY_NAME"/>
</primary-key>
<column name="TBL_ID"/>
</join>
<element>
<embedded>
<field name="name">
<column name="PKEY_NAME" length="128" jdbc-type="VARCHAR"/>
</field>
<field name="type">
<column name="PKEY_TYPE" length="767" jdbc-type="VARCHAR" allows-null="false"/>
</field>
<field name="comment" >
<column name="PKEY_COMMENT" length="4000" jdbc-type="VARCHAR" allows-null="true"/>
</field>
<field name="part_num">
<column name="P_NUM" jdbc-type="integer"/>
</field>
<field name="part_type">
<column name="P_TYPE" length="256" jdbc-type="VARCHAR" allows-null="false"/>
</field>
<field name="part_type_param" >
<column name="P_TYPE_PARAM" length="767" jdbc-type="VARCHAR" allows-null="true"/>
</field>
</embedded>
</element>
</field>
</embedded>
</element>
</field>


分区key定义用途有2个方面:
[list]
1 加载需要确定如何划分加载的文件个数和数据路由
2 查询如何按照分区方式,优化查询计划
[/list]

[table]
|p_key|tbl_id|p_type|integer_idx
[/table]

[table]
|p_key|tbl_id|p_type|integer_idx
[/table]
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值