表
内外部表
- 表与数据同在/不同在
- 删除表的区别
- 创建一个表实例
create <external> table cr_cdma_fin
(
bil_month string COMMENT "中文注释",
accs_nbr string,
asset_row_id string,
fin_amt float,
owe_amt float,
owe_month string,
owe_dur string,
latn_id string)
<partitioned by (month string)>
row format delimited
fields terminated by '\t'
<location '/credit/bic/m/cr_cdma_fin/'>;
(多级/静态/动态)分区表
- 结合业务场景来理解 例如按城市分区、按时间分区等
- 意义包括:化大为小,方便管理;查询效率提高
- 不是实际表字段,但可以像表字段一样使用
- 查看分区
SHOW PARTITIONS <table name>
alter table <table name> add if not exists partition (p1 = "xxx",p2 = "yyy") location '/....../xxx/yyy';
INSERT OVERWRITE table test_beilun partition (p1="aa",p2="bb")
select xx,xx,xx
from xxx
where yyy;
ALTER TABLE <table name> DROP IF EXISTS PARTITION(p="xxx");
- 动态分区
- 大批量多级分区数据导入;二级分区未知,需要自动识别
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table partition_test partition(stat_date='20110728',province)
select member_id,name,province from partition_test_input where stat_date='20110728';
导入数据
导入本地数据
LOAD DATA LOCAL INPATH '/home/hadoop/name/tmp' INTO TABLE tmp <PARTITION (day = XXX)>;
导入集群数据
LOAD DATA INPATH '/home/hadoop/name/tmp' INTO TABLE tmp <PARTITION (day = XXX)>;
alter table NET_CDR_DS_FIX_O add if not exists partition (dt=20151027) location '/SOURCE/RAW/D/20151027';
从别的表中select数据到对应分区
INSERT OVERWRITE table test_beilun partition (day=20150514)
select id,timestamps,url,ref,day,hour
from table_name
where day= 20150514 and parse_url(concat("http://",url),'HOST') like '%4399.com';
<