CREATE EXTERNAL
TABLE IF NOT EXISTS data_zh(
ROWKEY STRING,
STATION INT,
YEAR INT,
MONTH INT,
DAY INT,
HOUR INT,
MINUTE INT,
)
PARTITIONED BY (AGE INT)
指定分区(此列并没真正存储列,也就是不存于你的数据中。但是如果你的数据从Oracle按年份导出,按照年份分区,把每一年数据放到age对应的目录下)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ‘,’
字段分隔符
LINES TERMINATED BY ‘\n’ 行分隔符
STORED AS TEXTFILE; 作为文本存储
Hive加载数据到表中
加载数据
LOAD DATA INPATH
‘/data/’ OVERWRITE INTO TABLE data_zh;
#加载某个目录下所有数据,存在分区数据不能这样加载
分区加载数据
创建分区
ALTER TABLE data_zhp ADD PARTITION(AGE= 1998)
加载数据
LOAD DATA INPATH ‘/data/1998.txt’ INTO TABLE data_zhp
PARTITION(YEAR=1998);
指定分区目录
LOCATION ‘
/hiveuser/hive/warehouse/data_zh.db/data_zh/2012.txt’;(指定分区所在位置)
加载数据到指定分区,分区表加载方法
下面为将现有表,修改为分区表。注意创建分区的时候未指定分区表,不能创建分区。需要新建表哥,用动态分区导入数据,动态分区数量有限。参考Programming
Hive记得最大上限应该是10000
Hive修改现有表添加分区
添加分区
ALTER TABLE data_zh ADD IF NOT EXISTS
PARTITION (year = 1998) LOCATION
‘/hiveuser/hive/warehouse/data_zh.db/data_zh/1998.txt’
删除分区
ALTER TABLE data_zhp DROP IF EXISTS PARTITION(year =1998);
修改现有表添加分区
创建分区表
CREATE EXTERNAL TABLE IF NOT EXISTS data_zhp(
ROWKEY STRING,
STATION INT,
MONTH INT,
DAY INT,
HOUR INT,
MINUTE INT,
)
PARTITIONED BY (YEAR INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ‘,’
LINES TERMINATED BY ‘\n’
STORED AS TEXTFILE;
动态分区指令
set
hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
#set hive.enforce.bucketing = true;
未分区表数据导入分区表
insert overwrite table data_zhp partition (YEAR,MONTH) select *
from data_zh;