tips
1.
create external table ext_user(id int, mail String, count int) partitioned by (ds string) row format delimited fields terminated by '\t' stored as textfile location '/dsap/rawdata/user'; hdfs location 路径需为文件夹,或不存在(执行后会自动创建文件夹),另 date为保留关键字
2.
hadoop fs -put /work/user.txt /dsap/rawdata/user/20120412/user.txt
hadoop fs -put /work/user.txt /dsap/rawdata/user/20120413/user.txt
hadoop fs -ls /dsap/rawdata/user
drwxr-xr-x - root supergroup 0 2012-04-13 16:46 /dsap/rawdata/user/20120412
drwxr-xr-x - root supergroup 0 2012-04-13 16:46 /dsap/rawdata/user/20120413
3.
load data inpath '/dsap/rawdata/user/20120412' overwrite into table ext_user partition (ds='20120412');
load data inpath '/dsap/rawdata/user/20120413' overwrite into table ext_user partition (ds='20120413');
后
hadoop fs -ls /dsap/rawdata/user
drwxr-xr-x - root supergroup 0 2012-04-13 16:46 /dsap/rawdata/user/ds=20120412
drwxr-xr-x - root supergroup 0 2012-04-13 16:46 /dsap/rawdata/user/ds=20120413
alter table ext_user add partition (ds='20120411') location '/dsap/rawdata/user/20120411';
后
hadoop fs -ls /dsap/rawdata/user
drwxr-xr-x - root supergroup 0 2012-04-13 17:06 /dsap/rawdata/user/20120411
drwxr-xr-x - root supergroup 0 2012-04-13 16:46 /dsap/rawdata/user/ds=20120412
drwxr-xr-x - root supergroup 0 2012-04-13 16:46 /dsap/rawdata/user/ds=20120413
4.
select count(*) from ext_user where ds='20120413';
5.
show partitions ext_user;
only test:
hadoop fs -put /work/user.txt /dsap/rawdata/user/20120415/user.txt
hadoop fs -put /work/user.txt /dsap/rawdata/user/20120416/user.txt
fs -ls
drwxr-xr-x - root supergroup 0 2012-04-13 17:59 /dsap/rawdata/user/20120415
drwxr-xr-x - root supergroup 0 2012-04-13 17:59 /dsap/rawdata/user/20120416
then
load data inpath '/dsap/rawdata/user/20120415' overwrite into table ext_user1 partition (ds='20120415');
fs -ls
drwxr-xr-x - root supergroup 0 2012-04-13 18:02 /dsap/rawdata/user/20120416
drwxr-xr-x - root supergroup 0 2012-04-13 17:59 /dsap/rawdata/user/ds=20120415 (20120415->ds=20120415)
do a tricky test:
load data inpath '/dsap/rawdata/user/ds=20120415' overwrite into table ext_user1 partition (ds='ds=20120415');
fs -ls
drwxr-xr-x - root supergroup 0 2012-04-13 18:02 /dsap/rawdata/user/20120416
drwxr-xr-x - root supergroup 0 2012-04-13 17:59 /dsap/rawdata/user/ds=ds%3D20120415 (ds=20120415->ds=ds%3D20120415)
hadoop fs -mv /dsap/rawdata/user/ds=ds%3D20120415 /dsap/rawdata/user/20120415
load data inpath '/dsap/rawdata/user/20120415' overwrite into table ext_user1 partition (ds='test');
drwxr-xr-x - root supergroup 0 2012-04-13 17:59 /dsap/rawdata/user/ds=test
conclusion: load data inpath 分区时候会使hdfs路径修改, mv 原路径 partition(分区=值)的形式
alter table ext_user1 add partition (ds=20120416) location '/dsap/rawdata/user/20120416';
drwxr-xr-x - root supergroup 0 2012-04-13 18:02 /dsap/rawdata/user/20120416 路径不变
tricky one
hadoop fs -put /work/user.txt /dsap/rawdata/user/20120417/user.txt
alter table ext_user1 add partition (ds='ok') location '/dsap/rawdata/user/20120417';
drwxr-xr-x - root supergroup 0 2012-04-13 18:19 /dsap/rawdata/user/20120417 路径不变
conclusion alter table add partition(分区=值) location ''; 不改变hdfs路径
外部表分区添加数据方式代码示例
最新推荐文章于 2022-03-31 10:13:58 发布