1.将数据放入分区表ip1(fadd,sadd,tadd,fouadd)中并进行分区,(区号:firstadd),即:ip第一位
2.将数据放入分桶排序(正序)表ip2,分8个桶
3.按照fadd查出第2,5桶
4.按照fadd查找第2,4,6,8桶
1.将数据放入分区表ip1(fadd,sadd,tadd,fouadd)中并进行分区,(区号:firstadd),即:ip第一位
create external table if not exists ip1(
fadd int,
asdd int,
tadd int,
fouadd int
)
partitioned by (firstadd int)
row format delimited fields terminated by '.'
lines terminated by '\n'
load data local inpath '/root/ip/ip1.txt' into table ip.ip1
partition(firstadd=192)
load data local inpath '/root/ip/ip2.txt' into table ip.ip1
partition(firstadd=132)
load data local inpath '/root/ip/ip3.txt' into table ip.ip1
partition(firstadd=122)
load data local inpath '/root/ip/ip4.txt' into table ip.ip1
partition(firstadd=182)
2.将数据放入分桶排序(正序)表ip2,分8个桶
分桶
第一种:
create external table if not exists ip2(
fadd int,
asdd int,
tadd int,
fouadd int
)
clustered by (fadd)
sorted by (fadd) into 8 buckets
row format delimited fields terminated by '.'
lines terminated by '\n'
set mapreduce.job.reduces=8
load data local inpath '/root/ip/ip1.txt' into table ip.ip2
load data local inpath '/root/ip/ip2.txt' into table ip.ip2
load data local inpath '/root/ip/ip3.txt' into table ip.ip2
load data local inpath '/root/ip/ip4.txt' into table ip.ip2
第二种:
create external table if not exists t_ip2(
fadd int,
asdd int,
tadd int,
fouadd int
)
row format delimited fields terminated by '.'
lines terminated by '\n'
load data local inpath '/root/ip/ip1.txt' into table ip.t_ip2;
load data local inpath '/root/ip/ip2.txt' into table ip.t_ip2;
load data local inpath '/root/ip/ip3.txt' into table ip.t_ip2;
load data local inpath '/root/ip/ip4.txt' into table ip.t_ip2;
insert overwrite table ip2
select fadd,asdd,tadd,fouadd
from t_ip2
set hive.exec.mode.local.auto=true;
create external table if not exists ip3(
fadd int,
asdd int,
tadd int,
fouadd int
)
clustered by (asdd)
sorted by (asdd) into 8 buckets
row format delimited fields terminated by '.'
lines terminated by '\n'
insert overwrite table ip3
select fadd,asdd,tadd,fouadd
from t_ip2
3.按照fadd查出第2,5桶
select * from ip3 tablesample(bucket 2 out of 4 on asdd);
4.按照fadd查找第2,4,6,8桶
select * from ip3 tablesample(bucket 2 out of 2 on asdd);