一.分桶表的创建
-- Template: create a Hive table whose rows are distributed into a fixed number
-- of buckets by hashing the `id` column.
-- Placeholders: replace 表名 with the table name and 桶数 with the bucket count
-- (a positive integer literal).
-- The column named in `clustered by` must be one of the columns declared in the
-- column list.
-- `row format delimited` must introduce the `... terminated by` clauses; without
-- it the delimiter clauses are a syntax error.
create table 表名(
id int,
name string
)
clustered by (id)
into 桶数 buckets
row format delimited
fields terminated by '|'
collection items terminated by ','
map keys terminated by ':'
lines terminated by '\n';
//注意: Hive 并不强制要求桶数为偶数; 建议取 2 的幂(如 4、8、16、32), 便于抽样时 y 与桶数互为倍数或因子
二.insert 方式将数据导入分桶表
-- Template: load rows into the bucketed table from an existing table.
-- Hive's INSERT takes its rows from a SELECT query, so a bare `from <table>` is
-- not enough. The column list mirrors the (id, name) definition of the bucketed
-- table; adjust it to the source table's columns.
-- NOTE(review): on Hive releases before 2.0, run
-- `set hive.enforce.bucketing = true;` first so the insert honours the declared
-- bucket count -- confirm against the target Hive version.
insert into table 分桶表名
select id, name
from 数据表;
三.抽样查询
(1)基于指定列的分桶抽样: 按 id 的哈希值把数据划分为 32 份, 取其中第 3 份(结果由 id 决定, 并非随机)
-- Bucket sampling on a column: TABLESAMPLE(BUCKET x OUT OF y ON col) with
-- x = 3, y = 32 and col = id. The sampled table is aliased as `s`.
select
    *
from 分桶表名
    tablesample (bucket 3 out of 32 on id) s;
//注意: 在 bucket x out of y 中, x 从 1 开始计数, 且必须满足 x <= y(即 y 要大于等于 x)
(2)随机抽样: on rand() 表示对整行随机分桶抽样, 而不是基于某个指定列
-- Bucket sampling with rand() as the bucketing expression instead of a column:
-- TABLESAMPLE(BUCKET x OUT OF y ON rand()) with x = 3, y = 32.
-- The sampled table is aliased as `s`.
select
    *
from 分桶表名
    tablesample (bucket 3 out of 32 on rand()) s;
四.块抽样(block sampling): 按数据量百分比、数据大小或行数抽样
-- Block sampling, variant 1: sample by percentage (10 PERCENT).
SELECT
    *
FROM table_name
    TABLESAMPLE (10 PERCENT) s;

-- Block sampling, variant 2: sample by data size (1M).
SELECT
    *
FROM table_name
    TABLESAMPLE (1M) s;

-- Block sampling, variant 3: sample by row count (10 rows).
-- NOTE(review): Hive appears to apply the row count per input split rather
-- than to the whole table -- confirm against the sampling documentation.
SELECT
    *
FROM table_name
    TABLESAMPLE (10 rows) s;