AWS - Redshift - Unload 数据到S3产生的文件名

当 Redshift Unload 数据时,文件名称会根据表的分配方式及数据分布有关。

实验一: 表分配方式为 ALL,数据存在在多个节点,无法预计从哪个节点导出数据。默认情况下,表数据量小时,按ALL分配,变大之后改为EVEN

-- Create Table
testdb=# create table test_unload_19_all (id int);
CREATE TABLE
-- Insert row
testdb=# insert into test_unload_19_all values (0);
INSERT 0 1
-- Table Diststyle
testdb=# select "table", diststyle from pg_catalog.svv_table_info where "table" = 'test_unload_19_all';
       table        | diststyle
--------------------+-----------
 test_unload_19_all | AUTO(ALL)
(1 row)
-- Table rows in slices
testdb=# select trim(name) as table, stv_blocklist.slice, stv_tbl_perm.rows
from stv_blocklist,stv_tbl_perm
where stv_blocklist.tbl=stv_tbl_perm.id
and stv_tbl_perm.slice=stv_blocklist.slice
and stv_blocklist.id > 10000 and name not like '%#m%'
and name not like 'systable%'
and name = 'test_unload_19_all' -- tablename
group by name, stv_blocklist.slice, stv_tbl_perm.rows
order by 2 desc;
       table        | slice | rows
--------------------+-------+------
 test_unload_19_all |     6 |    1
 test_unload_19_all |     4 |    1
 test_unload_19_all |     2 |    1
 test_unload_19_all |     0 |    1
(4 rows)
-- Unload to S3
testdb=# unload ('select * from test_unload_19_all')
testdb-# to 's3://XXXXX/XXXXX/test_unload_19_all'
testdb-# iam_role 'arn:aws-cn:iam::000000000000:role/XXXXX'
testdb-# format as parquet
testdb-# allowoverwrite;
INFO:  UNLOAD completed, 1 record(s) unloaded successfully.
UNLOAD

– Unload file name
test_unload_19_all0002_part_00.parquet

-- Unload to S3 again
testdb=# unload ('select * from test_unload_19_all')
testdb-# to 's3://XXXXX/XXXXX/test_unload_19_all'
testdb-# iam_role 'arn:aws-cn:iam::000000000000:role/XXXXX'
testdb-# format as parquet
testdb-# allowoverwrite;
INFO:  UNLOAD completed, 1 record(s) unloaded successfully.
UNLOAD

– Unload file name
test_unload_19_all0004_part_00.parquet

由于数据存在在 切片 0,2,4,6 中,所以无法预计本次导出时从哪个文件中导出。

实验二:表分配方式为 KEY , EVEN,数据仅保存在一个分片中,则每次导出结果为一致。

testdb=# create table test_unload_19_even (id int) diststyle even;
CREATE TABLE
testdb=# insert into test_unload_19_even values (0);
INSERT 0 1
testdb=# select "table", diststyle from pg_catalog.svv_table_info where "table" = 'test_unload_19_even';
        table        | diststyle
---------------------+-----------
 test_unload_19_even | EVEN
(1 row)
testdb=# select trim(name) as table, stv_blocklist.slice, stv_tbl_perm.rows
from stv_blocklist,stv_tbl_perm
where stv_blocklist.tbl=stv_tbl_perm.id
and stv_tbl_perm.slice=stv_blocklist.slice
and stv_blocklist.id > 10000 and name not like '%#m%'
and name not like 'systable%'
and name = 'test_unload_19_even' -- tablename
group by name, stv_blocklist.slice, stv_tbl_perm.rows
order by 2 desc;
        table        | slice | rows
---------------------+-------+------
 test_unload_19_even |     4 |    1
(1 row)
-- Unload to S3
testdb=# unload ('select * from test_unload_19_even')
testdb-# to 's3://XXXXX/XXXXX/test_unload_19_even'
testdb-# iam_role 'arn:aws-cn:iam::000000000000:role/XXXXX'
testdb-# format as parquet
testdb-# allowoverwrite;
INFO:  UNLOAD completed, 1 record(s) unloaded successfully.
UNLOAD
-- Unload file name
test_unload_19_even0004_part_00.parquet
-- Unload to S3
testdb=# unload ('select * from test_unload_19_even')
testdb-# to 's3://XXXXX/XXXXX/test_unload_19_even'
testdb-# iam_role 'arn:aws-cn:iam::000000000000:role/XXXXX'
testdb-# format as parquet
testdb-# allowoverwrite;
INFO:  UNLOAD completed, 1 record(s) unloaded successfully.
UNLOAD
-- Unload file name 
test_unload_19_even0004_part_00.parquet
-- 分配方式为Key
testdb=# create table test_unload_19_key (id int) diststyle key distkey (id);
CREATE TABLE
testdb=# insert into test_unload_19_key values (0);
INSERT 0 1
testdb=# select "table", diststyle from pg_catalog.svv_table_info where "table" = 'test_unload_19_key';
       table        | diststyle
--------------------+-----------
 test_unload_19_key | KEY(id)
(1 row)
testdb=# select trim(name) as table, stv_blocklist.slice, stv_tbl_perm.rows
testdb-# from stv_blocklist,stv_tbl_perm
testdb-# where stv_blocklist.tbl=stv_tbl_perm.id
testdb-# and stv_tbl_perm.slice=stv_blocklist.slice
testdb-# and stv_blocklist.id > 10000 and name not like '%#m%'
testdb-# and name not like 'systable%'
testdb-# and name = 'test_unload_19_key' -- tablename
testdb-# group by name, stv_blocklist.slice, stv_tbl_perm.rows
testdb-# order by 2 desc;
       table        | slice | rows
--------------------+-------+------
 test_unload_19_key |     7 |    1
(1 row)
testdb=# unload ('select * from test_unload_19_key')
testdb-# to 's3://chen-redshift-spectrum/redshift-unload/test_unload_19_key'
testdb-# iam_role 'arn:aws-cn:iam::524560386974:role/chen-redshift-role'
testdb-# format as parquet
testdb-# allowoverwrite;
INFO:  UNLOAD completed, 1 record(s) unloaded successfully.
UNLOAD

– file name
test_unload_19_key0007_part_00.parquet

当表的分配方式为 Key 与 Even 时,多次导出文件名称不变。只会跟切片有关。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值