文章目录
背景
Ceph的对象存储兼容S3协议,支持对象的整体上传和分段上传功能,详细介绍可参考网上资料。本文主要基于具体实践,观察整体上传和分段上传在rados对象分布上的差异。
本文结论
- 关键配置
- rgw_max_chunk_size:整体上传时使用,为对象文件拆分后第一个rados对象(header对象)的大小上限。
- rgw_obj_stripe_size:整体上传和分段上传均使用,为对象文件拆分后每个stripe的大小上限(最后一个stripe可能小于该值)。
- 整体上传rados对象分布
rados对象 | size | 角色 |
---|---|---|
{bucketid} + ‘_’ + {objectname} | rgw_max_chunk_size | header对象,存放元数据也存放部分数据 |
{bucketid} + ‘_’ + ‘_shadow_’ + {objectprefix} + ‘_’ + {id} | rgw_obj_stripe_size | 存放数据 |
- 分段上传rados对象分布
rados对象 | size | 角色 |
---|---|---|
{bucketid} + ‘_’ + {objectname} | 0 | header对象,存放元数据 |
{bucketid} + ‘_’ + ‘_multipart_’ + {objectprefix} + ‘.’ + {分段id} | rgw_obj_stripe_size | 每个分段第一个对象 |
{bucketid} + ‘_’ + ‘_shadow_’ + {objectprefix} + ‘.’ + {分段id} + ‘_’ + {id} | rgw_obj_stripe_size | 每个分段其他对象 |
对象上传实践
环境准备
查询RGW配置rgw_max_chunk_size=2M和rgw_obj_stripe_size=8M
查询配置,其中rgw_max_chunk_size=2M,rgw_obj_stripe_size=8M。
[root@poctest_100 /data/objtest]# ceph daemon /var/run/ceph/*.asok config show | grep "rgw_obj_stripe_size\|rgw_max_chunk_size"
"rgw_max_chunk_size": "2097152",
"rgw_obj_stripe_size": "8388608",
创建一个bucket,名字为testbucket0
[root@poctest_100 /data/objtest]# s3cmd mb s3://testbucket0
Bucket 's3://testbucket0/' created
分段上传分析
创建一个100M的文件file_100M.bin,上传至testbucket0中,分段size为30M
[root@poctest_100 /data/objtest]# s3cmd put file_100M.bin s3://testbucket0 --multipart-chunk-size-mb=30
upload: 'file_100M.bin' -> 's3://testbucket0/file_100M.bin' [part 1 of 4, 30MB] [1 of 1]
31457280 of 31457280 100% in 0s 43.98 MB/s done
upload: 'file_100M.bin' -> 's3://testbucket0/file_100M.bin' [part 2 of 4, 30MB] [1 of 1]
31457280 of 31457280 100% in 0s 41.81 MB/s done
upload: 'file_100M.bin' -> 's3://testbucket0/file_100M.bin' [part 3 of 4, 30MB] [1 of 1]
31457280 of 31457280 100% in 0s 40.51 MB/s done
upload: 'file_100M.bin' -> 's3://testbucket0/file_100M.bin' [part 4 of 4, 10MB] [1 of 1]
10485760 of 10485760 100% in 0s 30.69 MB/s done
获取testbucket0的id
[root@poctest_100 /data/objtest]# radosgw-admin bucket stats | grep testbucket0 -A 20
"bucket": "testbucket0",
"num_shards": 128,
"tenant": "",
"zonegroup": "63a80505-8c43-43e8-b1c3-b7a600fabdfe",
"placement_rule": "objpool0",
"explicit_placement": {
"data_pool": "",
"data_tail_pool": "",
"data_extra_pool": "",
"index_pool": ""
},
"index_type": "Normal",
"id": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1", // testbucket0的id
"marker": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"index_type": "Normal",
"owner": "admin",
查询manifest对象
bucketid = e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1
manifest对象名:{bucketid} + _ + {objectname} = e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1_file_100M.bin
[root@poctest_100 /data/objtest]# rados -p 00000000-default.rgw.buckets.data listxattr e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1_file_100M.bin user.rgw.manifest
user.rgw.acl
user.rgw.content_type
user.rgw.etag
user.rgw.idtag
user.rgw.manifest
user.rgw.pg_ver
user.rgw.source_zone
user.rgw.tail_tag
user.rgw.x-amz-date
user.rgw.x-amz-meta-s3cmd-attrs
[root@poctest_100 /data/objtest]# rados -p 00000000-default.rgw.buckets.data getxattr e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1_file_100M.bin user.rgw.manifest > manifest.file_100M.bin.txt
[root@poctest_100 /data/objtest]# ceph-dencoder import 'manifest.file_100M.bin.txt' type RGWObjManifest decode dump_json
{
"objs": [],
"obj_size": 104857600,
"explicit_objs": "false",
"head_size": 0,
"max_head_size": 0,
"prefix": "file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR", // prefix
"rules": [
{
"key": 0,
"val": {
"start_part_num": 1,
"start_ofs": 0,
"part_size": 31457280,
"stripe_max_size": 8388608,
"override_prefix": ""
}
},
{
"key": 94371840,
"val": {
"start_part_num": 4,
"start_ofs": 94371840,
"part_size": 10485760,
"stripe_max_size": 8388608,
"override_prefix": ""
}
}
],
"tail_instance": "",
"tail_placement": {
"bucket": {
"name": "testbucket0",
"marker": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"bucket_id": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_tail_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"placement_rule": "objpool0"
},
"begin_iter": {
"part_ofs": 0,
"stripe_ofs": 0,
"ofs": 0,
"stripe_size": 8388608,
"cur_part_id": 1,
"cur_stripe": 0,
"cur_override_prefix": "",
"location": {
"placement_rule": "objpool0",
"obj": {
"bucket": {
"name": "testbucket0",
"marker": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"bucket_id": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_tail_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"key": {
"name": "file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.1", // 第一个分段{prefix}.1
"instance": "",
"ns": "multipart"
}
},
"raw_obj": {
"pool": "",
"oid": "",
"loc": ""
},
"is_raw": false
}
},
"end_iter": {
"part_ofs": 104857600,
"stripe_ofs": 104857600,
"ofs": 104857600,
"stripe_size": 8388608,
"cur_part_id": 5,
"cur_stripe": 0,
"cur_override_prefix": "",
"location": {
"placement_rule": "objpool0",
"obj": {
"bucket": {
"name": "testbucket0",
"marker": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"bucket_id": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_tail_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"key": {
"name": "file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.5", // end_iter指向末尾之后的位置{prefix}.5(该对象并不存在),实际共4个分段,最后一个分段为{prefix}.4
"instance": "",
"ns": "multipart"
}
},
"raw_obj": {
"pool": "",
"oid": "",
"loc": ""
},
"is_raw": false
}
}
}
查看rados对象
[root@poctest_100 /data/objtest]# rados -p 00000000-default.rgw.buckets.data ls | grep "file_100M.bin"
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1_file_100M.bin
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.1_3
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.1_2
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.2_2
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__multipart_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.3
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__multipart_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.2
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.2_1
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__multipart_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.1
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.3_3
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.2_3
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__multipart_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.4
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.3_2
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.4_1
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.1_1
e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.3_1
根据观察对象size可知,对象分段为:30M (8M + 8M + 8M + 6M) + 30M (8M + 8M + 8M + 6M) + 30M (8M + 8M + 8M + 6M) + 10M (8M + 2M),即上传时分成30M + 30M + 30M + 10M四个分段,每个30M分段根据rgw_obj_stripe_size切分成8M + 8M + 8M + 6M。
[root@poctest_100 /data/objtest]# for obj in `rados -p 00000000-default.rgw.buckets.data ls | grep "2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR"`; do rados -p 00000000-default.rgw.buckets.data stat ${obj}; done
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1_file_100M.bin mtime 2022-07-04 11:15:45.000000, size 0
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__multipart_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.1 mtime 2022-07-04 11:15:43.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.1_1 mtime 2022-07-04 11:15:43.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.1_2 mtime 2022-07-04 11:15:43.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.1_3 mtime 2022-07-04 11:15:43.000000, size 6291456
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__multipart_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.2 mtime 2022-07-04 11:15:44.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.2_1 mtime 2022-07-04 11:15:43.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.2_2 mtime 2022-07-04 11:15:43.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.2_3 mtime 2022-07-04 11:15:44.000000, size 6291456
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__multipart_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.3 mtime 2022-07-04 11:15:44.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.3_1 mtime 2022-07-04 11:15:44.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.3_2 mtime 2022-07-04 11:15:44.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.3_3 mtime 2022-07-04 11:15:44.000000, size 6291456
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__multipart_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.4 mtime 2022-07-04 11:15:45.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_file_100M.bin.2~ynp_RQ9m3Pevoh_CXDTcDujtdD8_4NR.4_1 mtime 2022-07-04 11:15:45.000000, size 2097152
整体上传分析
创建一个50M的文件file_50M.bin,上传至testbucket0,关闭分段
[root@poctest_100 /data/objtest]# s3cmd put file_50M.bin s3://testbucket0 --disable-multipart
upload: 'file_50M.bin' -> 's3://testbucket0/file_50M.bin' [1 of 1]
52428800 of 52428800 100% in 0s 53.54 MB/s done
查询manifest对象
[root@poctest_100 /data/objtest]# rados -p 00000000-default.rgw.buckets.data listxattr e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1_file_50M.bin user.rgw.manifest
user.rgw.acl
user.rgw.content_type
user.rgw.etag
user.rgw.idtag
user.rgw.manifest
user.rgw.pg_ver
user.rgw.source_zone
user.rgw.storage_class
user.rgw.tail_tag
user.rgw.x-amz-date
user.rgw.x-amz-meta-s3cmd-attrs
[root@poctest_100 /data/objtest]# rados -p 00000000-default.rgw.buckets.data getxattr e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1_file_50M.bin user.rgw.manifest > manifest.file_50M.bin.txt
[root@poctest_100 /data/objtest]# ceph-dencoder import 'manifest.file_50M.bin.txt' type RGWObjManifest decode dump_json
{
"objs": [],
"obj_size": 52428800,
"explicit_objs": "false",
"head_size": 2097152,
"max_head_size": 2097152,
"prefix": ".-ORMlsBRTV9kAJWl3GjTDqpRnZNJyxS_",
"rules": [
{
"key": 0,
"val": {
"start_part_num": 0,
"start_ofs": 2097152,
"part_size": 0,
"stripe_max_size": 8388608,
"override_prefix": ""
}
}
],
"tail_instance": "",
"tail_placement": {
"bucket": {
"name": "testbucket0",
"marker": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"bucket_id": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_tail_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"placement_rule": "objpool0"
},
"begin_iter": {
"part_ofs": 0,
"stripe_ofs": 0,
"ofs": 0,
"stripe_size": 2097152,
"cur_part_id": 0,
"cur_stripe": 0,
"cur_override_prefix": "",
"location": {
"placement_rule": "objpool0",
"obj": {
"bucket": {
"name": "testbucket0",
"marker": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"bucket_id": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_tail_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"key": {
"name": "file_50M.bin", // 第一个分片
"instance": "",
"ns": ""
}
},
"raw_obj": {
"pool": "",
"oid": "",
"loc": ""
},
"is_raw": false
}
},
"end_iter": {
"part_ofs": 2097152,
"stripe_ofs": 52428800,
"ofs": 52428800,
"stripe_size": 0,
"cur_part_id": 0,
"cur_stripe": 7,
"cur_override_prefix": "",
"location": {
"placement_rule": "objpool0",
"obj": {
"bucket": {
"name": "testbucket0",
"marker": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"bucket_id": "e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_tail_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"key": {
"name": ".-ORMlsBRTV9kAJWl3GjTDqpRnZNJyxS_7", // end_iter指向末尾之后的位置(该对象并不存在),最后一个分片实际为{prefix}6
"instance": "",
"ns": "shadow"
}
},
"raw_obj": {
"pool": "",
"oid": "",
"loc": ""
},
"is_raw": false
}
}
}
查看rados对象
可以看到数据切分成2M + 8M + 8M + 8M + 8M + 8M + 8M。
[root@poctest_100 /data]# for obj in `rados -p 00000000-default.rgw.buckets.data ls | grep "file_50M.bin\|.-ORMlsBRTV9kAJWl3GjTDqpRnZNJyxS_"`; do rados -p 00000000-default.rgw.buckets.data stat ${obj}; done
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1_file_50M.bin mtime 2022-07-04 14:48:10.000000, size 2097152
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_.-ORMlsBRTV9kAJWl3GjTDqpRnZNJyxS_4 mtime 2022-07-04 14:48:10.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_.-ORMlsBRTV9kAJWl3GjTDqpRnZNJyxS_2 mtime 2022-07-04 14:48:10.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_.-ORMlsBRTV9kAJWl3GjTDqpRnZNJyxS_6 mtime 2022-07-04 14:48:10.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_.-ORMlsBRTV9kAJWl3GjTDqpRnZNJyxS_1 mtime 2022-07-04 14:48:10.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_.-ORMlsBRTV9kAJWl3GjTDqpRnZNJyxS_3 mtime 2022-07-04 14:48:10.000000, size 8388608
00000000-default.rgw.buckets.data/e8392725-d4f8-4d37-940f-eb948c81c3a3.686977.1__shadow_.-ORMlsBRTV9kAJWl3GjTDqpRnZNJyxS_5 mtime 2022-07-04 14:48:10.000000, size 8388608
参考文献
《Ceph设计原理与实现》