以 4M 作为单次写 rados 的上限,写 10M 数据的流程如下:
// Walkthrough used by the "Trace" comments below: 10M of data is written with a
// 4M head chunk. Per the original notes, [0,4M) is captured as the first chunk,
// [4M,8M) is written to rados, and the trailing [8M,10M) (less than 4M) reaches
// rados through the chunk processor. Trace, 1st call: 4M of data.
//
// Collects the first head_chunk_size bytes of the stream in head_data and hands
// them to process_first_chunk(); everything after that is forwarded to
// `processor` at the running data_offset.
//
// data            next piece of the stream; an empty buffer is a flush request.
// logical_offset  not read by this function; the position is tracked through
//                 the data_offset member instead.
//
// Returns 0 when the data was fully absorbed into the head chunk, otherwise the
// status of process_first_chunk() or of the downstream processor. A negative
// status from process_first_chunk() is returned immediately (negative-errno
// convention assumed, as elsewhere in Ceph).
int HeadObjectProcessor::process(bufferlist&& data, uint64_t logical_offset)
{
  // Trace, 1st call: data.length() == 4M, so this is not a flush.
  const bool flush = (data.length() == 0);

  // Still filling the head chunk. The `data_offset == 0` clause keeps the very
  // first call on this path even when head_chunk_size is 0.
  // Trace, 1st call: data_offset == 0.
  if (data_offset < head_chunk_size || data_offset == 0) {
    if (flush) {
      // Flush whatever part of the head chunk has been collected so far.
      return process_first_chunk(std::move(head_data), &processor);
    }

    // Move as much of `data` as still fits into the head chunk.
    auto remaining = head_chunk_size - data_offset;
    auto count = std::min<uint64_t>(data.length(), remaining);
    data.splice(0, count, &head_data); // Trace, 1st call: data is empty afterwards
    data_offset += count;              // Trace, 1st call: data_offset = 4M

    // Trace, 1st call: data_offset == head_chunk_size == 4M.
    if (data_offset == head_chunk_size) {
      ceph_assert(head_data.length() == head_chunk_size);
      // Section 01. The original note describes a variant that stashes the
      // chunk (first_chunk = std::move(data)) and only writes it to rados on
      // complete, installing StripeProcessor(&chunk, this, head_max_size);
      // the AppendObjectProcessor version in section 01 calls
      // writer.write_exclusive() instead.
      int r = process_first_chunk(std::move(head_data), &processor);
      if (r < 0) {
        // Previously ignored: do not keep streaming after the head chunk failed.
        return r;
      }
    }
    if (data.length() == 0) { // avoid flushing stripe processor
      return 0;
    }
  }
  ceph_assert(processor); // process_first_chunk() must initialize

  // Send everything past the head chunk downstream.
  // Trace, 2nd call: write_offset = 4M.
  auto write_offset = data_offset;
  // Trace, 2nd call: data_offset goes 4M -> 8M.
  data_offset += data.length();
  // Section 02: StripeProcessor::process.
  return processor->process(std::move(data), write_offset);
}
---------------------------------------------- 01
// Writes the head chunk through writer.write_exclusive() (RadosWriter, per the
// original notes) and points *processor at the `stripe` member (noted as
// StripeProcessor(&chunk, this, 4M)) for the rest of the stream.
//
// data       the collected head chunk.
// processor  out-parameter; always set to &stripe, as in the original code.
//
// Returns the status of write_exclusive() when it is negative (negative-errno
// convention assumed), otherwise 0. The original discarded that status and
// always returned 0.
int AppendObjectProcessor::process_first_chunk(bufferlist &&data, rgw::putobj::DataProcessor **processor) {
  const int r = writer.write_exclusive(data);
  // Assigned regardless of the write result so that the out-parameter behaves
  // exactly as before; the return value tells the caller whether to continue.
  *processor = &stripe;
  if (r < 0) {
    return r;
  }
  return 0;
}
---------------------------------------------- 02
// Splits the incoming stream at stripe boundaries. `bounds` holds the
// [first, second) range of the current stripe; data is forwarded through
// Pipe::process() at offsets relative to bounds.first. When a stripe is full,
// it is flushed with an empty buffer and gen->next() supplies the size of the
// following stripe.
//
// data    next piece of the stream; an empty buffer is a flush request.
// offset  position of `data` in the same coordinate space as `bounds`.
//
// Returns 0 when nothing is left to forward, otherwise the status of the last
// Pipe::process() call. A negative status from any intermediate
// Pipe::process() or gen->next() call is returned immediately (negative-errno
// convention assumed); the original ignored those results.
int StripeProcessor::process(bufferlist&& data, uint64_t offset)
{
  const bool flush = (data.length() == 0);
  if (flush) {
    return Pipe::process({}, offset - bounds.first);
  }

  // Bytes still available in the current stripe.
  // Trace, 2nd call: bounds = [0, 4M), offset = 4M, so max = 0.
  auto max = bounds.second - offset;
  // Original note placed here: "holds when writing [8M,10M]".
  while (data.length() > max) {
    if (max > 0) {
      // Fill the rest of the current stripe.
      bufferlist bl;
      data.splice(0, max, &bl);
      int r = Pipe::process(std::move(bl), offset - bounds.first);
      if (r < 0) {
        return r;
      }
      offset += max;
    }

    // Flush the current stripe before moving on.
    int r = Pipe::process({}, offset - bounds.first);
    if (r < 0) {
      return r;
    }

    // generate the next stripe
    // Zero-initialized so the assert below never reads an indeterminate value.
    uint64_t stripe_size = 0;
    // Per the original notes this resolves to ManifestObjectProcessor::next.
    r = gen->next(offset, &stripe_size);
    if (r < 0) {
      return r;
    }
    ceph_assert(stripe_size > 0);

    bounds.first = offset;
    bounds.second = offset + stripe_size;
    max = stripe_size; // Trace, 2nd call: max goes 0 -> 4M and the loop ends
  }

  if (data.length() == 0) { // don't flush the chunk here
    return 0;
  }
  // Section 03: per the original notes the next stage is ChunkProcessor::process.
  return Pipe::process(std::move(data), offset - bounds.first);
}
---------------------------------------------- 03
// Construction, per the original notes: ChunkProcessor(&writer, chunk_size),
// where `writer` is a RadosWriter.
//
// Buffers incoming data in `chunk` and forwards it through Pipe::process() in
// pieces of exactly chunk_size bytes; a remainder shorter than chunk_size stays
// buffered until more data arrives or a flush is requested.
//
// data    next piece of the stream; an empty buffer is a flush request.
// offset  position of `data`; must be at least chunk.length() (asserted).
//
// Returns 0 after buffering or writing full chunks, or the status of the
// forwarded flush. A negative status from any Pipe::process() call is returned
// immediately (negative-errno convention assumed); the original ignored those
// results.
int ChunkProcessor::process(bufferlist&& data, uint64_t offset) {
  ceph_assert(offset >= chunk.length());
  // Downstream offset for the bytes currently buffered in `chunk`
  // (presumably they immediately precede `offset` in the stream).
  uint64_t position = offset - chunk.length();

  const bool flush = (data.length() == 0);
  if (flush) {
    if (chunk.length() > 0) {
      // Section 04: per the original notes the next stage is RadosWriter::process.
      int r = Pipe::process(std::move(chunk), position);
      if (r < 0) {
        return r;
      }
    }
    // Pass the flush request itself downstream.
    return Pipe::process({}, offset);
  }
  chunk.claim_append(data);

  // write each full chunk
  while (chunk.length() >= chunk_size) {
    bufferlist bl;
    chunk.splice(0, chunk_size, &bl);
    int r = Pipe::process(std::move(bl), position);
    if (r < 0) {
      return r;
    }
    position += chunk_size;
  }
  return 0;
}
---------------------------------------------- 04
// Submits one buffer as a librados write against stripe_obj through `aio`.
//
// bl      data to write; taken over by this call. An empty buffer is a no-op.
// offset  write position inside the object; 0 selects write_full() instead of
//         write().
//
// Returns 0 for an empty buffer, otherwise the result of process_completed()
// for the submitted operation.
int RadosWriter::process(bufferlist&& bl, uint64_t offset) {
  bufferlist payload = std::move(bl);
  const uint64_t nbytes = payload.length();
  if (nbytes == 0) {
    // Nothing to write.
    return 0;
  }

  librados::ObjectWriteOperation write_op;
  if (offset != 0) {
    write_op.write(offset, payload);
  } else {
    write_op.write_full(payload);
  }

  // The payload size is passed to aio->get() as the cost of the operation.
  constexpr uint64_t aio_id = 0;
  auto completed = aio->get(stripe_obj,
                            Aio::librados_op(std::move(write_op), y),
                            nbytes, aio_id);
  return process_completed(completed, &written);
}