Pipe核心分析及消息体结构-writer

1 篇文章 0 订阅

Pipe核心分析及消息体结构

  1. Pipe是什么
    Pipe是SimpleMessenger中的一个复杂的组件,每个Pipe实例都拥有工作在socket上的两个线程,分别负责socket连接上的读取和写入的数据传输,除此之外还会将socket上的错误信息汇报给SimpleMessenger,同时处于一个稳定状态,来持续不断的为SimpleMessenger提供数据服务。

  2. Pipe的不同状态

    STATE_ACCEPTING,
    STATE_CONNECTING,
    STATE_OPEN,
    STATE_STANDBY,
    STATE_CLOSED,
    STATE_CLOSING,
    STATE_WAIT
  3. Pipe::writer()将消息通过socket发送出去
    主要处理逻辑过程:

    1. 只要Pipe的状态不为STATE_CLOSED,Pipe就一直处于writer的主循环中(异常除外)。
    2. 如果当前Pipe实例的状态为STATE_STANDBY,且本端不是服务端(policy.server为false),并且有消息要发送,则将Pipe置为STATE_CONNECTING。
    3. 如果Pipe实例的状态为STATE_CONNECTING,则调用connect()连接服务端,(注:只有客户端才允许调用connect())。
    4. 如果Pipe的状态为STATE_CLOSING将要关闭,则向对应端发送tag(CEPH_MSGR_TAG_CLOSE)。
    5. 如果Pipe实例的状态不是STATE_CONNECTING、STATE_WAIT、STATE_STANDBY其中的任意一个,并且(有消息需要发送,或者存在尚未ack的已接收消息,即in_seq > in_seq_acked),则根据具体情况依次发送keepalive、keepalive ack、ack和消息。
    6. 假设有正常的消息需要发送,则从out_q中取出一个消息。
    7. 将message中的header和footer取出来,并将front、middle、data序列化到bufferlist中。
    8. 调用write_message()将header、footer、序列化后的数据进一步处理。
    9. 如果Pipe实例的状态没有变为STATE_CLOSED,则继续主循环;否则结束writer。
  4. Pipe::writer的函数调用

 Pipe::writer()
     >Pipe::connect()
     >::write() //syscall,关闭pipe时调用,发送tag(CEPH_MSGR_TAG_CLOSE)
     >Pipe::write_keepalive2() //需要keepalive时调用,或ack keepalive时调用
     >Pipe::write_ack //ack message时调用
     >Pipe::_get_next_outgoing() //从out_q中取出需要发送的消息 or
     >Pipe::write_message() //发送消息
         >Pipe::do_sendmsg() //发送tag、header、data(front middle data) footer
             >Pipe::sendmsg() //syscall

`
5. Pipe::writer部分源代码

void Pipe::writer()
{
  pipe_lock.Lock();
  //1.只要Pipe的状态不为STATE_CLOSED,Pipe就一直处于writer的主循环中(异常除外)。
  while (state != STATE_CLOSED) {// && state != STATE_WAIT) {
  ¦ ldout(msgr->cct,10) << "writer: state = " << get_state_name()
                        << " policy.server=" << policy.server << dendl;

//2.如果当前Pipe实例的状态为STATE_STANDBY,且不属于服务,有消息要发送,则将Pipe置为STATE_CONNECTING。
  ¦ // standby?
  ¦ if (is_queued() && state == STATE_STANDBY && !policy.server)
  ¦ ¦ state = STATE_CONNECTING;

  //3.如果Pipe实例的状态为STATE_CONNECTING,则调用connect()连接服务端,(注:只有客户端才允许调用connect())。
  ¦ // connect?
  ¦ if (state == STATE_CONNECTING) {
  ¦ ¦ assert(!policy.server);
  ¦ ¦ connect();
  ¦ ¦ continue;
  ¦ }
  ¦ //4.如果Pipe的状态为STATE_CLOSING将要关闭,则向对应端发送tag(CEPH_MSGR_TAG_CLOSE)。
  ¦ if (state == STATE_CLOSING) {
  ¦ ¦ // write close tag
  ¦ ¦ ldout(msgr->cct,20) << "writer writing CLOSE tag" << dendl;
  ¦ ¦ char tag = CEPH_MSGR_TAG_CLOSE;
  ¦ ¦ state = STATE_CLOSED;
  ¦ ¦ state_closed.set(1);
  ¦ ¦ pipe_lock.Unlock();
  ¦ ¦ if (sd) {
        int r = ::write(sd, &tag, 1);
        // we can ignore r, actually; we don't care if this succeeds.
        r++; r = 0; // placate gcc
  ¦ ¦ }
  ¦ ¦ pipe_lock.Lock();
  ¦ ¦ continue;
  ¦ }
//5.如果Pipe实例的状态不是STATE_CONNECTING STATE_WAIT STATE_STANDBY其中的任意一个且有消息需要发送,或者存在为ack的消息则根据具体情况发送消息。
  ¦ if (state != STATE_CONNECTING && state != STATE_WAIT && state != STATE_STANDBY &&
        (is_queued() || in_seq > in_seq_acked)) {

  ¦ ¦ // keepalive?
  ¦ ¦ if (send_keepalive) {
        int rc;                                                                                                                                                                                                   
        if (connection_state->has_feature(CEPH_FEATURE_MSGR_KEEPALIVE2)) {
        ¦ pipe_lock.Unlock();
        ¦ rc = write_keepalive2(CEPH_MSGR_TAG_KEEPALIVE2,
                                ceph_clock_now(msgr->cct));
        } else {
        ¦ pipe_lock.Unlock();
        ¦ rc = write_keepalive();
        }
        pipe_lock.Lock();
        if (rc < 0) {
        ¦ ldout(msgr->cct,2) << "writer couldn't write keepalive[2], "
                             << cpp_strerror(errno) << dendl;
        ¦ fault();
        ¦ continue;
        }
        send_keepalive = false;
  ¦ ¦ }
  //ack keepalive2
  ¦ ¦ if (send_keepalive_ack) {
        utime_t t = keepalive_ack_stamp;
        pipe_lock.Unlock();
        int rc = write_keepalive2(CEPH_MSGR_TAG_KEEPALIVE2_ACK, t);
        pipe_lock.Lock();
        if (rc < 0) {
        ¦ ldout(msgr->cct,2) << "writer couldn't write keepalive_ack, " << cpp_strerror(errno) << dendl;
        ¦ fault();
        ¦ continue;
        }
        send_keepalive_ack = false;
  ¦ ¦ }

  ¦ ¦ // send ack?
  ¦ ¦ if (in_seq > in_seq_acked) {
        uint64_t send_seq = in_seq;
        pipe_lock.Unlock();
        //ack message
        int rc = write_ack(send_seq);
        pipe_lock.Lock();
        if (rc < 0) {
        ¦ ldout(msgr->cct,2) << "writer couldn't write ack, " << cpp_strerror(errno) << dendl;
        ¦ fault();
        ¦ continue;
        }
        in_seq_acked = send_seq;                                                                                                                                                                                  
  ¦ ¦ }
//6.假设有正常的消息需要发送,则从out_q中取出一个消息。
  ¦ ¦ // grab outgoing message 取出一个待发送的message
  ¦ ¦ Message *m = _get_next_outgoing();
  //重新构建message
  ¦ ¦ if (m) {
        m->set_seq(++out_seq);
        if (!policy.lossy) {
        ¦ // put on sent list
        ¦ sent.push_back(m); 
        ¦ m->get();
        }

        // associate message with Connection (for benefit of encode_payload)
        m->set_connection(connection_state.get());

        uint64_t features = connection_state->get_features();

        if (m->empty_payload())
        ¦ ldout(msgr->cct,20) << "writer encoding " << m->get_seq() << " features " << features
                              << " " << m << " " << *m << dendl;
        else
        ¦ ldout(msgr->cct,20) << "writer half-reencoding " << m->get_seq() << " features " << features
                              << " " << m << " " << *m << dendl;

        // encode and copy out of *m
        m->encode(features, msgr->crcflags);
//7.将message中的header和footer取出来,并将front、middle、data序列化到bufferlist中。
        // prepare everything
        ceph_msg_header& header = m->get_header();
        ceph_msg_footer& footer = m->get_footer();

        // Now that we have all the crcs calculated, handle the
        // digital signature for the message, if the pipe has session
        // security set up.  Some session security options do not
        // actually calculate and check the signature, but they should
        // handle the calls to sign_message and check_signature.  PLR
        if (session_security.get() == NULL) {
        ¦ ldout(msgr->cct, 20) << "writer no session security" << dendl;
        } else {
        ¦ if (session_security->sign_message(m)) {                                                                                                                                                                
        ¦ ¦ ldout(msgr->cct, 20) << "writer failed to sign seq # " << header.seq
                                 << "): sig = " << footer.sig << dendl;
        ¦ } else {
        ¦ ¦ ldout(msgr->cct, 20) << "writer signed seq # " << header.seq
                                 << "): sig = " << footer.sig << dendl;
        ¦ }
        }

        bufferlist blist = m->get_payload();
        blist.append(m->get_middle());
        blist.append(m->get_data());

  ¦ ¦ ¦ pipe_lock.Unlock();

  ¦ ¦ ¦ ldout(msgr->cct,20) << "writer sending " << m->get_seq() << " " << m << dendl;
  //8.调用write_message()将header、footer、序列化后的数据进一步处理。
        int rc = write_message(header, footer, blist);

        pipe_lock.Lock();
        if (rc < 0) {
  ¦ ¦ ¦ ¦ ldout(msgr->cct,1) << "writer error sending " << m << ", "
                ¦ << cpp_strerror(errno) << dendl;
        ¦ fault();
  ¦ ¦ ¦ }
        m->put();
  ¦ ¦ }
  ¦ ¦ continue;
  ¦ }
  ¦ 
  ¦ // wait
  ¦ ldout(msgr->cct,20) << "writer sleeping" << dendl;
  ¦ cond.Wait(pipe_lock);
  }
  //9.如果Pipe实例状态没有被关闭则,继续主循环,负责结束writer。
  ldout(msgr->cct,20) << "writer finishing" << dendl;

  // reap?
  writer_running = false;
  unlock_maybe_reap();
  ldout(msgr->cct,10) << "writer done" << dendl;
}
int Pipe::write_message(ceph_msg_header& header, ceph_msg_footer& footer, bufferlist& blist)
{
  int ret;

  // set up msghdr and iovecs
  struct msghdr msg;
  memset(&msg, 0, sizeof(msg));
  msg.msg_iov = msgvec;
  int msglen = 0;

  // send tag 填充tag
  char tag = CEPH_MSGR_TAG_MSG;
  msgvec[msg.msg_iovlen].iov_base = &tag;
  msgvec[msg.msg_iovlen].iov_len = 1;
  msglen++;
  msg.msg_iovlen++;

  // send envelope 填充header
  ceph_msg_header_old oldheader;
  if (connection_state->has_feature(CEPH_FEATURE_NOSRCADDR)) {
  ¦ msgvec[msg.msg_iovlen].iov_base = (char*)&header;
  ¦ msgvec[msg.msg_iovlen].iov_len = sizeof(header);
  ¦ msglen += sizeof(header);
  ¦ msg.msg_iovlen++;
  } else {
  ¦ memcpy(&oldheader, &header, sizeof(header));
  ¦ oldheader.src.name = header.src;
  ¦ oldheader.src.addr = connection_state->get_peer_addr();
  ¦ oldheader.orig_src = oldheader.src;
  ¦ oldheader.reserved = header.reserved;
  ¦ if (msgr->crcflags & MSG_CRC_HEADER) {
        oldheader.crc = ceph_crc32c(0, (unsigned char*)&oldheader,
                                    sizeof(oldheader) - sizeof(oldheader.crc));
  ¦ } else {
        oldheader.crc = 0;
  ¦ }
  ¦ msgvec[msg.msg_iovlen].iov_base = (char*)&oldheader;
  ¦ msgvec[msg.msg_iovlen].iov_len = sizeof(oldheader);
  ¦ msglen += sizeof(oldheader);
  ¦ msg.msg_iovlen++;                                                                                                                                                                                             
  }

  // payload (front+data) 填充front、middle、data
  list<bufferptr>::const_iterator pb = blist.buffers().begin();
  int b_off = 0;  // carry-over buffer offset, if any
  int bl_pos = 0; // blist pos
  int left = blist.length();

  while (left > 0) {
  ¦ int donow = MIN(left, (int)pb->length()-b_off);
  ¦ if (donow == 0) {
  ¦ ¦ ldout(msgr->cct,0) << "donow = " << donow << " left " << left << " pb->length " << pb->length()
        ¦ ¦ ¦ << " b_off " << b_off << dendl;
  ¦ }
  ¦ assert(donow > 0);
  ¦ ldout(msgr->cct,30) << " bl_pos " << bl_pos << " b_off " << b_off
        ¦ ¦ ¦<< " leftinchunk " << left
        ¦ ¦ ¦<< " buffer len " << pb->length()
        ¦ ¦ ¦<< " writing " << donow 
        ¦ ¦ ¦<< dendl;
  ¦ 
  ¦ if (msg.msg_iovlen >= IOV_MAX-2) {
  ¦ ¦ if (do_sendmsg(&msg, msglen, true))
        goto fail;
  ¦ ¦ 
  ¦ ¦ // and restart the iov
  ¦ ¦ msg.msg_iov = msgvec;
  ¦ ¦ msg.msg_iovlen = 0;
  ¦ ¦ msglen = 0;
  ¦ }
  ¦ 
  ¦ msgvec[msg.msg_iovlen].iov_base = (void*)(pb->c_str()+b_off);
  ¦ msgvec[msg.msg_iovlen].iov_len = donow;
  ¦ msglen += donow;
  ¦ msg.msg_iovlen++;
  ¦ 
  ¦ left -= donow;
  ¦ assert(left >= 0);                                                                                                                                                                                            
  ¦ b_off += donow;
  ¦ bl_pos += donow;
  ¦ if (left == 0)
  ¦ ¦ break;
  ¦ while (b_off == (int)pb->length()) {
  ¦ ¦ ++pb;
  ¦ ¦ b_off = 0;
  ¦ }
  }
  assert(left == 0);

  // send footer; if receiver doesn't support signatures, use the old footer format
//填充footer
  ceph_msg_footer_old old_footer;
  if (connection_state->has_feature(CEPH_FEATURE_MSG_AUTH)) {
  ¦ msgvec[msg.msg_iovlen].iov_base = (void*)&footer;
  ¦ msgvec[msg.msg_iovlen].iov_len = sizeof(footer);
  ¦ msglen += sizeof(footer);
  ¦ msg.msg_iovlen++;
  } else {
  ¦ if (msgr->crcflags & MSG_CRC_HEADER) {
  ¦ ¦ old_footer.front_crc = footer.front_crc;
  ¦ ¦ old_footer.middle_crc = footer.middle_crc;
  ¦ } else {
        old_footer.front_crc = old_footer.middle_crc = 0;
  ¦ }
  ¦ old_footer.data_crc = msgr->crcflags & MSG_CRC_DATA ? footer.data_crc : 0;
  ¦ old_footer.flags = footer.flags;   
  ¦ msgvec[msg.msg_iovlen].iov_base = (char*)&old_footer;
  ¦ msgvec[msg.msg_iovlen].iov_len = sizeof(old_footer);
  ¦ msglen += sizeof(old_footer);
  ¦ msg.msg_iovlen++;
  }

  // send 将填充在msg中的数据发送
  if (do_sendmsg(&msg, msglen))
  ¦ goto fail;

  ret = 0;

 out:
  return ret;                                                                                                                                                                                                     

 fail:
  ret = -1;
  goto out;
}
int Pipe::do_sendmsg(struct msghdr *msg, int len, bool more)
{
  while (len > 0) {
  ¦ if (0) { // sanity                                                                                                                                                                                            
  ¦ ¦ int l = 0;
  ¦ ¦ for (unsigned i=0; i<msg->msg_iovlen; i++)
        l += msg->msg_iov[i].iov_len;
  ¦ ¦ assert(l == len);
  ¦ }

  ¦ int r = ::sendmsg(sd, msg, MSG_NOSIGNAL | (more ? MSG_MORE : 0));
  ¦ if (r == 0) 
  ¦ ¦ ldout(msgr->cct,10) << "do_sendmsg hmm do_sendmsg got r==0!" << dendl;
  ¦ if (r < 0) { 
  ¦ ¦ ldout(msgr->cct,1) << "do_sendmsg error " << cpp_strerror(errno) << dendl;
  ¦ ¦ return -1;
  ¦ }
  ¦ if (state == STATE_CLOSED) {
  ¦ ¦ ldout(msgr->cct,10) << "do_sendmsg oh look, state == CLOSED, giving up" << dendl;
  ¦ ¦ errno = EINTR;
  ¦ ¦ return -1; // close enough
  ¦ }

  ¦ len -= r;
  ¦ if (len == 0) break;
  ¦ 
  ¦ // hrmph.  trim r bytes off the front of our message. //保证数据完全发送
  ¦ ldout(msgr->cct,20) << "do_sendmsg short write did " << r << ", still have " << len << dendl;
  ¦ while (r > 0) {
  ¦ ¦ if (msg->msg_iov[0].iov_len <= (size_t)r) {
        // lose this whole item
        //ldout(msgr->cct,30) << "skipping " << msg->msg_iov[0].iov_len << ", " << (msg->msg_iovlen-1) << " v, " << r << " left" << dendl;
        r -= msg->msg_iov[0].iov_len;
        msg->msg_iov++;
        msg->msg_iovlen--;
  ¦ ¦ } else {
        // partial!
        //ldout(msgr->cct,30) << "adjusting " << msg->msg_iov[0].iov_len << ", " << msg->msg_iovlen << " v, " << r << " left" << dendl;
        msg->msg_iov[0].iov_base = (char *)msg->msg_iov[0].iov_base + r;
        msg->msg_iov[0].iov_len -= r;
        break;
  ¦ ¦ }
  ¦ }
  }
  return 0;                                                                                                                                                                                                       
}

`
6. 总结
Pipe::writer的主要任务就是从out_q中取出消息,通过socket,将消息安全稳定发送出去。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值