Webrtc FrameBuffer实现原理

最新推荐文章于 2023-11-02 17:27:08 发布

bytedance-踏锋

最新推荐文章于 2023-11-02 17:27:08 发布

阅读量2k

点赞数

分类专栏： webrtc学习

本文链接：https://blog.csdn.net/liwf616/article/details/100568185

版权

webrtc学习专栏收录该内容

12 篇文章 0 订阅

订阅专栏

Webrtc FrameBuffer实现原理

1. InsertFrame

// Insert a frame into the frame buffer. Returns the picture id
// of the last continuous frame or -1 if there is no continuous frame.
// TODO(philipel): Return a VideoLayerFrameId and not only the picture id.
  
int64_t FrameBuffer::InsertFrame(std::unique_ptr<EncodedFrame> frame) {
  TRACE_EVENT0("webrtc", "FrameBuffer::InsertFrame");
  RTC_DCHECK(frame);

  if (stats_callback_)
    stats_callback_->OnCompleteFrame(frame->is_keyframe(), frame->size(),
                                     frame->contentType());
  const VideoLayerFrameId& id = frame->id;

  rtc::CritScope lock(&crit_);

  int64_t last_continuous_picture_id =
      !last_continuous_frame_ ? -1 : last_continuous_frame_->picture_id;

  // 如果前向参考不存在，则返回上一次的last_continuous_picture_id
  if (!ValidReferences(*frame)) {
    RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
                        << id.picture_id << ":"
                        << static_cast<int>(id.spatial_layer)
                        << ") has invalid frame references, dropping frame.";
    return last_continuous_picture_id;
  }
  
  // 如果缓存满了，则判断当前是否为关键帧，如果是则清空之前的数据
  // 否则，返回上一次的last_continuous_picture_id
  if (frames_.size() >= kMaxFramesBuffered) {
    if (frame->is_keyframe()) {
      RTC_LOG(LS_WARNING) << "Inserting keyframe (picture_id:spatial_id) ("
                          << id.picture_id << ":"
                          << static_cast<int>(id.spatial_layer)
                          << ") but buffer is full, clearing"
                          << " buffer and inserting the frame.";
      ClearFramesAndHistory();
    } else {
      RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
                          << id.picture_id << ":"
                          << static_cast<int>(id.spatial_layer)
                          << ") could not be inserted due to the frame "
                          << "buffer being full, dropping frame.";
      return last_continuous_picture_id;
    }
  }

  //如果当前frames到达的时候，已经开始解析后面的frame，则直接丢弃该frame
  auto last_decoded_frame = decoded_frames_history_.GetLastDecodedFrameId();
  auto last_decoded_frame_timestamp =
      decoded_frames_history_.GetLastDecodedFrameTimestamp();
  
  if (last_decoded_frame && id <= *last_decoded_frame) {
    if (AheadOf(frame->Timestamp(), *last_decoded_frame_timestamp) &&
        frame->is_keyframe()) {
      // If this frame has a newer timestamp but an earlier picture id then we
      // assume there has been a jump in the picture id due to some encoder
      // reconfiguration or some other reason. Even though this is not according
      // to spec we can still continue to decode from this frame if it is a
      // keyframe.
      RTC_LOG(LS_WARNING)
          << "A jump in picture id was detected, clearing buffer.";
      ClearFramesAndHistory();
      last_continuous_picture_id = -1;
    } else {
      RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
                          << id.picture_id << ":"
                          << static_cast<int>(id.spatial_layer)
                          << ") inserted after frame ("
                          << last_decoded_frame->picture_id << ":"
                          << static_cast<int>(last_decoded_frame->spatial_layer)
                          << ") was handed off for decoding, dropping frame.";
      return last_continuous_picture_id;
    }
  }

  // Test if inserting this frame would cause the order of the frames to become
  // ambiguous (covering more than half the interval of 2^16). This can happen
  // when the picture id make large jumps mid stream.
  if (!frames_.empty() && id < frames_.begin()->first &&
      frames_.rbegin()->first < id) {
    RTC_LOG(LS_WARNING)
        << "A jump in picture id was detected, clearing buffer.";
    ClearFramesAndHistory();
    last_continuous_picture_id = -1;
  }

  //如果该frame已经收到过，则直接丢弃
  auto info = frames_.emplace(id, FrameInfo()).first;

  if (info->second.frame) {
    RTC_LOG(LS_WARNING) << "Frame with (picture_id:spatial_id) ("
                        << id.picture_id << ":"
                        << static_cast<int>(id.spatial_layer)
                        << ") already inserted, dropping frame.";
    return last_continuous_picture_id;
  }
  
  //状态更新
  if (stats_callback_)
    stats_callback_->OnFrameInserted(frame->is_keyframe(), frame->size(),
                                   frame->contentType(), frames_.size());
  //判断是否还依赖其他的frame
  if (!UpdateFrameInfoWithIncomingFrame(*frame, info))
    return last_continuous_picture_id;
  
  UpdatePlayoutDelays(*frame);

  info->second.frame = std::move(frame);

  //到这一步骤，说明我们已经得到了一个可以立马播放的frame了
  if (info->second.num_missing_continuous == 0) {
    info->second.continuous = true;
    PropagateContinuity(info);
    last_continuous_picture_id = last_continuous_frame_->picture_id;

    // Since we now have new continuous frames there might be a better frame
    // to return from NextFrame. Signal that thread so that it again can choose
    // which frame to return.
    new_continuous_frame_event_.Set();
  }

  return last_continuous_picture_id;
}

2.NextFrame

// Get the next frame for decoding. Will return at latest after
// |max_wait_time_ms|.
//  - If a frame is available within |max_wait_time_ms| it will return
//    kFrameFound and set |frame_out| to the resulting frame.
//  - If no frame is available after |max_wait_time_ms| it will return
//    kTimeout.
//  - If the FrameBuffer is stopped then it will return kStopped.
// 1. 如果一个frame已经准备好了，则返回kFrameFound，并且frame数据存储在frame_out指针中。
// 2. 如果在max_wait_time_ms的等待时间中，仍然没有数据准备好，则返回kTimeout
// 3. 如果FrameBuffer 被关闭，则返回 kStopped
  
FrameBuffer::ReturnReason FrameBuffer::NextFrame(
    int64_t max_wait_time_ms,
    std::unique_ptr<EncodedFrame>* frame_out,
    bool keyframe_required) {
    
  // 这个函数最终可以执行的结束时间deadline
  int64_t latest_return_time_ms =
      clock_->TimeInMilliseconds() + max_wait_time_ms;
      
  int64_t wait_ms = max_wait_time_ms;
  int64_t now_ms = 0;

  do {
    now_ms = clock_->TimeInMilliseconds();
    {
      rtc::CritScope lock(&crit_);
      new_continuous_frame_event_.Reset();
      if (stopped_)
        return kStopped;

      keyframe_required_ = keyframe_required;
      latest_return_time_ms_ = latest_return_time_ms;
      wait_ms = FindNextFrame(now_ms);
    }
  } while (new_continuous_frame_event_.Wait(wait_ms));

  {
    rtc::CritScope lock(&crit_);

	 //读取到一个frame，则返回kFrameFound
    if (!frames_to_decode_.empty()) {
      frame_out->reset(GetNextFrame());
      return kFrameFound;
    }
  }

  //如果上面执行结束了，发现还有时间，则继续执行一次
  if (latest_return_time_ms - clock_->TimeInMilliseconds() > 0) {
    // If |next_frame_it_ == frames_.end()| and there is still time left, it
    // means that the frame buffer was cleared as the thread in this function
    // was waiting to acquire |crit_| in order to return. Wait for the
    // remaining time and then return.
    return NextFrame(latest_return_time_ms - now_ms, frame_out,
                     keyframe_required);
  }
  
  //否则，返回超时
  return kTimeout;
}

3 FindNextFrame(int64_t now_ms)

int64_t FrameBuffer::FindNextFrame(int64_t now_ms) {
  int64_t wait_ms = latest_return_time_ms_ - now_ms;
  frames_to_decode_.clear();

  // |last_continuous_frame_| may be empty below, but nullopt is smaller
  // than everything else and loop will immediately terminate as expected.
  for (auto frame_it = frames_.begin();
       frame_it != frames_.end() && frame_it->first <= last_continuous_frame_;
       ++frame_it) {
    if (!frame_it->second.continuous ||
        frame_it->second.num_missing_decodable > 0) {
      continue;
    }

    EncodedFrame* frame = frame_it->second.frame.get();

	 // 等关键帧
    if (keyframe_required_ && !frame->is_keyframe())
      continue;

    auto last_decoded_frame_timestamp =
        decoded_frames_history_.GetLastDecodedFrameTimestamp();
	
	 //从最后一个decodeb_frame开始解析数据
    // TODO(https://bugs.webrtc.org/9974): consider removing this check
    // as it may make a stream undecodable after a very long delay between
    // frames.
    if (last_decoded_frame_timestamp &&
        AheadOf(*last_decoded_frame_timestamp, frame->Timestamp())) {
      continue;
    }

    // Only ever return all parts of a superframe. Therefore skip this
    // frame if it's not a beginning of a superframe.
    if (frame->inter_layer_predicted) {
      continue;
    }

    // Gather all remaining frames for the same superframe.
    std::vector<FrameMap::iterator> current_superframe;
    current_superframe.push_back(frame_it);
    bool last_layer_completed = frame_it->second.frame->is_last_spatial_layer;
    FrameMap::iterator next_frame_it = frame_it;
    while (true) {
      ++next_frame_it;
      if (next_frame_it == frames_.end() ||
          next_frame_it->first.picture_id != frame->id.picture_id ||
          !next_frame_it->second.continuous) {
        break;
      }
      // Check if the next frame has some undecoded references other than
      // the previous frame in the same superframe.
      size_t num_allowed_undecoded_refs =
          (next_frame_it->second.frame->inter_layer_predicted) ? 1 : 0;
      if (next_frame_it->second.num_missing_decodable >
          num_allowed_undecoded_refs) {
        break;
      }
      // All frames in the superframe should have the same timestamp.
      if (frame->Timestamp() != next_frame_it->second.frame->Timestamp()) {
        RTC_LOG(LS_WARNING) << "Frames in a single superframe have different"
                               " timestamps. Skipping undecodable superframe.";
        break;
      }
      current_superframe.push_back(next_frame_it);
      last_layer_completed = next_frame_it->second.frame->is_last_spatial_layer;
    }
    // Check if the current superframe is complete.
    // TODO(bugs.webrtc.org/10064): consider returning all available to
    // decode frames even if the superframe is not complete yet.
    if (!last_layer_completed) {
      continue;
    }

    frames_to_decode_ = std::move(current_superframe);

    if (frame->RenderTime() == -1) {
      frame->SetRenderTime(timing_->RenderTimeMs(frame->Timestamp(), now_ms));
    }
    wait_ms = timing_->MaxWaitingTime(frame->RenderTime(), now_ms);

    // This will cause the frame buffer to prefer high framerate rather
    // than high resolution in the case of the decoder not decoding fast
    // enough and the stream has multiple spatial and temporal layers.
    // For multiple temporal layers it may cause non-base layer frames to be
    // skipped if they are late.
    if (wait_ms < -kMaxAllowedFrameDelayMs)
      continue;

    break;
  }
  
  wait_ms = std::min<int64_t>(wait_ms, latest_return_time_ms_ - now_ms);
  wait_ms = std::max<int64_t>(wait_ms, 0);
  return wait_ms;
}

##4 UpdatePlayoutDelays

webrtc规范参考

void FrameBuffer::UpdatePlayoutDelays(const EncodedFrame& frame) {
  TRACE_EVENT0("webrtc", "FrameBuffer::UpdatePlayoutDelays");
  PlayoutDelay playout_delay = frame.EncodedImage().playout_delay_;
  
  if (playout_delay.min_ms >= 0)
    timing_->set_min_playout_delay(playout_delay.min_ms);

  if (playout_delay.max_ms >= 0)
    timing_->set_max_playout_delay(playout_delay.max_ms);

  //该包是否被重传过? 每一个frame的times_nacked_记录的是每一个frame里面所有的packet中重传次数的最大值
  //如果被重传过，则直接更新timing_, 简单的理解就是把音视频的时间轴、播放器系统时间做一个映射。
  
  //Used to report that a frame is passed to decoding. Updates the timestamp
  //filter which is used to map between timestamps and receiver system time.
  
  if (!frame.delayed_by_retransmission())
    timing_->IncomingTimestamp(frame.Timestamp(), frame.ReceivedTime());
}

##5 GetNextFrame()

EncodedFrame* FrameBuffer::GetNextFrame() {
  int64_t now_ms = clock_->TimeInMilliseconds();
  // TODO(ilnik): remove |frames_out| use frames_to_decode_ directly.
  std::vector<EncodedFrame*> frames_out;

  RTC_DCHECK(!frames_to_decode_.empty());
  bool superframe_delayed_by_retransmission = false;
  size_t superframe_size = 0;
  EncodedFrame* first_frame = frames_to_decode_[0]->second.frame.get();
  
  //获取该frame的可渲染的时间
  int64_t render_time_ms = first_frame->RenderTime();
  int64_t receive_time_ms = first_frame->ReceivedTime();
  // Gracefully handle bad RTP timestamps and render time issues.
  if (HasBadRenderTiming(*first_frame, now_ms)) {
    jitter_estimator_.Reset();
    timing_->Reset();
    render_time_ms = timing_->RenderTimeMs(first_frame->Timestamp(), now_ms);
  }

  // **核心逻辑**
  for (FrameMap::iterator& frame_it : frames_to_decode_) {
    RTC_DCHECK(frame_it != frames_.end());
    EncodedFrame* frame = frame_it->second.frame.release();

	 //decode frame里面所有帧的渲染时间都是第一个frame的渲染时间
    frame->SetRenderTime(render_time_ms);

    superframe_delayed_by_retransmission |= frame->delayed_by_retransmission();
    receive_time_ms = std::max(receive_time_ms, frame->ReceivedTime());
    superframe_size += frame->size();

    PropagateDecodability(frame_it->second);
    decoded_frames_history_.InsertDecoded(frame_it->first, frame->Timestamp());

    // Remove decoded frame and all undecoded frames before it.
    if (stats_callback_) {
      unsigned int dropped_frames = std::count_if(
          frames_.begin(), frame_it,
          [](const std::pair<const VideoLayerFrameId, FrameInfo>& frame) {
            return frame.second.frame != nullptr;
          });
      if (dropped_frames > 0) {
        stats_callback_->OnDroppedFrames(dropped_frames);
      }
    }

    //从frames_列表里面删除该frame
    frames_.erase(frames_.begin(), ++frame_it);

    frames_out.push_back(frame);
  }

  if (!superframe_delayed_by_retransmission) {
    int64_t frame_delay;

    if (inter_frame_delay_.CalculateDelay(first_frame->Timestamp(),
                                          &frame_delay, receive_time_ms)) {
      jitter_estimator_.UpdateEstimate(frame_delay, superframe_size);
    }

    float rtt_mult = protection_mode_ == kProtectionNackFEC ? 0.0 : 1.0;
    absl::optional<float> rtt_mult_add_cap_ms = absl::nullopt;
    if (rtt_mult_settings_.has_value()) {
      rtt_mult = rtt_mult_settings_->rtt_mult_setting;
      rtt_mult_add_cap_ms = rtt_mult_settings_->rtt_mult_add_cap_ms;
    }
    timing_->SetJitterDelay(
        jitter_estimator_.GetJitterEstimate(rtt_mult, rtt_mult_add_cap_ms));
    timing_->UpdateCurrentDelay(render_time_ms, now_ms);
  } else {
    if (RttMultExperiment::RttMultEnabled() || add_rtt_to_playout_delay_)
      jitter_estimator_.FrameNacked();
  }

  UpdateJitterDelay();
  UpdateTimingFrameInfo();

  // **这个地方可以返回一个数组!!**
  if (frames_out.size() == 1) {
    return frames_out[0];
  } else {
    return CombineAndDeleteFrames(frames_out);
  }
}

RtpFrameObject数据结构

class RtpFrameObject : public EncodedFrame {
 public:
  RtpFrameObject(PacketBuffer* packet_buffer,
                 uint16_t first_seq_num,
                 uint16_t last_seq_num,
                 size_t frame_size,
                 int times_nacked,
                 int64_t first_packet_received_time,
                 int64_t last_packet_received_time,
                 RtpPacketInfos packet_infos);

  ~RtpFrameObject() override;
  uint16_t first_seq_num() const;
  uint16_t last_seq_num() const;
  int times_nacked() const;
  VideoFrameType frame_type() const;
  VideoCodecType codec_type() const;
  int64_t ReceivedTime() const override;
  int64_t RenderTime() const override;
  bool delayed_by_retransmission() const override;
  const RTPVideoHeader& GetRtpVideoHeader() const;
  const absl::optional<RtpGenericFrameDescriptor>& GetGenericFrameDescriptor()
      const;
  const FrameMarking& GetFrameMarking() const;

 private:
  RTPVideoHeader rtp_video_header_;
  absl::optional<RtpGenericFrameDescriptor> rtp_generic_frame_descriptor_;
  VideoCodecType codec_type_;
  uint16_t first_seq_num_;  //起始seq num
  uint16_t last_seq_num_;   //最后seq num
  int64_t last_packet_received_time_;  //最后一个packet接收到的时间

  // Equal to times nacked of the packet with the highet times nacked
  // belonging to this frame.
  int times_nacked_;   		//这个frame中packet的最大重传次数
};