/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
#define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
#include "webrtc/common_types.h"
#include "webrtc/typedefs.h"
/******************************************************/
/* Quality Modes: Resolution and Robustness settings */
/******************************************************/
namespace webrtc {
struct VideoContentMetrics;
// Result of a resolution selection: the codec frame size and frame rate to
// use, the scale factors that were applied, and flags telling which of the
// two dimensions (spatial / temporal) changed.
struct VCMResolutionScale {
  // Frame size and frame rate the encoder should be configured with.
  uint16_t codec_width;
  uint16_t codec_height;
  float frame_rate;

  // Factors applied by the selected action. The current size / rate is
  // divided by these, so values above 1 mean down-sampling and values below 1
  // mean going back up.
  float spatial_width_fact;
  float spatial_height_fact;
  float temporal_fact;

  // Set when the selected action changes the frame size / the frame rate.
  bool change_resolution_spatial;
  bool change_resolution_temporal;

  // Defaults: VGA at 30 fps, unit scale factors, nothing flagged as changed.
  VCMResolutionScale()
      : codec_width(640),
        codec_height(480),
        frame_rate(30.0f),
        spatial_width_fact(1.0f),
        spatial_height_fact(1.0f),
        temporal_fact(1.0f),
        change_resolution_spatial(false),
        change_resolution_temporal(false) {}
};
// Nominal frame formats, ordered by increasing pixel count. The enumerator
// values are used as indices into |kSizeOfImageType|, so the order here must
// match the order of that table.
enum ImageType {
kQCIF = 0, // 176x144
kHCIF, // 264x216 = half(~3/4x3/4) CIF.
kQVGA, // 320x240 = quarter VGA.
kCIF, // 352x288
kHVGA, // 480x360 = half(~3/4x3/4) VGA.
kVGA, // 640x480
kQFULLHD, // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD.
kWHD, // 1280x720
kFULLHD, // 1920x1080
kNumImageTypes
};
// Pixel count (width * height) of each ImageType, indexed by the enumerator:
// QCIF, HCIF, QVGA, CIF, HVGA, VGA, QFULLHD, WHD, FULLHD.
const uint32_t kSizeOfImageType[kNumImageTypes] = {
25344, 57024, 76800, 101376, 172800, 307200, 518400, 921600, 2073600};
// Frame rate classes, from lowest to highest. VCMQmMethod::FrameRateLevel()
// maps a frame rate to one of these; the values are also used as indices into
// per-level tables.
enum FrameRateLevelClass {
kFrameRateLow,
kFrameRateMiddle1,
kFrameRateMiddle2,
kFrameRateHigh
};
// Level of a content feature (motion or spatial texture). The numeric values
// (kLow = 0, kHigh = 1, kDefault = 2) enter the content class computation
// (3 * motion level + spatial level), so they must not be reordered.
enum ContentLevelClass { kLow, kHigh, kDefault };
// A content feature: its measured magnitude and the level it was classified
// into.
struct VCMContFeature {
  // Starts in the same state Reset() produces.
  VCMContFeature() { Reset(); }

  // Back to "no measurement": zero magnitude, default level.
  void Reset() {
    level = kDefault;
    value = 0.0f;
  }

  float value;
  ContentLevelClass level;
};
// Direction of a resolution change: back up towards the native resolution, or
// further down.
enum UpDownAction { kUpResolution, kDownResolution };
// Spatial down-sampling actions. The values index |kFactorWidthSpatial| and
// |kFactorHeightSpatial|.
enum SpatialAction {
kNoChangeSpatial,
kOneHalfSpatialUniform, // 3/4 x 3/4: 9/16 ~1/2 of the pixels remain.
kOneQuarterSpatialUniform, // 1/2 x 1/2: 1/4 of the pixels remain.
kNumModesSpatial
};
// Temporal (frame rate) down-sampling actions. The values index
// |kFactorTemporal|.
enum TemporalAction {
kNoChangeTemporal,
kTwoThirdsTemporal, // Frame rate reduced to 2/3 (divided by 1.5).
kOneHalfTemporal, // Frame rate reduced to 1/2 (divided by 2).
kNumModesTemporal
};
// A combined spatial and temporal action.
struct ResolutionAction {
  SpatialAction spatial;
  TemporalAction temporal;

  // Defaults to "no change" in both dimensions.
  ResolutionAction()
      : spatial(kNoChangeSpatial), temporal(kNoChangeTemporal) {}
};
// Down-sampling factors for spatial (width and height), and temporal.
// Indexed by SpatialAction / TemporalAction. A dimension is divided by its
// factor when going down, so 1.0 means no change.
const float kFactorWidthSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f};
const float kFactorHeightSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f};
const float kFactorTemporal[kNumModesTemporal] = {1.0f, 1.5f, 2.0f};
// Encoder state, derived from the rate mismatch and the buffer-level
// statistics in VCMQmResolution::ComputeEncoderState().
enum EncoderState {
kStableEncoding, // Low rate mis-match, stable buffer levels.
kStressedEncoding, // Significant over-shooting of target rate,
// Buffer under-flow, etc.
kEasyEncoding // Significant under-shooting of target rate.
};
// QmMethod class: main class for resolution and robustness settings.
// Holds the content metrics, the encoder/native frame formats and the derived
// content class; subclasses implement Reset().
class VCMQmMethod {
public:
VCMQmMethod();
virtual ~VCMQmMethod();
// Reset the content-analysis values (aspect ratio, motion and spatial
// features, content class).
void ResetQM();
virtual void Reset() = 0;
// Compute content class.
// Recomputes the motion and spatial levels and returns
// 3 * motion level + spatial level (a value in 0..8); the result is also
// stored in |content_class_|.
uint8_t ComputeContentClass();
// Update with the content metrics.
// Only the pointer is stored; no copy is made.
void UpdateContent(const VideoContentMetrics* content_metrics);
// Compute spatial texture magnitude and level.
// Spatial texture is a spatial prediction error measure.
void ComputeSpatial();
// Compute motion magnitude and level for NFD metric.
// NFD is normalized frame difference (normalized by spatial variance).
void ComputeMotionNFD();
// Get the imageType (CIF, VGA, HD, etc) for the system width/height.
// An exact pixel-count match is returned if there is one; otherwise the
// result of FindClosestImageType().
ImageType GetImageType(uint16_t width, uint16_t height);
// Return the closest image type.
// Closeness is measured on the pixel count (width * height).
ImageType FindClosestImageType(uint16_t width, uint16_t height);
// Get the frame rate level.
// Classifies |frame_rate| into one of the four FrameRateLevelClass values.
FrameRateLevelClass FrameRateLevel(float frame_rate);
protected:
// Content Data.
const VideoContentMetrics* content_metrics_;
// Encoder frame sizes and native frame sizes.
uint16_t width_;
uint16_t height_;
float user_frame_rate_;
uint16_t native_width_;
uint16_t native_height_;
float native_frame_rate_;
float aspect_ratio_;
// Image type and frame rate level, for the current encoder resolution.
ImageType image_type_;
FrameRateLevelClass framerate_level_;
// Content class data.
VCMContFeature motion_;
VCMContFeature spatial_;
uint8_t content_class_;
bool init_;
};
// Resolution settings class.
// Accumulates rate statistics and selects spatial / temporal resolution
// actions. Owns the VCMResolutionScale handed out by SelectResolution().
class VCMQmResolution : public VCMQmMethod {
public:
VCMQmResolution();
virtual ~VCMQmResolution();
// Reset all quantities.
virtual void Reset();
// Reset rate quantities and counters after every SelectResolution() call.
// Clears the accumulated sums and counters and re-seeds the buffer level.
void ResetRates();
// Reset down-sampling state.
// Sets both decimation factors to 1 and clears the action history.
void ResetDownSamplingState();
// Get the encoder state.
// One of stable / stressed / easy.
EncoderState GetEncoderState();
// Initialize after SetEncodingData in media_opt.
// Sets the native and current frame format and the initial rate values.
int Initialize(float bitrate,
float user_framerate,
uint16_t width,
uint16_t height,
int num_layers);
// Update the encoder frame size.
// Also updates the target frame rate and the image type.
void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height);
// Update with actual bit rate (size of the latest encoded frame)
// and frame type, after every encoded frame.
// Updates the frame counter and the modelled buffer level.
void UpdateEncodedSize(size_t encoded_size);
// Update with new target bitrate, actual encoder sent rate, frame_rate,
// loss rate: every ~1 sec from SetTargetRates in media_opt.
void UpdateRates(float target_bitrate,
float encoder_sent_rate,
float incoming_framerate,
uint8_t packet_loss);
// Extract ST (spatio-temporal) resolution action.
// Inputs: qm: Reference to the quality modes pointer.
// Output: the spatial and/or temporal scale change.
// |*qm| is set to an object owned by this class.
int SelectResolution(VCMResolutionScale** qm);
private:
// Set the default resolution action.
// Current frame format, unit scale factors, no spatial/temporal action.
void SetDefaultAction();
// Compute rates for the selection of down-sampling action.
// Turns the accumulated sums into averages and derives the frame rate level.
void ComputeRatesForSelection();
// Compute the encoder state.
void ComputeEncoderState();
// Return true if the action is to go back up in resolution.
// Applies to the spatial resolution and/or the frame rate.
bool GoingUpResolution();
// Return true if the action is to go down in resolution.
// Applies to the spatial resolution or the frame rate.
bool GoingDownResolution();
// Check the condition for going up in resolution by the scale factors:
// |fac_width|, |fac_height|, |fac_temp|.
// |scale_fac| is a scale factor for the transition rate.
bool ConditionForGoingUp(float fac_width,
float fac_height,
float fac_temp,
float scale_fac);
// Get the bitrate threshold for the resolution action.
// The case |fac_width|=|fac_height|=|fac_temp|==1 is for down-sampling action.
// |scale_fac| is a scale factor for the transition rate.
float GetTransitionRate(float fac_width,
float fac_height,
float fac_temp,
float scale_fac);
// Update the down-sampling state.
// Updates the scale factors in |qm_| and the accumulated decimation factors.
void UpdateDownsamplingState(UpDownAction up_down);
// Update the codec frame size and frame rate.
void UpdateCodecResolution();
// Return a state based on average target rate relative transition rate.
// The result is 0, 1 or 2.
uint8_t RateClass(float transition_rate);
// Adjust the action selected from the table.
void AdjustAction();
// Convert 2 stages of 3/4 (=9/16) spatial decimation to 1/2.
void ConvertSpatialFractionalToWhole();
// Returns true if the new frame sizes, under the selected spatial action,
// are of even size.
bool EvenFrameSize();
// Insert latest down-sampling action into the history list.
void InsertLatestDownAction();
// Remove the last (first element) down-sampling action from the list.
void RemoveLastDownAction();
// Check constraints on the amount of down-sampling allowed.
void ConstrainAmountOfDownSampling();
// For going up in resolution: pick spatial or temporal action,
// if both actions were separately selected.
void PickSpatialOrTemporal();
// Select the directional (1x2 or 2x1) spatial down-sampling action.
void SelectSpatialDirectionMode(float transition_rate);
enum { kDownActionHistorySize = 10 };
VCMResolutionScale* qm_;
// Encoder rate control parameters.
float target_bitrate_; // Target bitrate currently in use.
float incoming_framerate_; // Incoming frame rate currently in use.
float per_frame_bandwidth_; // Target bitrate divided by the frame rate.
float buffer_level_; // Modelled (not actual) encoder buffer level.
// Data accumulated every ~1sec from MediaOpt.
float sum_target_rate_; // Sum of the target bitrates.
float sum_incoming_framerate_; // Sum of the incoming frame rates.
float sum_rate_MM_; // Sum of |target - sent| / target.
float sum_rate_MM_sgn_; // Sum of sign(target - sent): + under-, - over-shoot.
float sum_packet_loss_; // Sum of the packet loss fractions (value / 255).
// Counters.
uint32_t frame_cnt_; // Number of encoded frames reported.
uint32_t update_rate_cnt_; // Number of UpdateRates() calls.
uint32_t low_buffer_cnt_; // Number of frames with a low buffer level.
// Resolution state parameters.
// Accumulated down-sampling factors relative to the Initialize() state:
// 1 means no down-sampling, larger values mean more reduction.
float state_dec_factor_spatial_;
float state_dec_factor_temporal_;
// Quantities used for selection.
float avg_target_rate_; // Average target bitrate.
float avg_incoming_framerate_; // Average incoming frame rate.
float avg_ratio_buffer_low_; // Fraction of frames with a low buffer level.
float avg_rate_mismatch_; // Average relative rate mismatch.
float avg_rate_mismatch_sgn_; // Average mismatch sign: > 0 under-, < 0 over-shoot.
float avg_packet_loss_; // Average packet loss fraction.
EncoderState encoder_state_; // Stable, stressed or easy.
ResolutionAction action_; // Action selected in the current pass.
// Short history of the down-sampling actions from the Initialize() state.
// This is needed for going up in resolution. Since the total amount of
// down-sampling actions are constrained, the length of the list need not be
// large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample.
// Element 0 is the most recent action.
ResolutionAction down_action_history_[kDownActionHistorySize];
int num_layers_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/video_coding/qm_select.h"
#include <math.h>
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/video_coding/include/video_coding_defines.h"
#include "webrtc/modules/video_coding/internal_defines.h"
#include "webrtc/modules/video_coding/qm_select_data.h"
#include "webrtc/system_wrappers/include/trace.h"
namespace webrtc {
// QM-METHOD class
// Starts with no content metrics, zero frame sizes and rates, VGA image type,
// high frame rate level, and the object marked as not initialized. The
// members not listed here are set by ResetQM().
VCMQmMethod::VCMQmMethod()
: content_metrics_(NULL),
width_(0),
height_(0),
user_frame_rate_(0.0f),
native_width_(0),
native_height_(0),
native_frame_rate_(0.0f),
image_type_(kVGA),
framerate_level_(kFrameRateHigh),
init_(false) {
ResetQM();
}
// Nothing to release here; |content_metrics_| is not deleted by this class.
VCMQmMethod::~VCMQmMethod() {}
// Restores the content-analysis state: content class 0, both content features
// reset, and a unit aspect ratio.
void VCMQmMethod::ResetQM() {
  content_class_ = 0;
  spatial_.Reset();
  motion_.Reset();
  aspect_ratio_ = 1.0f;
}
// Refreshes the motion and spatial levels and combines them into a single
// content class, 3 * motion level + spatial level (0..8). The value is cached
// in |content_class_| and returned.
uint8_t VCMQmMethod::ComputeContentClass() {
  ComputeMotionNFD();
  ComputeSpatial();
  content_class_ = 3 * motion_.level + spatial_.level;
  return content_class_;
}
void VCMQmMethod::UpdateContent(const VideoContentMetrics* contentMetrics) {
content_metrics_ = contentMetrics;
}
void VCMQmMethod::ComputeMotionNFD() {
if (content_metrics_) {
motion_.value = content_metrics_->motion_magnitude;
}
// Determine motion level.
if (motion_.value < kLowMotionNfd) {
motion_.level = kLow;
} else if (motion_.value > kHighMotionNfd) {
motion_.level = kHigh;
} else {
motion_.level = kDefault;
}
}
void VCMQmMethod::ComputeSpatial() {
float spatial_err = 0.0;
float spatial_err_h = 0.0;
float spatial_err_v = 0.0;
if (content_metrics_) {
spatial_err = content_metrics_->spatial_pred_err;
spatial_err_h = content_metrics_->spatial_pred_err_h;
spatial_err_v = content_metrics_->spatial_pred_err_v;
}
// Spatial measure: take average of 3 prediction errors.
spatial_.value = (spatial_err + spatial_err_h + spatial_err_v) / 3.0f;
// Reduce thresholds for large scenes/higher pixel correlation.
float scale2 = image_type_ > kVGA ? kScaleTexture : 1.0;
if (spatial_.value > scale2 * kHighTexture) {
spatial_.level = kHigh;
} else if (spatial_.value < scale2 * kLowTexture) {
spatial_.level = kLow;
} else {
spatial_.level = kDefault;
}
}
// Maps a frame size to an ImageType. A frame whose pixel count equals one of
// the nominal formats in |kSizeOfImageType| gets that format; any other size
// is resolved by FindClosestImageType().
ImageType VCMQmMethod::GetImageType(uint16_t width, uint16_t height) {
  // Multiply as unsigned 32-bit: both operands would otherwise be promoted to
  // int, and the product of two large 16-bit values overflows a signed int.
  const uint32_t image_size = static_cast<uint32_t>(width) * height;
  for (int i = 0; i < kNumImageTypes; ++i) {
    if (image_size == kSizeOfImageType[i]) {
      return static_cast<ImageType>(i);
    }
  }
  // No exact match, find closest one.
  return FindClosestImageType(width, height);
}
// Returns the nominal format whose pixel count is closest to width * height.
// The search starts with a best distance equal to the pixel count itself and
// only accepts strictly smaller distances, so kQCIF is returned when no
// format is closer than that, and the smaller format wins a tie.
ImageType VCMQmMethod::FindClosestImageType(uint16_t width, uint16_t height) {
  // Multiply as unsigned 32-bit: both operands would otherwise be promoted to
  // int, and the product of two large 16-bit values overflows a signed int.
  const float size =
      static_cast<float>(static_cast<uint32_t>(width) * height);
  float min_dist = size;
  int closest = 0;
  for (int i = 0; i < kNumImageTypes; ++i) {
    const float dist = fabs(size - kSizeOfImageType[i]);
    if (dist < min_dist) {
      min_dist = dist;
      closest = i;
    }
  }
  return static_cast<ImageType>(closest);
}
// Classifies a frame rate against the three thresholds |kLowFrameRate|,
// |kMiddleFrameRate| and |kHighFrameRate|. Each threshold is inclusive for
// the lower class; anything above the last one is kFrameRateHigh.
FrameRateLevelClass VCMQmMethod::FrameRateLevel(float avg_framerate) {
  if (avg_framerate <= kLowFrameRate) {
    return kFrameRateLow;
  }
  if (avg_framerate <= kMiddleFrameRate) {
    return kFrameRateMiddle1;
  }
  if (avg_framerate <= kHighFrameRate) {
    return kFrameRateMiddle2;
  }
  return kFrameRateHigh;
}
// RESOLUTION CLASS
// Allocates the scale object returned by SelectResolution() and puts every
// other member into its reset state.
VCMQmResolution::VCMQmResolution() : qm_(new VCMResolutionScale()) {
Reset();
}
// Frees the scale object allocated in the constructor. Pointers previously
// handed out through SelectResolution() refer to this object.
VCMQmResolution::~VCMQmResolution() {
delete qm_;
}
// Clears the accumulated rate sums and the counters, and re-seeds the
// modelled buffer level from the current target bitrate.
void VCMQmResolution::ResetRates() {
  sum_target_rate_ = 0.0f;
  sum_incoming_framerate_ = 0.0f;
  sum_rate_MM_ = 0.0f;
  sum_rate_MM_sgn_ = 0.0f;
  sum_packet_loss_ = 0.0f;
  buffer_level_ = kInitBufferLevel * target_bitrate_;
  // Only counters that the class actually declares are reset here; a store
  // to a non-existent |frame_cnt_delta_| member used to break the build.
  frame_cnt_ = 0;
  low_buffer_cnt_ = 0;
  update_rate_cnt_ = 0;
}
void VCMQmResolution::ResetDownSamplingState() {
state_dec_factor_spatial_ = 1.0;
state_dec_factor_temporal_ = 1.0;
for (int i = 0; i < kDownActionHistorySize; i++) {
down_action_history_[i].spatial = kNoChangeSpatial;
down_action_history_[i].temporal = kNoChangeTemporal;
}
}
// Puts the whole object back into its initial state: rate-control values,
// averages, encoder state, layer count, accumulated rates, down-sampling
// state and content-analysis state.
void VCMQmResolution::Reset() {
  // Rate-control values. |target_bitrate_| has to be cleared before
  // ResetRates() runs, because the buffer level is seeded from it there.
  target_bitrate_ = 0.0f;
  incoming_framerate_ = 0.0f;
  per_frame_bandwidth_ = 0.0f;
  buffer_level_ = 0.0f;

  // Averages used for the selection.
  avg_packet_loss_ = 0.0f;
  avg_rate_mismatch_sgn_ = 0.0f;
  avg_rate_mismatch_ = 0.0f;
  avg_ratio_buffer_low_ = 0.0f;
  avg_incoming_framerate_ = 0.0f;
  avg_target_rate_ = 0.0f;

  num_layers_ = 1;
  encoder_state_ = kStableEncoding;

  ResetRates();
  ResetDownSamplingState();
  ResetQM();
}
// Returns the encoder state as last set by Reset() or ComputeEncoderState().
EncoderState VCMQmResolution::GetEncoderState() {
return encoder_state_;
}
// Initialize state after re-initializing the encoder,
// i.e., after SetEncodingData() in mediaOpt.
//
// Resets everything, then records |width| x |height| at |user_framerate| as
// both the current and the native format, and seeds the rate values from
// |bitrate|.
//
// Returns VCM_PARAMETER_ERROR, leaving the object untouched, if the frame
// rate is not strictly positive, a frame dimension is zero, or |num_layers|
// is below 1; returns VCM_OK otherwise.
int VCMQmResolution::Initialize(float bitrate,
                                float user_framerate,
                                uint16_t width,
                                uint16_t height,
                                int num_layers) {
  // The negated comparison also rejects a NaN frame rate. |num_layers| is
  // later used as a shift count (1 << (num_layers - 1)), so it must be >= 1.
  if (!(user_framerate > 0.0f) || width == 0 || height == 0 ||
      num_layers < 1) {
    return VCM_PARAMETER_ERROR;
  }
  Reset();
  target_bitrate_ = bitrate;
  incoming_framerate_ = user_framerate;
  UpdateCodecParameters(user_framerate, width, height);
  native_width_ = width;
  native_height_ = height;
  native_frame_rate_ = user_framerate;
  num_layers_ = num_layers;
  // Initial buffer level. Reset() seeded it from a zero target bitrate, so
  // it is recomputed now that the real target is known.
  buffer_level_ = kInitBufferLevel * target_bitrate_;
  // Per-frame bandwidth.
  per_frame_bandwidth_ = target_bitrate_ / user_framerate;
  init_ = true;
  return VCM_OK;
}
// Records the frame format the encoder is now using and derives the matching
// image type from the frame size.
void VCMQmResolution::UpdateCodecParameters(float frame_rate,
                                            uint16_t width,
                                            uint16_t height) {
  image_type_ = GetImageType(width, height);
  // |user_frame_rate_| is the target frame rate for VPM frame dropper.
  user_frame_rate_ = frame_rate;
  height_ = height;
  width_ = width;
}
// Update rate data after every encoded frame.
// |encoded_size| is the size of the encoded frame in bytes; it is converted
// to kilobits before it is charged against the modelled buffer.
void VCMQmResolution::UpdateEncodedSize(size_t encoded_size) {
  ++frame_cnt_;

  const float frame_kbits = 8.0f * static_cast<float>(encoded_size) / 1000.0f;

  // This is a model, not the actual encoder buffer level: it is re-seeded
  // whenever the rates are reset and does not account for frame dropping by
  // encoder or VCM.
  buffer_level_ += per_frame_bandwidth_ - frame_kbits;

  // Low/negative levels mean the encoder is likely dropping frames; count
  // how often that happens.
  if (buffer_level_ <= kPercBufferThr * kInitBufferLevel * target_bitrate_) {
    ++low_buffer_cnt_;
  }
}
// Update various quantities after SetTargetRates in MediaOpt.
// The statistics are accumulated against the target bitrate that was in use
// during the previous interval; the new target and frame rate are taken over
// afterwards for the current/next interval.
void VCMQmResolution::UpdateRates(float target_bitrate,
                                  float encoder_sent_rate,
                                  float incoming_framerate,
                                  uint8_t packet_loss) {
  ++update_rate_cnt_;

  // Target rate of the previous interval (i.e. before this update).
  sum_target_rate_ += target_bitrate_;

  // Packet loss as reported (from RTCP), scaled from 0..255 to a fraction.
  sum_packet_loss_ += static_cast<float>(packet_loss / 255.0);

  // Rate mismatch: previous target rate versus the rate the encoder actually
  // produced over the previous interval, relative to the target.
  const float diff = target_bitrate_ - encoder_sent_rate;
  if (target_bitrate_ > 0.0)
    sum_rate_MM_ += fabs(diff) / target_bitrate_;

  // Sign of the mismatch, to detect consistent under(+)/over(-) shooting.
  float mismatch_sign = 0.0f;
  if (diff > 0) {
    mismatch_sign = 1.0f;
  } else if (diff < 0) {
    mismatch_sign = -1.0f;
  }
  sum_rate_MM_sgn_ += mismatch_sign;

  // Values the encoder will use for the current/next ~1sec.
  target_bitrate_ = target_bitrate;
  incoming_framerate_ = incoming_framerate;
  sum_incoming_framerate_ += incoming_framerate_;

  // Per-frame bandwidth for the current/next ~1sec; zero when there is no
  // positive frame rate to divide by.
  per_frame_bandwidth_ = (incoming_framerate_ > 0.0f)
                             ? target_bitrate_ / incoming_framerate_
                             : 0.0f;
}
// Select the resolution factors: frame size and frame rate change (qm scales).
// Selection is for going down in resolution, or for going back up
// (if a previous down-sampling action was taken).
// In the current version the following constraints are imposed:
// 1) We only allow for one action, either down or up, at a given time.
// 2) The possible down-sampling actions are: spatial by 1/2x1/2, 3/4x3/4;
//    temporal/frame rate reduction by 1/2 and 2/3.
// 3) The action for going back up is the reverse of last (spatial or temporal)
//    down-sampling action. The list of down-sampling actions from the
//    Initialize() state are kept in |down_action_history_|.
// 4) The total amount of down-sampling (spatial and/or temporal) from the
//    Initialize() state (native resolution) is limited by various factors.
//
// Returns VCM_UNINITIALIZED (without touching |*qm|) before Initialize() has
// succeeded, VCM_OK otherwise. On VCM_OK, |*qm| points at the scale object
// owned by this class.
int VCMQmResolution::SelectResolution(VCMResolutionScale** qm) {
  if (!init_) {
    return VCM_UNINITIALIZED;
  }
  // Without content metrics no selection is made: the state is reset and the
  // scale object is handed out as it is.
  if (content_metrics_ == NULL) {
    Reset();
    *qm = qm_;
    return VCM_OK;
  }

  // Sanity checks on the accumulated down-sampling state.
  assert(state_dec_factor_spatial_ >= 1.0f);
  assert(state_dec_factor_temporal_ >= 1.0f);
  assert(state_dec_factor_spatial_ <= kMaxSpatialDown);
  assert(state_dec_factor_temporal_ <= kMaxTempDown);
  assert(state_dec_factor_temporal_ * state_dec_factor_spatial_ <=
         kMaxTotalDown);

  // Inputs of the selection: content class, averaged rates, encoder state.
  content_class_ = ComputeContentClass();
  ComputeRatesForSelection();
  ComputeEncoderState();

  // Start from "no action". The pointer handed out does not change below;
  // the selected action is written into the object it points at.
  SetDefaultAction();
  *qm = qm_;

  // Going back up is only considered if there has been some down-sampling
  // relative to the native state in Initialize().
  const bool has_down_sampled =
      down_action_history_[0].spatial != kNoChangeSpatial ||
      down_action_history_[0].temporal != kNoChangeTemporal;
  if (has_down_sampled && GoingUpResolution()) {
    return VCM_OK;
  }

  // Otherwise check for going down; the outcome is reflected in |qm_| only.
  GoingDownResolution();
  return VCM_OK;
}
void VCMQmResolution::SetDefaultAction() {
qm_->codec_width = width_;
qm_->codec_height = height_;
qm_->frame_rate = user_frame_rate_;
qm_->change_resolution_spatial = false;
qm_->change_resolution_temporal = false;
qm_->spatial_width_fact = 1.0f;
qm_->spatial_height_fact = 1.0f;
qm_->temporal_fact = 1.0f;
action_.spatial = kNoChangeSpatial;
action_.temporal = kNoChangeTemporal;
}
// Turns the sums accumulated since the last rate reset into averages, blends
// the target rate and frame rate averages with the current values, and
// derives the frame rate level. Averages without any samples are zero.
void VCMQmResolution::ComputeRatesForSelection() {
  // Fraction of encoded frames on which the buffer level was low.
  avg_ratio_buffer_low_ =
      (frame_cnt_ > 0)
          ? static_cast<float>(low_buffer_cnt_) / static_cast<float>(frame_cnt_)
          : 0.0f;

  // Per-update averages.
  if (update_rate_cnt_ > 0) {
    const float num_updates = static_cast<float>(update_rate_cnt_);
    avg_rate_mismatch_ = sum_rate_MM_ / num_updates;
    avg_rate_mismatch_sgn_ = sum_rate_MM_sgn_ / num_updates;
    avg_target_rate_ = sum_target_rate_ / num_updates;
    avg_incoming_framerate_ = sum_incoming_framerate_ / num_updates;
    avg_packet_loss_ = sum_packet_loss_ / num_updates;
  } else {
    avg_rate_mismatch_ = 0.0f;
    avg_rate_mismatch_sgn_ = 0.0f;
    avg_target_rate_ = 0.0f;
    avg_incoming_framerate_ = 0.0f;
    avg_packet_loss_ = 0.0f;
  }

  // For selection we may want to weight some quantities more heavily
  // with the current (i.e., next ~1sec) rate values.
  avg_target_rate_ =
      kWeightRate * avg_target_rate_ + (1.0 - kWeightRate) * target_bitrate_;
  avg_incoming_framerate_ = kWeightRate * avg_incoming_framerate_ +
                            (1.0 - kWeightRate) * incoming_framerate_;

  // Use base layer frame rate for temporal layers: this will favor spatial.
  assert(num_layers_ > 0);
  const int base_layer_divisor = 1 << (num_layers_ - 1);
  framerate_level_ = FrameRateLevel(avg_incoming_framerate_ /
                                    static_cast<float>(base_layer_divisor));
}
// Classifies the encoder as stable, stressed or easy from the averaged
// statistics.
//  - Stressed: low buffer levels occur too often, or the rate mismatch is
//    high with consistent over-shooting of the target.
//  - Easy: the rate mismatch is high with consistent under-shooting of the
//    target. When both sets of conditions hold, easy takes precedence.
//  - Stable: neither of the above.
void VCMQmResolution::ComputeEncoderState() {
  const bool high_mismatch = avg_rate_mismatch_ > kMaxRateMisMatch;

  const bool stressed =
      (avg_ratio_buffer_low_ > kMaxBufferLow) ||
      (high_mismatch && (avg_rate_mismatch_sgn_ < -kRateOverShoot));
  const bool easy =
      high_mismatch && (avg_rate_mismatch_sgn_ > kRateUnderShoot);

  if (easy) {
    encoder_state_ = kEasyEncoding;
  } else if (stressed) {
    encoder_state_ = kStressedEncoding;
  } else {
    encoder_state_ = kStableEncoding;
  }
}
bool VCMQmResolution::GoingUpResolution() {
// For going up, we check for undoing the previous down-sampling action.
float fac_width = kFactorWidthSpatial[down_action_history_[0].spatial];
float fac_height = kFactorHeightSpatial[down_action_history_[0].spatial];
float fac_temp = kFactorTemporal[down_action_history_[0].temporal];
// For going up spatially, we allow for going up by 3/4x3/4 at each stage.
// So if the last spatial action was 1/2x1/2 it would be undone in 2 stages.
// Modify the fac_width/height for this case.
if (down_action_history_[0].spatial == kOneQuarterSpatialUniform) {
fac_width = kFactorWidthSpatial[kOneQuarterSpatialUniform] /
kFactorWidthSpatial[kOneHalfSpatialUniform];
fac_height = kFactorHeightSpatial[kOneQuarterSpatialUniform] /
kFactorHeightSpatial[kOneHalfSpatialUniform];
}
// Check if we should go up both spatially and temporally.
if (down_action_history_[0].spatial != kNoChangeSpatial &&
down_action_history_[0].temporal != kNoChangeTemporal) {
if (ConditionForGoingUp(fac_width, fac_height, fac_temp,
kTransRateScaleUpSpatialTemp)) {
action_.spatial = down_action_history_[0].spatial;
action_.temporal = down_action_history_[0].temporal;
UpdateDownsamplingState(kUpResolution);
return true;
}
}
// Check if we should go up either spatially or temporally.
bool selected_up_spatial = false;
bool selected_up_temporal = false;
if (down_action_history_[0].spatial != kNoChangeSpatial) {
selected_up_spatial = ConditionForGoingUp(fac_width, fac_height, 1.0f,
kTransRateScaleUpSpatial);
}
if (down_action_history_[0].temporal != kNoChangeTemporal) {
selected_up_temporal =
ConditionForGoingUp(1.0f, 1.0f, fac_temp, kTransRateScaleUpTemp);
}
if (selected_up_spatial && !selected_up_temporal) {
action_.spatial = down_action_history_[0].spatial;
action_.temporal = kNoChangeTemporal;
UpdateDownsamplingState(kUpResolution);
return true;
} else if (!selected_up_spatial && selected_up_temporal) {
action_.spatial = kNoChangeSpatial;
action_.temporal = down_action_history_[0].temporal;
UpdateDownsamplingState(kUpResolution);
return true;
} else if (selected_up_spatial && selected_up_temporal) {
PickSpatialOrTemporal();
UpdateDownsamplingState(kUpResolution);
return true;
}
return false;
}
// Returns true if resolution should go back up by the given factors:
//  1) the encoder state is easy (significantly under-shooting the target), or
//  2) the encoder state is stable and the average target rate is above the
//     transition rate estimated for the up-scaled format.
bool VCMQmResolution::ConditionForGoingUp(float fac_width,
                                          float fac_height,
                                          float fac_temp,
                                          float scale_fac) {
  const float transition_rate_up =
      GetTransitionRate(fac_width, fac_height, fac_temp, scale_fac);
  if (encoder_state_ == kEasyEncoding) {
    return true;
  }
  return encoder_state_ == kStableEncoding &&
         avg_target_rate_ > transition_rate_up;
}
// Decides whether to reduce resolution. A reduction is considered if
//  (1) the average target rate is below the transition rate, or
//  (2) the encoder is stressed and the target rate is below a max threshold.
// The action comes from the |kSpatialAction| / |kTemporalAction| tables,
// indexed by content class and rate class, and is then passed through
// AdjustAction(). Returns true, after updating the down-sampling state, if an
// action remains selected.
bool VCMQmResolution::GoingDownResolution() {
  const float transition_rate_down =
      GetTransitionRate(1.0f, 1.0f, 1.0f, 1.0f);
  const float max_rate =
      kFrameRateFac[framerate_level_] * kMaxRateQm[image_type_];

  const bool below_transition = avg_target_rate_ < transition_rate_down;
  const bool stressed_below_max =
      encoder_state_ == kStressedEncoding && avg_target_rate_ < max_rate;
  if (!below_transition && !stressed_below_max) {
    return false;
  }

  // Table lookup: 9 content classes per rate class.
  const int table_index = content_class_ + 9 * RateClass(transition_rate_down);
  const uint8_t spatial_fact = kSpatialAction[table_index];
  const uint8_t temp_fact = kTemporalAction[table_index];

  switch (spatial_fact) {
    case 4:
      action_.spatial = kOneQuarterSpatialUniform;
      break;
    case 2:
      action_.spatial = kOneHalfSpatialUniform;
      break;
    case 1:
      action_.spatial = kNoChangeSpatial;
      break;
    default:
      assert(false);
  }
  switch (temp_fact) {
    case 3:
      action_.temporal = kTwoThirdsTemporal;
      break;
    case 2:
      action_.temporal = kOneHalfTemporal;
      break;
    case 1:
      action_.temporal = kNoChangeTemporal;
      break;
    default:
      assert(false);
  }

  // Only allow for one action (spatial or temporal) at a given time.
  assert(action_.temporal == kNoChangeTemporal ||
         action_.spatial == kNoChangeSpatial);

  // Adjust cases not captured in tables, mainly based on frame rate, and
  // also check for odd frame sizes.
  AdjustAction();

  if (action_.spatial == kNoChangeSpatial &&
      action_.temporal == kNoChangeTemporal) {
    return false;
  }
  UpdateDownsamplingState(kDownResolution);
  return true;
}
float VCMQmResolution::GetTransitionRate(float fac_width,
float fac_height,
float fac_temp,
float scale_fac) {
//通过图片尺寸获取图片类型(0~8)
ImageType image_type =
GetImageType(static_cast<uint16_t>(fac_width * width_),
static_cast<uint16_t>(fac_height * height_));
//获取帧率等级(0~4) (10/15/20/)
FrameRateLevelClass framerate_level =
FrameRateLevel(fac_temp * avg_incoming_framerate_);
// If we are checking for going up temporally, and this is the last
// temporal action, then use native frame rate.
//如果这是最后一次更改同时需要提升帧率
if (down_action_history_[1].temporal == kNoChangeTemporal &&
fac_temp > 1.0f) {
framerate_level = FrameRateLevel(native_frame_rate_);
}
// The maximum allowed rate below which down-sampling is allowed:
// Nominal values based on image format (frame size and frame rate).
//按照帧率等级一定比率取阈值流量
float max_rate = kFrameRateFac[framerate_level] * kMaxRateQm[image_type];
//按照kVGA分成两块参数进行匹配
uint8_t image_class = image_type > kVGA ? 1 : 0;
//具体的通过content_class_确定缩放比率
uint8_t table_index = image_class * 9 + content_class_;
// Scale factor for down-sampling transition threshold:
// factor based on the content class and the image size.
float scaleTransRate = kScaleTransRateQm[table_index];
// Threshold bitrate for resolution action.
return static_cast<float>(scale_fac * scaleTransRate * max_rate);
}
// Applies the selected action in the given direction: writes the scale
// factors into |qm_|, updates the action history, recomputes the codec frame
// size / frame rate, and folds the factors into the accumulated decimation
// factors.
void VCMQmResolution::UpdateDownsamplingState(UpDownAction up_down) {
  switch (up_down) {
    case kUpResolution: {
      if (action_.spatial == kOneQuarterSpatialUniform) {
        // A 1/2x1/2 action is undone in two steps, so the factor of this
        // first step is (4.0/3.0 / 2.0) rather than 1/2.
        qm_->spatial_width_fact =
            kFactorWidthSpatial[kOneHalfSpatialUniform] /
            kFactorWidthSpatial[kOneQuarterSpatialUniform];
        qm_->spatial_height_fact =
            kFactorHeightSpatial[kOneHalfSpatialUniform] /
            kFactorHeightSpatial[kOneQuarterSpatialUniform];
      } else {
        qm_->spatial_width_fact = 1.0f / kFactorWidthSpatial[action_.spatial];
        qm_->spatial_height_fact =
            1.0f / kFactorHeightSpatial[action_.spatial];
      }
      qm_->temporal_fact = 1.0f / kFactorTemporal[action_.temporal];
      RemoveLastDownAction();
      break;
    }
    case kDownResolution: {
      // The action may still be changed by the constraint check and by the
      // merging of two 3/4 steps, so the factors are read afterwards.
      ConstrainAmountOfDownSampling();
      ConvertSpatialFractionalToWhole();
      qm_->spatial_width_fact = kFactorWidthSpatial[action_.spatial];
      qm_->spatial_height_fact = kFactorHeightSpatial[action_.spatial];
      qm_->temporal_fact = kFactorTemporal[action_.temporal];
      InsertLatestDownAction();
      break;
    }
    default: {
      // This function should only be called if either the Up or Down action
      // has been selected.
      assert(false);
      break;
    }
  }

  UpdateCodecResolution();

  state_dec_factor_spatial_ = state_dec_factor_spatial_ *
                              qm_->spatial_width_fact *
                              qm_->spatial_height_fact;
  state_dec_factor_temporal_ = state_dec_factor_temporal_ * qm_->temporal_fact;
}
// Writes the new codec frame size and/or frame rate into |qm_| for the
// selected action, and flags which of the two changed.
void VCMQmResolution::UpdateCodecResolution() {
  if (action_.spatial != kNoChangeSpatial) {
    qm_->change_resolution_spatial = true;
    // Current size divided by the scale factor, rounded to nearest.
    qm_->codec_width =
        static_cast<uint16_t>(width_ / qm_->spatial_width_fact + 0.5f);
    qm_->codec_height =
        static_cast<uint16_t>(height_ / qm_->spatial_height_fact + 0.5f);
    // Size should not exceed native sizes.
    assert(qm_->codec_width <= native_width_);
    assert(qm_->codec_height <= native_height_);
    // New sizes should be multiple of 2, otherwise spatial should not have
    // been selected.
    assert(qm_->codec_width % 2 == 0);
    assert(qm_->codec_height % 2 == 0);
  }

  if (action_.temporal != kNoChangeTemporal) {
    qm_->change_resolution_temporal = true;
    if (down_action_history_[0].temporal == kNoChangeTemporal) {
      // When we undo the last temporal-down action, make sure we go back up
      // to the native frame rate. Since the incoming frame rate may
      // fluctuate over time, |avg_incoming_framerate_| scaled back up may
      // be smaller than |native_frame_rate_|.
      qm_->frame_rate = native_frame_rate_;
    } else {
      // Otherwise scale the average incoming frame rate.
      qm_->frame_rate = avg_incoming_framerate_ / qm_->temporal_fact + 0.5f;
    }
  }
}
// Classifies the average target rate relative to |transition_rate|:
//   0 - below kFacLowRate * transition_rate,
//   2 - at or above transition_rate,
//   1 - otherwise.
uint8_t VCMQmResolution::RateClass(float transition_rate) {
  if (avg_target_rate_ < (kFacLowRate * transition_rate)) {
    return 0;
  }
  if (avg_target_rate_ >= transition_rate) {
    return 2;
  }
  return 1;
}
// TODO(marpan): Would be better to capture these frame rate adjustments by
// extending the table data (qm_select_data.h).
//
// Adjusts the action picked from the tables. The rules below run in order and
// each one sees the action as modified by the rules before it, so their order
// matters.
void VCMQmResolution::AdjustAction() {
// If the spatial level is default state (neither low or high), motion level
// is not high, and spatial action was selected, switch to 2/3 frame rate
// reduction if the average incoming frame rate is high.
if (spatial_.level == kDefault && motion_.level != kHigh &&
action_.spatial != kNoChangeSpatial &&
framerate_level_ == kFrameRateHigh) {
action_.spatial = kNoChangeSpatial;
action_.temporal = kTwoThirdsTemporal;
}
// If both motion and spatial level are low, and temporal down action was
// selected, switch to spatial 3/4x3/4 if the frame rate is not above the
// lower middle level (|kFrameRateMiddle1|).
if (motion_.level == kLow && spatial_.level == kLow &&
framerate_level_ <= kFrameRateMiddle1 &&
action_.temporal != kNoChangeTemporal) {
action_.spatial = kOneHalfSpatialUniform;
action_.temporal = kNoChangeTemporal;
}
// If spatial action is selected, and there has been too much spatial
// reduction already (i.e., 1/4), then switch to temporal action if the
// average frame rate is not low.
// "Too much" here means the most recent history entry is a 1/2x1/2 action.
if (action_.spatial != kNoChangeSpatial &&
down_action_history_[0].spatial == kOneQuarterSpatialUniform &&
framerate_level_ != kFrameRateLow) {
action_.spatial = kNoChangeSpatial;
action_.temporal = kTwoThirdsTemporal;
}
// Never use temporal action if number of temporal layers is above 2.
// A selected temporal action is replaced by a 3/4x3/4 spatial action.
if (num_layers_ > 2) {
if (action_.temporal != kNoChangeTemporal) {
action_.spatial = kOneHalfSpatialUniform;
}
action_.temporal = kNoChangeTemporal;
}
// If spatial action was selected, we need to make sure the frame sizes
// are multiples of two. Otherwise switch to 2/3 temporal.
// NOTE(review): this rule runs after the layer rule above, so it can select
// a temporal action even when |num_layers_| > 2 - confirm this is intended.
if (action_.spatial != kNoChangeSpatial && !EvenFrameSize()) {
action_.spatial = kNoChangeSpatial;
// Only one action (spatial or temporal) is allowed at a given time. The
// spatial action was cleared just above, so the temporal action is set
// without checking what it was before.
action_.temporal = kTwoThirdsTemporal;
}
}
// If 3/4x3/4 spatial is selected and the down-action history already holds
// another 3/4x3/4, tries to combine the two into a single 1/2x1/2 action
// (1/2 scaling is more efficient than 9/16). Note that 3/4x3/4 spatial is
// represented by kOneHalfSpatialUniform and 1/2x1/2 by
// kOneQuarterSpatialUniform.
//
// When the combination is allowed, the earlier 3/4x3/4 entry is removed from
// the history and |width_|/|height_| are scaled back up by the 3/4x3/4
// factors. When it is not allowed, the action and the spatial state factor
// are put back to the 3/4x3/4 selection.
void VCMQmResolution::ConvertSpatialFractionalToWhole() {
  if (action_.spatial != kOneHalfSpatialUniform) {
    return;
  }

  // Find the most recent 3/4x3/4 entry in the history.
  // |kDownActionHistorySize| doubles as the "not found" marker.
  int isel = kDownActionHistorySize;
  for (int i = 0; i < kDownActionHistorySize; ++i) {
    if (down_action_history_[i].spatial == kOneHalfSpatialUniform) {
      isel = i;
      break;
    }
  }
  if (isel == kDownActionHistorySize) {
    // No earlier 3/4x3/4 to combine with.
    return;
  }

  // Tentatively switch to 1/2x1/2, with the earlier 3/4x3/4 taken out of the
  // accumulated spatial state factor.
  action_.spatial = kOneQuarterSpatialUniform;
  state_dec_factor_spatial_ =
      state_dec_factor_spatial_ /
      (kFactorWidthSpatial[kOneHalfSpatialUniform] *
       kFactorHeightSpatial[kOneHalfSpatialUniform]);

  // Check if switching to 1/2x1/2 (=1/4) spatial is allowed; the check
  // clears the spatial action when it is not.
  ConstrainAmountOfDownSampling();
  if (action_.spatial == kNoChangeSpatial) {
    // Not allowed. Go back to 3/4x3/4 spatial.
    action_.spatial = kOneHalfSpatialUniform;
    state_dec_factor_spatial_ =
        state_dec_factor_spatial_ *
        kFactorWidthSpatial[kOneHalfSpatialUniform] *
        kFactorHeightSpatial[kOneHalfSpatialUniform];
    return;
  }

  // Switching is allowed. Remove the 3/4x3/4 entry from the history by
  // shifting the older entries forward.
  for (int i = isel; i < kDownActionHistorySize - 1; ++i) {
    down_action_history_[i].spatial = down_action_history_[i + 1].spatial;
  }
  // The shift leaves the last slot untouched: clear it so that the removed
  // entry does not survive when it was in the last slot, and so that no stale
  // duplicate of the oldest entry is left behind.
  down_action_history_[kDownActionHistorySize - 1].spatial = kNoChangeSpatial;

  // Update the frame size to the one before the removed 3/4x3/4 step.
  width_ = width_ * kFactorWidthSpatial[kOneHalfSpatialUniform];
  height_ = height_ * kFactorHeightSpatial[kOneHalfSpatialUniform];
}
// Tells whether the frame size that results from the current spatial action
// has an even width and an even height. Only the 3/4x3/4
// (kOneHalfSpatialUniform) and 1/2x1/2 (kOneQuarterSpatialUniform) actions
// are checked; any other action yields true.
bool VCMQmResolution::EvenFrameSize() {
  if (action_.spatial == kOneHalfSpatialUniform) {
    return (width_ * 3 / 4) % 2 == 0 && (height_ * 3 / 4) % 2 == 0;
  }
  if (action_.spatial == kOneQuarterSpatialUniform) {
    return (width_ * 1 / 2) % 2 == 0 && (height_ * 1 / 2) % 2 == 0;
  }
  return true;
}
// Records the currently selected down-action at the front of
// |down_action_history_|. The spatial and temporal histories are handled
// independently, and only for a component that is actually selected: the
// existing entries move back by one slot and the oldest one drops off.
void VCMQmResolution::InsertLatestDownAction() {
  const int last = kDownActionHistorySize - 1;

  if (action_.spatial != kNoChangeSpatial) {
    int slot = last;
    while (slot > 0) {
      down_action_history_[slot].spatial =
          down_action_history_[slot - 1].spatial;
      --slot;
    }
    down_action_history_[0].spatial = action_.spatial;
  }

  if (action_.temporal != kNoChangeTemporal) {
    int slot = last;
    while (slot > 0) {
      down_action_history_[slot].temporal =
          down_action_history_[slot - 1].temporal;
      --slot;
    }
    down_action_history_[0].temporal = action_.temporal;
  }
}
// Drops the most recent entry from |down_action_history_| for each component
// (spatial and/or temporal) that is selected in |action_|. The remaining
// entries move forward by one slot and the freed last slot is reset to
// "no change".
void VCMQmResolution::RemoveLastDownAction() {
  const int tail = kDownActionHistorySize - 1;

  if (action_.spatial == kOneQuarterSpatialUniform) {
    // A 1/2x1/2 action is not popped: the newest entry is replaced with
    // 3/4x3/4 and the rest of the history stays as it is.
    down_action_history_[0].spatial = kOneHalfSpatialUniform;
  } else if (action_.spatial != kNoChangeSpatial) {
    for (int k = 0; k < tail; ++k) {
      down_action_history_[k].spatial = down_action_history_[k + 1].spatial;
    }
    down_action_history_[tail].spatial = kNoChangeSpatial;
  }

  if (action_.temporal != kNoChangeTemporal) {
    for (int k = 0; k < tail; ++k) {
      down_action_history_[k].temporal = down_action_history_[k + 1].temporal;
    }
    down_action_history_[tail].temporal = kNoChangeTemporal;
  }
}
void VCMQmResolution::ConstrainAmountOfDownSampling() {
// Sanity checks on down-sampling selection:
// override the settings for too small image size and/or frame rate.
// Also check the limit on current down-sampling states.
float spatial_width_fact = kFactorWidthSpatial[action_.spatial];
float spatial_height_fact = kFactorHeightSpatial[action_.spatial];
float temporal_fact = kFactorTemporal[action_.temporal];
float new_dec_factor_spatial =
state_dec_factor_spatial_ * spatial_width_fact * spatial_height_fact;
float new_dec_factor_temp = state_dec_factor_temporal_ * temporal_fact;
// No spatial sampling if current frame size is too small, or if the
// amount of spatial down-sampling is above maximum spatial down-action.
if ((width_ * height_) <= kMinImageSize ||
new_dec_factor_spatial > kMaxSpatialDown) {
action_.spatial = kNoChangeSpatial;
new_dec_factor_spatial = state_dec_factor_spatial_;
}
// No frame rate reduction if average frame rate is below some point, or if
// the amount of temporal down-sampling is above maximum temporal down-action.
if (avg_incoming_framerate_ <= kMinFrameRate ||
new_dec_factor_temp > kMaxTempDown) {
action_.temporal = kNoChangeTemporal;
new_dec_factor_temp = state_dec_factor_temporal_;
}
// Check if the total (spatial-temporal) down-action is above maximum allowed,
// if so, disallow the current selected down-action.
if (new_dec_factor_spatial * new_dec_factor_temp > kMaxTotalDown) {
if (action_.spatial != kNoChangeSpatial) {
action_.spatial = kNoChangeSpatial;
} else if (action_.temporal != kNoChangeTemporal) {
action_.temporal = kNoChangeTemporal;
} else {
// We only allow for one action (spatial or temporal) at a given time, so
// either spatial or temporal action is selected when this function is
// called. If the selected action is disallowed from one of the above
// 2 prior conditions (on spatial & temporal max down-action), then this
// condition "total down-action > |kMaxTotalDown|" would not be entered.
assert(false);
}
}
}
// Selects which component to act on, based on which one has had the most
// down-sampling so far: the most recent spatial down-action if the
// accumulated spatial factor is strictly larger than the temporal one,
// otherwise the most recent temporal down-action. The other component is
// set to "no change".
void VCMQmResolution::PickSpatialOrTemporal() {
  const bool spatial_reduced_most =
      state_dec_factor_spatial_ > state_dec_factor_temporal_;

  if (!spatial_reduced_most) {
    action_.temporal = down_action_history_[0].temporal;
    action_.spatial = kNoChangeSpatial;
    return;
  }
  action_.temporal = kNoChangeTemporal;
  action_.spatial = down_action_history_[0].spatial;
}
// TODO(marpan): Update when we allow for directional spatial down-sampling.
// Chooses the spatial down-sampling factors (width x height) written to
// |qm_|, based on the target rate relative to |transition_rate|, the spatial
// prediction errors of the content and the aspect ratio.
//
// The rules below are evaluated in order and a later rule overwrites the
// choice of an earlier one. If no rule fires, the factors in |qm_| are left
// untouched.
void VCMQmResolution::SelectSpatialDirectionMode(float transition_rate) {
  // Rule 1: for bit rates well below the transitional rate, select 2x2.
  const bool rate_well_below_transition =
      avg_target_rate_ < transition_rate * kRateRedSpatial2X2;
  if (rate_well_below_transition) {
    qm_->spatial_width_fact = 2.0f;
    qm_->spatial_height_fact = 2.0f;
  }

  // Prediction errors; all zero when no content metrics are available.
  float err_2x2 = content_metrics_ ? content_metrics_->spatial_pred_err : 0.0f;
  float err_horiz =
      content_metrics_ ? content_metrics_->spatial_pred_err_h : 0.0f;
  float err_vert =
      content_metrics_ ? content_metrics_->spatial_pred_err_v : 0.0f;

  // Rule 2: for aspect ratios of 16:9 or wider, halve only the width when
  // the horizontal prediction error is the lowest of the three.
  const bool horiz_is_lowest = err_horiz < err_2x2 && err_horiz < err_vert;
  if (aspect_ratio_ >= 16.0f / 9.0f && horiz_is_lowest) {
    qm_->spatial_width_fact = 2.0f;
    qm_->spatial_height_fact = 1.0f;
  }

  // Rule 3: select 4/3x4/3 when the uniform error is below both directional
  // errors, each scaled up by its margin (favors uniform over directional).
  const bool uniform_preferred =
      err_2x2 < err_horiz * (1.0f + kSpatialErr2x2VsHoriz) &&
      err_2x2 < err_vert * (1.0f + kSpatialErr2X2VsVert);
  if (uniform_preferred) {
    qm_->spatial_width_fact = 4.0f / 3.0f;
    qm_->spatial_height_fact = 4.0f / 3.0f;
  }

  // Rule 4: halve only the height when the vertical error is below both
  // other errors, each scaled down by its margin.
  const bool vert_preferred =
      err_vert < err_horiz * (1.0f - kSpatialErrVertVsHoriz) &&
      err_vert < err_2x2 * (1.0f - kSpatialErr2X2VsVert);
  if (vert_preferred) {
    qm_->spatial_width_fact = 1.0f;
    qm_->spatial_height_fact = 2.0f;
  }
}
} // namespace webrtc