/*
 * Estimate the low-resolution cost of the CU at (cuX, cuY) of frame b, using
 * frame p0 as the forward (list 0) reference and frame p1 as the backward
 * (list 1) reference. The best cost found is stored per CU and accumulated
 * into the frame/slice and row totals.
 *
 * Parameters:
 *   tld       lookahead thread-local data; tld.me is the motion estimator used
 *             for setSourcePU(), bufSATD() and motionEstimate()
 *   cuX, cuY  CU column / row, in CU units
 *   p0, p1, b indices into m_frames: forward reference, backward reference and
 *             the frame being analysed; list 1 is considered only when b < p1
 *   bDoSearch per-list flag; when false the cost already stored for that list
 *             is reused instead of running a new search
 *   lastRow   when true, the row below is not used for MV candidates
 *   slice     < 0: accumulate into the frame totals of fenc;
 *             otherwise accumulate into m_slice[slice]
 *   hme       when true, work on lowerResPlane with the 4x4 grid dimensions and
 *             return right after the per-list search (no cost accumulation)
 *
 * Outline:
 *   1. Fetch fref0 / fref1 / fenc from m_frames; pick the weighted list-0
 *      reference when it is flagged as weighted and hme is off.
 *   2. Select widthInCU / heightInCU and compute the CU index cuXY, the CU size
 *      and the pixel offset pelOffset of the CU inside the plane.
 *   3. If anything will be searched or averaged, hand the source block to the
 *      motion estimator with setSourcePU().
 *   4. Compute the MV search bounds [mvmin, mvmax].
 *   5. For each list (list 1 only when b < p1):
 *        a. if bDoSearch[i] is false, fold the stored cost into bcost and skip;
 *        b. collect candidate MVs from the right, below, below-left and
 *           below-right neighbours, plus (non-hme only) the co-located
 *           lower-res MV scaled by 2;
 *        c. measure the SATD of every candidate and keep the cheapest as mvp;
 *        d. run motionEstimate() around mvp and store the resulting cost;
 *        e. possibly replace the result with the recorded skip cost and a
 *           zero MV;
 *        f. fold the list cost into bcost / listused.
 *   6. Return here when hme is set.
 *   7. b < p1: also try two bidirectional candidates (average of the two
 *      motion-compensated predictions, average of the two co-located blocks),
 *      then add lowresPenalty.
 *      Otherwise: add lowresPenalty and compare against fenc->intraCost[cuXY].
 *   8. Decide whether the CU counts towards the frame score (edge CUs do not,
 *      unless the frame is at most 2 CUs wide or tall).
 *   9. Derive the AQ-scaled cost bcostAq.
 *  10. For frame-score CUs, add bcost / bcostAq (and the intra count) to the
 *      frame or slice totals.
 *  11. Add bcostAq to the row total and store the packed cost + listused in
 *      fenc->lowresCosts.
 */
void CostEstimateGroup::estimateCUCost(LookaheadTLD& tld, int cuX, int cuY, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme)
{
// Forward reference p0, backward reference p1 and the frame b being estimated
Lowres *fref0 = m_frames[p0];
Lowres *fref1 = m_frames[p1];
Lowres *fenc = m_frames[b];
// Use the weighted list-0 reference only when it is flagged as weighted and hme is off;
// otherwise use the plain forward reference
ReferencePlanes *wfref0 = (fenc->weightedRef[b - p0].isWeighted && !hme) ? &fenc->weightedRef[b - p0] : fref0;
// Number of CUs across / down the frame: the 4x4 grid for hme, the 8x8 grid otherwise
const int widthInCU = hme ? m_lookahead.m_4x4Width : m_lookahead.m_8x8Width;
const int heightInCU = hme ? m_lookahead.m_4x4Height : m_lookahead.m_8x8Height;
// A backward reference exists (b < p1), so list 1 and bidir candidates are evaluated
const int bBidir = (b < p1);
// Linear CU index in the current grid
const int cuXY = cuX + cuY * widthInCU;
// Linear index computed from the halved CU coordinates; used below to read the lower-res MV/cost arrays
const int cuXY_4x4 = (cuX / 2) + (cuY / 2) * widthInCU / 2;
// Low-resolution CU size
const int cuSize = X265_LOWRES_CU_SIZE;
// Offset of this CU inside the plane; the hme path uses half the luma stride
const intptr_t pelOffset = cuSize * cuX + cuSize * cuY * (hme ? fenc->lumaStride/2 : fenc->lumaStride);
// Give the motion estimator the source block, but only if a search or a bidir average will actually run.
// The hme path reads lowerResPlane with half stride, the normal path reads lowresPlane.
if ((bBidir || bDoSearch[0] || bDoSearch[1]) && hme)
tld.me.setSourcePU(fenc->lowerResPlane[0], fenc->lumaStride / 2, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);
else if((bBidir || bDoSearch[0] || bDoSearch[1]) && !hme)
tld.me.setSourcePU(fenc->lowresPlane[0], fenc->lumaStride, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);
/* A small, arbitrary bias to avoid VBV problems caused by zero-residual lookahead blocks. */
int lowresPenalty = 4;
// Distance from b to each reference; used as the second index of the per-list MV / cost arrays
int listDist[2] = { b - p0, p1 - b};
MV mvmin, mvmax;
// Best cost so far and the prediction type that produced it (see the listused legend below)
int bcost = tld.me.COST_MAX;
int listused = 0;
// TODO: restrict to slices boundaries
// establish search bounds that don't cross extended frame boundaries
// The bounds are relative to this CU's position and extend 8 beyond the CU grid on every side
mvmin.x = (int32_t)(-cuX * cuSize - 8);
mvmin.y = (int32_t)(-cuY * cuSize - 8);
mvmax.x = (int32_t)((widthInCU - cuX - 1) * cuSize + 8);
mvmax.y = (int32_t)((heightInCU - cuY - 1) * cuSize + 8);
// Iterate over the prediction lists: list 0 always, list 1 only when bBidir
for (int i = 0; i < 1 + bBidir; i++)
{
// Stored cost slot for this CU / list / distance (lower-res array for hme)
int& fencCost = hme ? fenc->lowerResMvCosts[i][listDist[i]][cuXY] : fenc->lowresMvCosts[i][listDist[i]][cuXY];
int skipCost = INT_MAX;
// No search requested for this list: reuse the stored cost and move on
if (!bDoSearch[i])
{
COPY2_IF_LT(bcost, fencCost, listused, i + 1);
continue;
}
int numc = 0;
MV mvc[5], mvp;
// Stored MV slot for this CU / list / distance (lower-res array for hme)
MV* fencMV = hme ? &fenc->lowerResMvs[i][listDist[i]][cuXY] : &fenc->lowresMvs[i][listDist[i]][cuXY];
ReferencePlanes* fref = i ? fref1 : wfref0;
/* Reverse-order MV prediction
Build the candidate MV set mvc from the right and below neighbours of this CU.
NOTE(review): using right/below neighbours suggests the caller visits CUs in
reverse order so that those MVs are already available - confirm against the caller. */
#define MVC(mv) mvc[numc++] = mv;
// Not in the last column: add the MV of the CU to the right
if (cuX < widthInCU - 1)
MVC(fencMV[1]);
// Not in the last row
if (!lastRow)
{
// Add the MV of the CU below
MVC(fencMV[widthInCU]);
// Not in the first column
if (cuX > 0)
// Add the MV of the below-left CU
MVC(fencMV[widthInCU - 1]);
// Not in the last column
if (cuX < widthInCU - 1)
// Add the MV of the below-right CU
MVC(fencMV[widthInCU + 1]);
}
// Non-hme pass with lower-res MVs present (non-null) and a positive stored lower-res cost:
// add the lower-res MV at cuXY_4x4, scaled by 2, as a fifth candidate
if (fenc->lowerResMvs[0][0] && !hme && fenc->lowerResMvCosts[i][listDist[i]][cuXY_4x4] > 0)
{
MVC((fenc->lowerResMvs[i][listDist[i]][cuXY_4x4]) * 2);
}
#undef MVC
// No candidates at all: predict from the zero MV
if (!numc)
mvp = 0;
// Otherwise pick the cheapest candidate as the predictor
else
{
ALIGN_VAR_32(pixel, subpelbuf[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
int mvpcost = MotionEstimate::COST_MAX;
/* measure SATD cost of each neighbor MV (estimating merge analysis)
* and use the lowest cost MV as MVP (estimating AMVP). Since all
* mvc[] candidates are measured here, none are passed to motionEstimate */
// Evaluate every candidate MV
for (int idx = 0; idx < numc; idx++)
{
intptr_t stride = X265_LOWRES_CU_SIZE;
// Motion-compensate the reference with this candidate MV
pixel *src = fref->lowresMC(pelOffset, mvc[idx], subpelbuf, stride, hme);
// SATD of the prediction against the source block set by setSourcePU()
int cost = tld.me.bufSATD(src, stride);
// Keep the cheapest candidate and its cost
COPY2_IF_LT(mvpcost, cost, mvp, mvc[idx]);
/* Except for mv0 case, everyting else is likely to have enough residual to not trigger the skip. */
// While the best predictor so far is the zero MV and a backward reference exists,
// remember the current candidate's cost as the skip cost
if (!mvp.notZero() && bBidir)
skipCost = cost;
}
}
/* ME will never return a cost larger than the cost @MVP, so we do not
* have to check that ME cost is more than the estimated merge cost
* (presumably because the search range includes mvp itself) */
// Run motion estimation around mvp; the best MV is written to *fencMV and the cost to fencCost.
// The hme path additionally passes the lower-res reference plane.
if(!hme)
fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, s_merange, *fencMV, m_lookahead.m_param->maxSlices);
else
fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, s_merange, *fencMV, m_lookahead.m_param->maxSlices, fref->lowerResPlane[0]);
// With a backward reference: if the skip cost is below 64 and below the ME cost,
// take the skip cost and force the stored MV to zero
if (skipCost < 64 && skipCost < fencCost && bBidir)
{
fencCost = skipCost;
*fencMV = 0;
}
// Fold this list's cost into the running best and record which list won.
// listused = 0 intra
// = 1 forward (list 0)
// = 2 backward (list 1)
// = 3 bidirectional
COPY2_IF_LT(bcost, fencCost, listused, i + 1);
} // end of for (int i = 0; i < 1 + bBidir; i++)
// The hme pass only fills the lower-res MV / cost arrays; nothing below applies to it
if (hme)
return;
// A backward reference exists: also evaluate bidirectional candidates
if (bBidir) /* B, also consider bidir */
{
/* NOTE: the wfref0 (weightp) is not used for BIDIR */
/* avg(l0-mv, l1-mv) candidate */
ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE;
// Motion-compensated predictions from each reference using the stored per-list MVs
pixel *src0 = fref0->lowresMC(pelOffset, fenc->lowresMvs[0][listDist[0]][cuXY], subpelbuf0, stride0, 0);
pixel *src1 = fref1->lowresMC(pelOffset, fenc->lowresMvs[1][listDist[1]][cuXY], subpelbuf1, stride1, 0);
ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
// Average the two predictions into ref
primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
// SATD of the averaged prediction
int bicost = tld.me.bufSATD(ref, X265_LOWRES_CU_SIZE);
// Keep it if it beats the best single-list cost
COPY2_IF_LT(bcost, bicost, listused, 3);
/* co-located candidate */
// Co-located block in the forward reference
src0 = fref0->lowresPlane[0] + pelOffset;
// Co-located block in the backward reference
src1 = fref1->lowresPlane[0] + pelOffset;
// Average the two co-located blocks into ref
primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](ref, X265_LOWRES_CU_SIZE, src0, fref0->lumaStride, src1, fref1->lumaStride, 32);
// SATD of the averaged co-located prediction
bicost = tld.me.bufSATD(ref, X265_LOWRES_CU_SIZE);
// Keep it if it is the cheapest so far
COPY2_IF_LT(bcost, bicost, listused, 3);
// Inter costs carry the lowresPenalty bias
bcost += lowresPenalty;
}
// No backward reference: compare the inter cost against the stored intra cost
else /* P, also consider intra */
{
// Inter costs carry the lowresPenalty bias
bcost += lowresPenalty;
// Intra wins if its stored cost is lower than the penalised inter cost
if (fenc->intraCost[cuXY] < bcost)
{
bcost = fenc->intraCost[cuXY];
listused = 0; // listused = 0 means intra
}
}
/* do not include edge blocks in the frame cost estimates, they are not very accurate */
// Interior CUs count towards the frame score; when the frame is at most 2 CUs wide or tall every CU counts
const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
// AQ-scaled cost: (bcost * factor + 128) >> 8 for frame-score CUs when invQscaleFactor is set,
// otherwise the plain bcost. qgSize == 8 selects the invQscaleFactor8x8 table.
int bcostAq;
if (m_lookahead.m_param->rc.qgSize == 8)
bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor8x8[cuXY] + 128) >> 8) : bcost;
else
bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor[cuXY] +128) >> 8) : bcost;
// Frame-score CUs: accumulate cost and AQ cost into the frame totals (slice < 0) or the slice totals
if (bFrameScoreCU)
{
if (slice < 0)
{
fenc->costEst[b - p0][p1 - b] += bcost;
fenc->costEstAq[b - p0][p1 - b] += bcostAq;
// Count intra-coded CUs, only when there is no backward reference
if (!listused && !bBidir)
fenc->intraMbs[b - p0]++;
}
else
{
m_slice[slice].costEst += bcost;
m_slice[slice].costEstAq += bcostAq;
// Count intra-coded CUs, only when there is no backward reference
if (!listused && !bBidir)
m_slice[slice].intraMbs++;
}
}
// Add this CU's AQ cost to its row total (done for every CU, including edge CUs)
fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq;
// Store the per-CU result: cost clamped to LOWRES_COST_MASK, with listused packed in at LOWRES_COST_SHIFT
fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost, LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT));
}
CostEstimateGroup::estimateCUCost()
最新推荐文章于 2023-08-10 22:19:09 发布