调用栈
EncodePass
SaoGenerationDecision
-GatherSaoStatisticsLcu_BT_SSE2
- -countEdge
输入参数
- 原始图片+重构后图片地址及偏移。
- lcu长宽
输出参数
- BO/EO的差异值,及个数。
16列,逐行进行处理。
x86 SSE2 内建函数(intrinsics,并非内联汇编)说明,参考文档:_mm_set1_epi8 in core::arch::x86_64 - Rust
_mm_loadu_si128 :Loads 128-bits of integer data from memory into a new vector
_mm_set1_epi8(-128):Broadcasts 8-bit integer a to all elements. -- 将值 a 广播到 128 位寄存器的每个 8 位 lane 中(此处每个字节均为 0x80)。
_mm_xor_si128:Computes the bitwise XOR of 128 bits (representing integer data) in a and b. -- 128 位按位异或。
_mm_cmplt_epi8:Compares packed 8-bit integers in a and b for less-than. -- 逐 8 位做有符号"小于"比较(不是按位或):a 的该字节小于 b 的对应字节时,结果字节为 0xFF,否则为 0。
_mm_cmpgt_epi8:Compares packed 8-bit integers in a and b for greater-than. -- 逐 8 位做有符号"大于"比较:a 的该字节大于 b 的对应字节时,结果字节为 0xFF,否则为 0。
_mm_sub_epi8:逐 8 位相减(a - b),溢出时回绕,不做饱和。
_mm_add_epi8:同理,逐 8 位相加(a + b),溢出时回绕。
_mm_and_si128:对 a 和 b 的 128 位做按位与。
_mm_cmpeq_epi8:Compares packed 8-bit integers in a and b for equality. -- 逐 8 位做相等比较:相等时结果字节为 0xFF,否则为 0。
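_mm_sad_epu8:把 a、b 当作 16 个无符号 8 位整数,逐字节求绝对差 |a - b|;低 8 个字节的绝对差之和写入结果的 bit 0~15,高 8 个字节的绝对差之和写入 bit 64~79,其余位为 0。当 b 为全 0 时,相当于分别对 a 的低 8 字节、高 8 字节求无符号和。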
_mm_setzero_si128:Returns a vector with all elements set to zero.
_mm_add_epi64:Adds packed 64-bit integers in a and b. -- 逐 64 位相加(两个 64 位 lane 各自相加)。
转换成c语言,代码实现逻辑如下(伪代码):
static void countEdge(__m128i *eoDiff, __m128i *eoCount, EB_BYTE ptr, EB_S32 offset, __m128i x0, __m128i diff, __m128i mask)
{
    /*
     * PSEUDOCODE - not compilable C. Every operator below acts lane-wise on a
     * 128-bit vector (16 x 8-bit lanes unless stated otherwise); the SSE2
     * intrinsic being modelled is named in each comment.
     *
     * Purpose: classify 16 pixels by comparing each centre pixel (x0) with its
     * two neighbours at ptr + offset and ptr - offset, then accumulate, per
     * category, how many pixels fell into it (eoCount) and the sum of their
     * diff bytes (eoDiff).
     *
     *   eoDiff   in/out: 4 accumulators, each holding two 64-bit sums.
     *   eoCount  in/out: 4 accumulators, each holding 16 per-lane 8-bit counters.
     *   ptr      address of the 16 centre pixels; neighbours are ptr +/- offset.
     *   offset   distance in bytes from the centre to either neighbour.
     *   x0       the 16 centre pixels. NOTE(review): presumably already XOR-ed
     *            with 0x80 by the caller so the signed compares below are
     *            valid - confirm against the caller.
     *   diff     per-pixel difference bytes to accumulate.
     *   mask     lanes whose mask byte is 0 are forced to category 0 (ignored).
     */
    __m128i x1, x2;
    __m128i c1, c2;
    __m128i cat, select;
    __m128i picked;          /* diff bytes of the lanes in the current category */
    uint64 sum_lo, sum_hi;   /* byte sums of lanes 0..7 and 8..15 */
    int k;

    /*
     * Edge categories, in eoCount/eoDiff index order.
     * NOTE(review): the order -2, -1, 1, 2 should be confirmed against the
     * intrinsic source; cat == 0 (no edge) is intentionally not accumulated.
     */
    static const int8 category[4] = { -2, -1, 1, 2 };

    /* _mm_loadu_si128: unaligned 16-byte loads of the two neighbour rows. */
    x1 = *(__m128i *)(ptr + offset);
    x2 = *(__m128i *)(ptr - offset);

    /*
     * _mm_xor_si128 with _mm_set1_epi8(-128): flip the top bit of every byte.
     * This maps unsigned 0..255 onto signed -128..127 while preserving order,
     * which is required because SSE2 only offers signed 8-bit compares.
     * (This is XOR, not OR: OR would destroy the ordering.)
     */
    x1 ^= 0x80;
    x2 ^= 0x80;

    /*
     * _mm_cmplt_epi8 / _mm_cmpgt_epi8 / _mm_sub_epi8.
     * A compare yields 0xFF (-1) or 0, so (lt - gt) per lane is:
     *   -1 when x0 < neighbour, +1 when x0 > neighbour, 0 when equal.
     */
    c1 = ((x0 < x1) ? 0xFF : 0) - ((x0 > x1) ? 0xFF : 0);
    c2 = ((x0 < x2) ? 0xFF : 0) - ((x0 > x2) ? 0xFF : 0);

    /* _mm_add_epi8: per-lane category in -2..2. */
    cat = c1 + c2;

    /* _mm_and_si128: lanes with a zero mask byte become category 0. */
    cat &= mask;

    for (k = 0; k < 4; k++) {
        /* _mm_cmpeq_epi8: 0xFF in lanes belonging to category[k], else 0. */
        select = (cat == category[k]) ? 0xFF : 0;

        /* _mm_sub_epi8: 0xFF is -1, so subtracting it increments the counter. */
        eoCount[k] = eoCount[k] - select;

        /* _mm_and_si128: keep diff only in the selected lanes. */
        picked = diff & select;

        /*
         * _mm_sad_epu8(picked, _mm_setzero_si128()): |b - 0| on unsigned bytes,
         * i.e. a plain unsigned byte sum per 8-lane half. The two sums land in
         * bits 0..15 and 64..79 of the result; all other bits are 0.
         */
        sum_lo = picked[0] + ... + picked[7];
        sum_hi = picked[8] + ... + picked[15];

        /* _mm_add_epi64: accumulate the two sums in the two 64-bit lanes. */
        eoDiff[k] = eoDiff[k] + (sum_lo : sum_hi);
    }
}