/*
 * Build the low-resolution planes of `frame` from its luma plane and reset
 * the per-frame caches that are stored next to them.
 *
 * Steps, in order:
 *   1. Replicate the last luma column and the last luma row by one pixel, so
 *      the downscaler can read one column/row past the picture without a
 *      special case.
 *   2. Run the downscaler through the h->mc.frame_init_lowres_core function
 *      pointer, filling frame->lowres[0..3].
 *   3. Pad the borders of all four low-resolution planes.
 *   4. Reset i_cost_est, i_row_satds and lowres_mvs to their initial marker
 *      values.
 *
 * h     - encoder context; supplies the downscaler and param.i_bframe
 * frame - frame whose luma plane is read and whose lowres data is written
 */
void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
{
    pixel *luma = frame->plane[0];
    const int stride = frame->i_stride[0];
    const int lines  = frame->i_lines[0];
    const int width  = frame->i_width[0];

    /* Replicate the last column: one extra pixel at the end of every row. */
    pixel *row = luma;
    for( int y = 0; y < lines; y++ )
    {
        row[width] = row[width-1];
        row += stride;
    }

    /* Replicate the last row, including the extra column written above. */
    pixel *last_row = luma + stride*(lines-1);
    memcpy( last_row + stride, last_row, (width+1) * sizeof(pixel) );

    /* Downscale into four planes.  In the C reference implementation in this
     * file every output pixel is the rounded average of a 2x2 block of luma
     * pixels, and the four planes differ only in where that block starts:
     *   lowres[0]: (2x,   2y)      lowres[1]: (2x+1, 2y)
     *   lowres[2]: (2x,   2y+1)    lowres[3]: (2x+1, 2y+1)
     * i.e. the same half-size picture sampled at four offsets one full-res
     * pixel apart.  NOTE(review): the call goes through a function pointer,
     * so another implementation may be installed; it is expected to match. */
    h->mc.frame_init_lowres_core( luma, frame->lowres[0], frame->lowres[1], frame->lowres[2], frame->lowres[3],
                                  stride, frame->i_stride_lowres, frame->i_width_lowres, frame->i_lines_lowres );

    /* Extend all four low-resolution planes on every side. */
    x264_frame_expand_border_lowres( frame );

    /* Set every byte of the cost-estimate table to 0xFF. */
    memset( frame->i_cost_est, -1, sizeof(frame->i_cost_est) );

    /* Per the original note this state serves the adaptive B-frame decision;
     * -1 and 0x7FFF presumably mean "not computed yet" -- confirm with users
     * of these fields. */
    const int bframes = h->param.i_bframe;
    const int satd_dim = bframes + 2;
    for( int i = 0; i < satd_dim; i++ )
        for( int j = 0; j < satd_dim; j++ )
            frame->i_row_satds[i][j][0] = -1;

    /* One outer entry without B-frames, two with them. */
    const int mv_outer = bframes ? 2 : 1;
    for( int i = 0; i < mv_outer; i++ )
        for( int j = 0; j <= bframes; j++ )
            frame->lowres_mvs[i][j][0][0] = 0x7FFF;
}
/*
 * C reference downscaler: produce four half-resolution planes from one
 * full-resolution plane.
 *
 * Every output pixel is a two-stage rounded average of a 2x2 block of source
 * pixels.  For output position (x, y) the block starts at
 *   dst0: (2x,   2y)      dsth: (2x+1, 2y)
 *   dstv: (2x,   2y+1)    dstc: (2x+1, 2y+1)
 *
 * src0       - top-left of the source plane
 * dst0..dstc - the four output planes
 * src_stride - distance between source rows, in pixels
 * dst_stride - distance between output rows, in pixels (shared by all four)
 * width      - output width in pixels
 * height     - output height in rows
 *
 * The source is read up to column 2*width and row 2*height inclusive, so it
 * must hold one column and one row beyond 2*width x 2*height.
 */
static void frame_init_lowres_core( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,
                                    intptr_t src_stride, intptr_t dst_stride, int width, int height )
{
/* Rounded average of two values.  Kept as a macro so the arithmetic happens
 * in whatever type the pixel operands promote to. */
#define AVG2(p,q) (((p)+(q)+1)>>1)
/* Average each vertical pair, then average the two results.  Slower than a
 * naive bilinear filter, but matches the asm. */
#define FILTER(a,b,c,d) AVG2( AVG2(a,b), AVG2(c,d) )

    for( int y = 0; y < height; y++ )
    {
        /* Three consecutive source rows feed one output row. */
        const pixel *top = src0 + 2*y*src_stride;
        const pixel *mid = top + src_stride;
        const pixel *bot = mid + src_stride;

        pixel *out0 = dst0 + y*dst_stride;
        pixel *outh = dsth + y*dst_stride;
        pixel *outv = dstv + y*dst_stride;
        pixel *outc = dstc + y*dst_stride;

        for( int x = 0; x < width; x++ )
        {
            const int c0 = 2*x;
            const int c1 = c0 + 1;
            const int c2 = c0 + 2;

            out0[x] = FILTER( top[c0], mid[c0], top[c1], mid[c1] );
            outh[x] = FILTER( top[c1], mid[c1], top[c2], mid[c2] );
            outv[x] = FILTER( mid[c0], bot[c0], mid[c1], bot[c1] );
            outc[x] = FILTER( mid[c1], bot[c1], mid[c2], bot[c2] );
        }
    }

#undef FILTER
#undef AVG2
}
/*
 * Pad the borders of all four low-resolution planes of `frame`.
 *
 * Each plane is extended by PADH pixels to the left and right and by PADV
 * rows above and below, as a non-chroma plane.
 */
void x264_frame_expand_border_lowres( x264_frame_t *frame )
{
    /* All four planes share the same geometry. */
    const int stride = frame->i_stride_lowres;
    const int width  = frame->i_width_lowres;
    const int lines  = frame->i_lines_lowres;

    for( int plane = 0; plane < 4; plane++ )
    {
        /* Trailing arguments: pad top = 1, pad bottom = 1, chroma = 0. */
        plane_expand_border( frame->lowres[plane], stride, width, lines, PADH, PADV, 1, 1, 0 );
    }
}
/*
 * Extend a plane beyond its edges by replicating its border pixels.
 *
 * pix          - top-left pixel of the picture area; the buffer must have
 *                room for the padding around it
 * i_stride     - distance between rows, in pixels
 * i_width      - picture width in pixels
 * i_height     - picture height in rows
 * i_padh       - horizontal padding on each side, in pixels
 * i_padv       - vertical padding on each side, in rows
 * b_pad_top    - non-zero to fill the rows above the picture
 * b_pad_bottom - non-zero to fill the rows below the picture
 * b_chroma     - 0 or 1; when 1 the horizontal fill repeats a two-pixel unit
 *                instead of a single pixel (presumably an interleaved chroma
 *                pair -- confirm with callers)
 */
static void ALWAYS_INLINE plane_expand_border( pixel *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom, int b_chroma )
{
    /* The horizontal fill works in units of (1 << b_chroma) pixels. */
    const int fill_count = i_padh >> b_chroma;
    const int fill_size  = sizeof(pixel) << b_chroma;
    /* One full padded row: left band + picture + right band. */
    const size_t padded_row_bytes = (i_width + 2*i_padh) * sizeof(pixel);

    for( int row = 0; row < i_height; row++ )
    {
        pixel *line = pix + row*i_stride;

        /* Left band: repeat the first unit of the row. */
        pixel_memset( line - i_padh, line, fill_count, fill_size );
        /* Right band: repeat the last unit of the row. */
        pixel_memset( line + i_width, line + (i_width - 1 - b_chroma), fill_count, fill_size );
    }

    /* Upper band: copy the padded first row into each row above it. */
    if( b_pad_top )
    {
        pixel *first = pix - i_padh;
        for( int n = 0; n < i_padv; n++ )
            memcpy( first + (-n-1)*i_stride, first, padded_row_bytes );
    }

    /* Lower band: copy the padded last row into each row below it. */
    if( b_pad_bottom )
    {
        for( int n = 0; n < i_padv; n++ )
        {
            pixel *left = pix - i_padh;
            memcpy( left + (i_height+n)*i_stride, left + (i_height-1)*i_stride, padded_row_bytes );
        }
    }
}
/*
 * Fill a run of memory with a repeated pattern read from src, using the
 * widest stores available.
 *
 * dst  - start of the region to fill
 * src  - location of the pattern to repeat
 * len  - number of pattern units to write (converted to bytes below)
 * size - size of one pattern unit in bytes; the code distinguishes 1, 2 and
 *        anything larger (the caller in this file passes
 *        sizeof(pixel)<<b_chroma)
 *
 * NOTE(review): M16/M32/M64 and WORD_SIZE are defined elsewhere.  They are
 * used here as 16/32/64-bit load/store accessors and as the native word size
 * in bytes -- confirm against their definitions.
 */
static void ALWAYS_INLINE pixel_memset( pixel *dst, pixel *src, int len, int size )
{
uint8_t *dstp = (uint8_t*)dst;
/* Build the pattern at 1-, 2- and 4-byte widths. */
uint32_t v1 = *src;
/* size == 1: duplicate v1 into two bytes (assumes v1 fits in 8 bits in that
 * case -- TODO confirm); otherwise take 16 bits straight from src. */
uint32_t v2 = size == 1 ? v1 + (v1 << 8) : M16( src );
/* size <= 2: duplicate v2 into both halves; otherwise take 32 bits from src. */
uint32_t v4 = size <= 2 ? v2 + (v2 << 16) : M32( src );
int i = 0;
/* From here on len is a byte count. */
len *= size;
/* Align the destination pointer if it isn't already.
 * Every test below looks at the low bits of the original dstp, not dstp+i.
 * None of these stores is checked against len, so the caller has to pass a
 * run long enough to cover them. */
if( (intptr_t)dstp & (WORD_SIZE - 1) )
{
if( size <= 2 && ((intptr_t)dstp & 3) )
{
/* A single leading byte is only possible with a 1-byte pattern. */
if( size == 1 && ((intptr_t)dstp & 1) )
dstp[i++] = v1;
if( (intptr_t)dstp & 2 )
{
M16( dstp+i ) = v2;
i += 2;
}
}
/* Only relevant when the word size is 8 bytes. */
if( WORD_SIZE == 8 && (intptr_t)dstp & 4 )
{
M32( dstp+i ) = v4;
i += 4;
}
}
/* Main copy loop */
if( WORD_SIZE == 8 )
{
/* 8-byte pattern: v4 in both halves. */
uint64_t v8 = v4 + ((uint64_t)v4<<32);
for( ; i < len - 7; i+=8 )
M64( dstp+i ) = v8;
}
/* Remaining whole 4-byte groups (the whole run when WORD_SIZE is not 8). */
for( ; i < len - 3; i+=4 )
M32( dstp+i ) = v4;
/* Finish up the last few bytes.  Only needed for 1- and 2-byte patterns:
 * with a larger size the tail is skipped entirely. */
if( size <= 2 )
{
if( i < len - 1 )
{
M16( dstp+i ) = v2;
i += 2;
}
/* A single trailing byte can only be left with a 1-byte pattern. */
if( size == 1 && i != len )
dstp[i] = v1;
}
}