SVT-AV1 svt_aom_motion_estimation_kernel 函数分析

/*
 * Motion-estimation worker entry point.
 *
 * Repeatedly pulls a PictureDecisionResults object from the picture-decision
 * input FIFO and dispatches on its task_type:
 *   - TASK_PAME / TASK_SUPERRES_RE_ME: run 64x64-block motion estimation over
 *     the blocks of the requested segment, optionally run global motion
 *     estimation once every block of the picture has been counted, optionally
 *     run the open-loop intra search, then post a MotionEstimationResults
 *     object to the output FIFO.
 *   - TASK_TFME: run the temporal-filtering path for the requested segment.
 *   - TASK_DG_DETECTOR_HME: run dg_detector_hme_level0() for the segment.
 * The input wrapper is released on every recognised task path.
 *
 * input_ptr: EbThreadContext whose priv member is the MotionEstimationContext_t
 *            used by this worker.
 * Returns NULL.
 * NOTE(review): the loop has no visible exit; presumably EB_GET_FULL_OBJECT
 * handles shutdown - confirm against the macro definition.
 */
void *svt_aom_motion_estimation_kernel(void *input_ptr)
{
    EbThreadContext           *thread_ctx     = (EbThreadContext *)input_ptr;
    MotionEstimationContext_t *me_context_ptr = (MotionEstimationContext_t *)thread_ctx->priv;
    EbObjectWrapper           *in_results_wrapper_ptr;
    EbObjectWrapper           *out_results_wrapper;

    for (;;) {
        // Fetch the next picture-decision result from the input FIFO.
        EB_GET_FULL_OBJECT(me_context_ptr->picture_decision_results_input_fifo_ptr, &in_results_wrapper_ptr);

        PictureDecisionResults  *in_results_ptr = (PictureDecisionResults *)in_results_wrapper_ptr->object_ptr;
        PictureParentControlSet *pcs            = (PictureParentControlSet *)in_results_ptr->pcs_wrapper->object_ptr;
        SequenceControlSet      *scs            = pcs->scs;

        // Select the ME type that matches the incoming task.
        if (in_results_ptr->task_type == TASK_TFME)
            me_context_ptr->me_ctx->me_type = ME_MCTF;
        else if (in_results_ptr->task_type == TASK_PAME || in_results_ptr->task_type == TASK_SUPERRES_RE_ME)
            me_context_ptr->me_ctx->me_type = ME_OPEN_LOOP;
        else if (in_results_ptr->task_type == TASK_DG_DETECTOR_HME)
            me_context_ptr->me_ctx->me_type = ME_DG_DETECTOR;

        // Derive the ME signal settings for the task (none for the DG detector).
        if ((in_results_ptr->task_type == TASK_PAME) || (in_results_ptr->task_type == TASK_SUPERRES_RE_ME))
            svt_aom_sig_deriv_me(scs, pcs, me_context_ptr->me_ctx);
        else if (in_results_ptr->task_type == TASK_TFME)
            svt_aom_sig_deriv_me_tf(pcs, me_context_ptr->me_ctx);

        if ((in_results_ptr->task_type == TASK_PAME) || (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) {
            //assert((int)pcs->pa_ref_pic_wrapper->live_count > 0);
            EbPaReferenceObject *pa_ref_obj_ = (EbPaReferenceObject *)pcs->pa_ref_pic_wrapper->object_ptr;

            // Source buffers used as ME input: quarter- and sixteenth-downsampled
            // pictures, the padded input picture and the enhanced picture.
            EbPictureBufferDesc *quarter_picture_ptr = (EbPictureBufferDesc *)
                                                           pa_ref_obj_->quarter_downsampled_picture_ptr;
            EbPictureBufferDesc *sixteenth_picture_ptr = (EbPictureBufferDesc *)
                                                             pa_ref_obj_->sixteenth_downsampled_picture_ptr;
            EbPictureBufferDesc *input_padded_pic = (EbPictureBufferDesc *)pa_ref_obj_->input_padded_pic;
            EbPictureBufferDesc *input_pic        = pcs->enhanced_pic;

            // Segment geometry: picture size in 64x64 blocks (rounded up) and the
            // block range [start, end) covered by this segment in each direction.
            uint32_t segment_index         = in_results_ptr->segment_index;
            uint32_t pic_width_in_b64      = (pcs->aligned_width + scs->b64_size - 1) / scs->b64_size;
            uint32_t picture_height_in_b64 = (pcs->aligned_height + scs->b64_size - 1) / scs->b64_size;
            uint32_t y_segment_index;
            uint32_t x_segment_index;

            // Split the linear segment index into x/y segment coordinates.
            SEGMENT_CONVERT_IDX_TO_XY(segment_index, x_segment_index, y_segment_index, pcs->me_segments_column_count);
            uint32_t x_b64_start_index = SEGMENT_START_IDX(
                x_segment_index, pic_width_in_b64, pcs->me_segments_column_count);
            uint32_t x_b64_end_index = SEGMENT_END_IDX(
                x_segment_index, pic_width_in_b64, pcs->me_segments_column_count);
            uint32_t y_b64_start_index = SEGMENT_START_IDX(
                y_segment_index, picture_height_in_b64, pcs->me_segments_row_count);
            uint32_t y_b64_end_index = SEGMENT_END_IDX(
                y_segment_index, picture_height_in_b64, pcs->me_segments_row_count);

            // Skip all ME work for pictures flagged as skipped.
            // NOTE(review): presumably pictures whose ME was already done in an
            // earlier pass - confirm against svt_aom_is_pic_skipped().
            Bool skip_me = FALSE;
            if (svt_aom_is_pic_skipped(pcs))
                skip_me = TRUE;

            if (!skip_me) {
                // Intra slices have no references to search.
                if (pcs->slice_type != I_SLICE) {
                    // May redirect the three source pointers to scaled versions.
                    svt_aom_use_scaled_source_refs_if_needed(pcs,
                                                             input_pic,
                                                             pa_ref_obj_,
                                                             &input_padded_pic,
                                                             &quarter_picture_ptr,
                                                             &sixteenth_picture_ptr);

                    // 64x64 block loop over the current segment.
                    for (uint32_t y_b64_index = y_b64_start_index; y_b64_index < y_b64_end_index; ++y_b64_index) {
                        for (uint32_t x_b64_index = x_b64_start_index; x_b64_index < x_b64_end_index; ++x_b64_index) {
                            // Raster index of the block; the original narrows it to 16 bits.
                            uint32_t b64_index    = (uint16_t)(x_b64_index + y_b64_index * pic_width_in_b64);
                            uint32_t b64_origin_x = x_b64_index * scs->b64_size;
                            uint32_t b64_origin_y = y_b64_index * scs->b64_size;

                            // Offset of the block's top-left luma sample, computed from
                            // input_pic's origin and stride and applied to input_padded_pic.
                            uint32_t buffer_index = (input_pic->org_y + b64_origin_y) * input_pic->stride_y +
                                input_pic->org_x + b64_origin_x;
#ifdef ARCH_X86_64
                            uint8_t *src_ptr = &input_padded_pic->buffer_y[buffer_index];
                            // Clamp the row count at the bottom picture boundary.
                            uint32_t b64_height = (pcs->aligned_height - b64_origin_y) < BLOCK_SIZE_64
                                ? pcs->aligned_height - b64_origin_y
                                : BLOCK_SIZE_64;
                            // Issue one prefetch (T2 locality hint) per row of the block.
                            for (uint32_t i = 0; i < b64_height; i++) {
                                char const *p = (char const *)(src_ptr + i * input_padded_pic->stride_y);
                                _mm_prefetch(p, _MM_HINT_T2);
                            }
#endif
                            // Full-resolution source block.
                            me_context_ptr->me_ctx->b64_src_ptr    = &input_padded_pic->buffer_y[buffer_index];
                            me_context_ptr->me_ctx->b64_src_stride = input_padded_pic->stride_y;

                            // Quarter-resolution source block (coordinates halved), for HME level 1.
                            if (me_context_ptr->me_ctx->enable_hme_level1_flag) {
                                buffer_index = (quarter_picture_ptr->org_y + (b64_origin_y >> 1)) *
                                        quarter_picture_ptr->stride_y +
                                    quarter_picture_ptr->org_x + (b64_origin_x >> 1);
                                me_context_ptr->me_ctx->quarter_b64_buffer = &quarter_picture_ptr->buffer_y[buffer_index];
                                me_context_ptr->me_ctx->quarter_b64_buffer_stride = quarter_picture_ptr->stride_y;
                            }

                            // Sixteenth-resolution source block (coordinates quartered), for HME level 0.
                            if (me_context_ptr->me_ctx->enable_hme_level0_flag) {
                                buffer_index = (sixteenth_picture_ptr->org_y + (b64_origin_y >> 2)) *
                                        sixteenth_picture_ptr->stride_y +
                                    sixteenth_picture_ptr->org_x + (b64_origin_x >> 2);
                                me_context_ptr->me_ctx->sixteenth_b64_buffer =
                                    &sixteenth_picture_ptr->buffer_y[buffer_index];
                                me_context_ptr->me_ctx->sixteenth_b64_buffer_stride = sixteenth_picture_ptr->stride_y;
                            }

                            me_context_ptr->me_ctx->me_type = ME_OPEN_LOOP;

                            if ((in_results_ptr->task_type == TASK_PAME) ||
                                (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) {
                                // P slices search list 0 only; otherwise lists 0 and 1.
                                me_context_ptr->me_ctx->num_of_list_to_search = (pcs->slice_type == P_SLICE)
                                    ? 1 /*List 0 only*/
                                    : 2 /*List 0 + 1*/;

                                me_context_ptr->me_ctx->num_of_ref_pic_to_search[0] = pcs->ref_list0_count_try;
                                if (pcs->slice_type == B_SLICE)
                                    me_context_ptr->me_ctx->num_of_ref_pic_to_search[1] = pcs->ref_list1_count_try;

                                me_context_ptr->me_ctx->temporal_layer_index = pcs->temporal_layer_index;
                                me_context_ptr->me_ctx->is_ref               = pcs->is_ref;

                                if (pcs->frame_superres_enabled || pcs->frame_resize_enabled) {
                                    // Scaled frame: use the reference pictures downscaled with
                                    // the same super-res / resize denominators.
                                    for (int i = 0; i < me_context_ptr->me_ctx->num_of_list_to_search; i++) {
                                        for (int j = 0; j < me_context_ptr->me_ctx->num_of_ref_pic_to_search[i]; j++) {
                                            //assert((int)pcs->ref_pa_pic_ptr_array[i][j]->live_count > 0);
                                            uint8_t sr_denom_idx     = svt_aom_get_denom_idx(pcs->superres_denom);
                                            uint8_t resize_denom_idx = svt_aom_get_denom_idx(pcs->resize_denom);
                                            EbPaReferenceObject *ref_object =
                                                (EbPaReferenceObject *)pcs->ref_pa_pic_ptr_array[i][j]->object_ptr;

                                            me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_ptr =
                                                ref_object
                                                    ->downscaled_input_padded_picture_ptr[sr_denom_idx][resize_denom_idx];
                                            me_context_ptr->me_ctx->me_ds_ref_array[i][j].quarter_picture_ptr =
                                                ref_object->downscaled_quarter_downsampled_picture_ptr[sr_denom_idx]
                                                                                                      [resize_denom_idx];
                                            me_context_ptr->me_ctx->me_ds_ref_array[i][j].sixteenth_picture_ptr =
                                                ref_object->downscaled_sixteenth_downsampled_picture_ptr[sr_denom_idx]
                                                                                                        [resize_denom_idx];
                                            me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_number =
                                                ref_object->picture_number;
                                        }
                                    }
                                } else {
                                    // Unscaled frame: use the reference pictures as stored.
                                    for (int i = 0; i < me_context_ptr->me_ctx->num_of_list_to_search; i++) {
                                        for (int j = 0; j < me_context_ptr->me_ctx->num_of_ref_pic_to_search[i]; j++) {
                                            //assert((int)pcs->ref_pa_pic_ptr_array[i][j]->live_count > 0);
                                            EbPaReferenceObject *ref_object =
                                                (EbPaReferenceObject *)pcs->ref_pa_pic_ptr_array[i][j]->object_ptr;

                                            me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_ptr =
                                                ref_object->input_padded_pic;
                                            me_context_ptr->me_ctx->me_ds_ref_array[i][j].quarter_picture_ptr =
                                                ref_object->quarter_downsampled_picture_ptr;
                                            me_context_ptr->me_ctx->me_ds_ref_array[i][j].sixteenth_picture_ptr =
                                                ref_object->sixteenth_downsampled_picture_ptr;
                                            me_context_ptr->me_ctx->me_ds_ref_array[i][j].picture_number =
                                                ref_object->picture_number;
                                        }
                                    }
                                }
                            }

                            // Run ME once per 64x64 block, after the reference table is filled.
                            svt_aom_motion_estimation_b64(
                                pcs, b64_index, b64_origin_x, b64_origin_y, me_context_ptr->me_ctx, input_pic);

                            if ((in_results_ptr->task_type == TASK_PAME) ||
                                (in_results_ptr->task_type == TASK_SUPERRES_RE_ME)) {
                                // The processed-block counter is shared across segments,
                                // so it is updated under the mutex.
                                svt_block_on_mutex(pcs->me_processed_b64_mutex);
                                pcs->me_processed_b64_count++;
                                // Global motion runs only once every block of the picture is counted.
                                if (pcs->me_processed_b64_count == pcs->b64_total_count) {
                                    if (pcs->gm_ctrls.enabled && (!pcs->gm_ctrls.pp_enabled || pcs->gm_pp_detected)) {
                                        svt_aom_global_motion_estimation(pcs, input_pic);
                                    } else {
                                        // Global motion is not run: clear the per-reference flags.
                                        memset(pcs->is_global_motion,
                                               FALSE,
                                               MAX_NUM_OF_REF_PIC_LIST * REF_LIST_MAX_DEPTH);
                                    }
                                }
                                // Release outside the completion check so the mutex is
                                // dropped for every block, not only the last one.
                                svt_release_mutex(pcs->me_processed_b64_mutex);
                            }
                        }
                    }
                }

                // Open-loop intra search over the segment's blocks, only when
                // in-loop OIS is off and TPL is enabled.
                if (scs->in_loop_ois == 0 && pcs->tpl_ctrls.enable)
                    for (uint32_t y_b64_index = y_b64_start_index; y_b64_index < y_b64_end_index; ++y_b64_index)
                        for (uint32_t x_b64_index = x_b64_start_index; x_b64_index < x_b64_end_index; ++x_b64_index) {
                            uint32_t b64_index = (uint16_t)(x_b64_index + y_b64_index * pic_width_in_b64);
                            svt_aom_open_loop_intra_search_mb(pcs, b64_index, input_pic);
                        }
            }

            // Always hand a result downstream, whether or not ME was skipped.
            svt_get_empty_object(me_context_ptr->motion_estimation_results_output_fifo_ptr, &out_results_wrapper);

            MotionEstimationResults *out_results = (MotionEstimationResults *)out_results_wrapper->object_ptr;
            out_results->pcs_wrapper             = in_results_ptr->pcs_wrapper;
            out_results->segment_index           = segment_index;
            out_results->task_type               = in_results_ptr->task_type;

            // Release the input only after its fields have been copied out.
            svt_release_object(in_results_wrapper_ptr);
            svt_post_full_object(out_results_wrapper);
        } else if (in_results_ptr->task_type == TASK_TFME) {
            // Global-motion pre-processing runs once per picture, on segment 0.
            if (pcs->gm_ctrls.pp_enabled && pcs->gm_pp_enabled && in_results_ptr->segment_index == 0)
                svt_aom_gm_pre_processor(pcs, pcs->temp_filt_pcs_list);

            // Temporal filtering for this segment.
            me_context_ptr->me_ctx->me_type = ME_MCTF;
            svt_av1_init_temporal_filtering(
                pcs->temp_filt_pcs_list, pcs, me_context_ptr, in_results_ptr->segment_index);

            svt_release_object(in_results_wrapper_ptr);
        } else if (in_results_ptr->task_type == TASK_DG_DETECTOR_HME) {
            // Dynamic-GOP detector task for this segment.
            dg_detector_hme_level0(pcs, in_results_ptr->segment_index);

            svt_release_object(in_results_wrapper_ptr);
        }
    }

    return NULL;
}

// clang-format on